summaryrefslogtreecommitdiff
path: root/dir-to-pdf.sh
diff options
context:
space:
mode:
authorNick White <git@njw.name>2019-05-08 15:23:30 +0100
committerNick White <git@njw.name>2019-05-08 15:23:30 +0100
commit7bef4bc5eee200960ee0870974d922e763c16882 (patch)
tree244bd93d952f71c4d424a831105b986a7de75853 /dir-to-pdf.sh
parent9673976e3a563ba3ebf183c7f18df2ae5c64b141 (diff)
Rename pdf prep tool as it creates the pdf too now
Diffstat (limited to 'dir-to-pdf.sh')
-rwxr-xr-xdir-to-pdf.sh70
1 files changed, 70 insertions, 0 deletions
diff --git a/dir-to-pdf.sh b/dir-to-pdf.sh
new file mode 100755
index 0000000..399bc16
--- /dev/null
+++ b/dir-to-pdf.sh
@@ -0,0 +1,70 @@
+#!/bin/sh
+usage="Usage: $0 indir
+
+Creates a PDF from image and hocr files in indir, saving it to indir.pdf.
+
+The necessary files are first copied to a temporary directory where
+they are renamed and reformatted for the hocr-pdf tool from hocr-tools.
+
+The PDF is then created with hocr-pdf, and the temporary directory is
+removed."
+
+test $# -ne 1 && echo "$usage" >&2 && exit 1
+
+# Training names to look for; when several hocr files exist the last listed wins
+TRAININGS="rescribealphav4 rescribealphav5"
+
+# Set image compression and dpi
+QUALITY=0
+DPI=600
+
+# Prefer gm, else convert; $convert stays unquoted below so "gm convert" splits in two
+if command -v gm > /dev/null ; then
+  convert="gm convert"
+elif command -v convert > /dev/null ; then
+  convert="convert"
+else
+  echo "Error: no graphicsmagick or imagemagick found" >&2
+  exit 1
+fi
+
+if ! command -v hocr-pdf > /dev/null ; then
+  echo "Error: no hocr-pdf tool found" >&2
+  exit 1
+fi
+
+if ! test -d "$1"; then
+  echo "Error: $1 does not exist" >&2
+  exit 1
+fi
+
+if ! tmpdir=$(mktemp -d); then
+  echo "Error: Failed to create temporary directory" >&2
+  exit 1
+fi
+# Clean up on every exit path; signals are routed through exit so the EXIT trap runs
+trap 'rm -rf "$tmpdir"' EXIT
+trap 'exit 1' HUP INT TERM
+
+echo "Copying hocrs and converting pngs from $1 to $tmpdir"
+# The loop is a pipeline stage (subshell): "exit 1" inside only ends the loop, so the status is re-checked after "done"
+find "$1" -maxdepth 1 -type f -name '*.unpapered.png' | while IFS= read -r i; do
+  b=$(basename "$i" .unpapered.png)
+  hocr=""
+  for t in $TRAININGS; do
+    n="${i%.unpapered.png}_unpapered_$t.hocr"
+    test -f "$n" && hocr="$n"
+  done
+  if test -z "$hocr"; then
+    echo "Warning: no corresponding hocr file found for $i, skipping." >&2
+    continue
+  fi
+
+  $convert -quality "$QUALITY" -density "${DPI}x${DPI}" "$i" "$tmpdir/$b.jpg" || exit 1
+  cp "$hocr" "$tmpdir/$b.hocr" || exit 1
+done || exit 1
+
+echo "Creating PDF"
+hocr-pdf --savefile "$1.pdf" "$tmpdir" || exit 1
+
+echo "Created a PDF at $1.pdf"