#!/bin/sh
#
# Build a PDF from the page images and hOCR files found directly inside a
# directory, using the hocr-pdf tool from hocr-tools.
#
# For every <indir>/<page>.unpapered.png that has a matching
# <indir>/<page>_unpapered_<training>.hocr, the image is converted to
# <page>.jpg and the hOCR copied to <page>.hocr in a temporary directory,
# which is then handed to hocr-pdf. The temporary directory is removed on
# every exit path.
#
# Exit status: 0 on success, 1 on usage error, missing tool, missing input
# directory, or any failed conversion/copy/PDF step.

usage="Usage: $0 indir

Creates a PDF from image and hocr files in indir, saving it to
indir.pdf.

The necessary files are first copied to a temporary directory
where they are renamed and reformatted for the hocr-pdf tool
from hocr-tools. The PDF is then created with hocr-pdf, and
the temporary directory is removed."

if test $# -ne 1; then
  echo "$usage" >&2
  exit 1
fi

# All possible training files to check for
TRAININGS="rescribealphav4 rescribealphav5"

# Set image compression and dpi
QUALITY=0
DPI=600

# Prefer GraphicsMagick, fall back to ImageMagick. The value may contain two
# words ("gm convert"), so it is deliberately expanded unquoted further down.
if command -v gm > /dev/null; then
  convert="gm convert"
elif command -v convert > /dev/null; then
  convert="convert"
else
  echo "Error: no graphicksmagick or imagemagick found" >&2
  exit 1
fi

if ! command -v hocr-pdf > /dev/null; then
  echo "Error: no hocr-pdf tool found" >&2
  exit 1
fi

if ! test -d "$1"; then
  echo "Error: $1 does not exist" >&2
  exit 1
fi

# Derive the output name from the directory name without trailing slashes, so
# that "indir/" yields indir.pdf rather than a hidden "indir/.pdf".
outbase=$1
while test "${outbase%/}" != "$outbase"; do
  outbase=${outbase%/}
done
# The argument consisted only of slashes (the root directory).
test -n "$outbase" || outbase=/
outfile="$outbase.pdf"

tmpdir=$(mktemp -d) || {
  echo "Error: Failed to create temporary directory" >&2
  exit 1
}
# Remove the temporary directory however the script ends. Signals are routed
# through exit so that the EXIT trap also fires in shells that do not run it
# on a fatal signal.
trap 'rm -rf "$tmpdir"' EXIT
trap 'exit 1' HUP INT TERM

echo "Copying hocrs and converting pngs from $1 to $tmpdir"
# The loop runs in a subshell (right-hand side of a pipe), so "exit 1" inside
# it only ends the subshell; the "|| exit 1" after "done" propagates the
# failure to the script itself.
find "$1" -maxdepth 1 -type f -name '*.unpapered.png' | while IFS= read -r i; do
  b=$(basename "$i" .unpapered.png)
  stem=${i%.unpapered.png}

  # Pick the hOCR of the last training in TRAININGS that exists for this page.
  hocr=""
  for t in $TRAININGS; do
    n="${stem}_unpapered_$t.hocr"
    if test -f "$n"; then
      hocr=$n
    fi
  done

  if test -z "$hocr"; then
    echo "Warning: no corresponding hocr file found for $i, skipping." >&2
    continue
  fi

  # hocr-pdf is given a directory of same-named .jpg/.hocr pairs.
  $convert -quality $QUALITY -density ${DPI}x${DPI} "$i" "$tmpdir/$b.jpg" || exit 1
  cp "$hocr" "$tmpdir/$b.hocr" || exit 1
done || exit 1

echo "Creating PDF"
hocr-pdf --savefile "$outfile" "$tmpdir" || exit 1

echo "Created a PDF at $outfile"