summaryrefslogtreecommitdiff
path: root/dir-to-pdf.sh
blob: 399bc1605d9c390f706b3e57b38727f07acb2e31 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/sh
# Build a PDF from the page images and hocr files found in a directory.
# The text below is printed when the script is called with the wrong
# number of arguments.
usage="Usage: $0 indir

Creates a PDF from image and hocr files in indir, saving it to indir.pdf.

The necessary files are first copied to a temporary directory where
they are renamed and reformatted for the hocr-pdf tool from hocr-tools.

The PDF is then created with hocr-pdf, and the temporary directory is
removed."

# Exactly one argument (the input directory) is required. An explicit
# if-block is used instead of an && chain so that the exit can never be
# skipped when printing the usage text fails; the text goes to stderr
# because this is an error path.
if test $# -ne 1; then
	echo "$usage" >&2
	exit 1
fi

# Space-separated names of the trainings whose hocr output may exist
# for a page; every name listed here is checked.
TRAININGS='rescribealphav4 rescribealphav5'

# Settings handed to the image converter: the density (applied to both
# axes) and the -quality value used for the generated images.
DPI=600
QUALITY=0

# Pick the image conversion command: prefer GraphicsMagick ("gm convert")
# and fall back to ImageMagick ("convert"). The value is later expanded
# unquoted so that "gm convert" splits into command and subcommand.
if command -v gm > /dev/null ; then
	convert="gm convert"
elif command -v convert > /dev/null ; then
	convert="convert"
else
	echo "Error: no graphicsmagick or imagemagick found" >&2
	exit 1
fi

# hocr-pdf is needed to assemble the final PDF.
if ! command -v hocr-pdf > /dev/null ; then
	echo "Error: no hocr-pdf tool found" >&2
	exit 1
fi

# The argument must name a directory; test -d also fails for a path that
# exists but is a regular file, hence the wording of the message.
if ! test -d "$1"; then
	echo "Error: $1 does not exist or is not a directory" >&2
	exit 1
fi

# Create a private scratch directory. mktemp -d both picks the name and
# creates the directory, so no separate mkdir is required.
tmpdir=$(mktemp -d) || {
	echo "Error: Failed to create temporary directory" >&2
	exit 1
}

# Remove the scratch directory on every exit path, including failures.
# The signal trap turns HUP/INT/TERM into a normal exit so that the EXIT
# trap also runs when the script is interrupted.
trap 'rm -rf -- "$tmpdir"' EXIT
trap 'exit 1' HUP INT TERM

echo "Copying hocrs and converting pngs from $1 to $tmpdir"
# For every <page>.unpapered.png directly inside the input directory, find
# the matching hocr file, convert the image to JPEG and copy the hocr, both
# stored in $tmpdir under the common base name <page>.
#
# The loop is the last stage of a pipeline and therefore runs in a
# subshell: an "exit 1" inside it only ends that subshell. The trailing
# "|| exit 1" propagates such a failure to the script itself.
# IFS= and -r keep file names with leading/trailing whitespace or
# backslashes intact.
find "$1" -maxdepth 1 -type f -name '*.unpapered.png' | while IFS= read -r i; do
	# Page name without directory and without the .unpapered.png suffix.
	b=${i##*/}
	b=${b%.unpapered.png}

	# Look for <page>_unpapered_<training>.hocr next to the image. Only
	# the suffix is replaced, so other parts of the path are never
	# touched. If several trainings have output, the last one listed in
	# TRAININGS wins.
	hocr=""
	for t in $TRAININGS; do
		n="${i%.unpapered.png}_unpapered_$t.hocr"
		if test -f "$n"; then
			hocr="$n"
		fi
	done
	if test -z "$hocr"; then
		echo "Warning: no corresponding hocr file found for $i, skipping." >&2
		continue
	fi

	# $convert is intentionally unquoted: it may hold "gm convert".
	$convert -quality "$QUALITY" -density "${DPI}x${DPI}" "$i" "$tmpdir/$b.jpg" || exit 1
	cp "$hocr" "$tmpdir/$b.hocr" || exit 1
done || exit 1

echo "Creating PDF"
# Name the output after the input directory, ignoring trailing slashes so
# that "indir/" yields indir.pdf rather than a hidden ".pdf" inside indir.
# The ?*/ pattern leaves a bare "/" untouched.
outbase=$1
while :; do
	case $outbase in
		?*/) outbase=${outbase%/} ;;
		*) break ;;
	esac
done

# If hocr-pdf fails, remove the temporary directory before exiting so it
# is not left behind.
if ! hocr-pdf --savefile "$outbase.pdf" "$tmpdir"; then
	rm -rf "$tmpdir"
	exit 1
fi

echo "Created a PDF at $outbase.pdf"
rm -rf "$tmpdir"