From e50bd37655e55bf47eb0973e860f79441b0e7c9f Mon Sep 17 00:00:00 2001
From: Nick White
Date: Wed, 8 May 2019 10:07:03 +0100
Subject: Set DPI for images, and maximally compress jpg (with binarisation it doesn't make much difference)

---
 format-for-hocr-pdf.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'format-for-hocr-pdf.sh')

diff --git a/format-for-hocr-pdf.sh b/format-for-hocr-pdf.sh
index 89d4dd8..0059f81 100644
--- a/format-for-hocr-pdf.sh
+++ b/format-for-hocr-pdf.sh
@@ -9,6 +9,10 @@ test $# -ne 2 && echo "$usage" && exit 1
 # All possible training files to check for
 TRAININGS="rescribealphav4 rescribealphav5"
 
+# Set image compression and dpi
+QUALITY=0
+DPI=600
+
 if ! test -d "$1"; then
 	echo "Error: $1 does not exist"
 	exit 1
@@ -29,7 +33,7 @@ find "$1" -maxdepth 1 -type f -name '*.binarized.png' | while read i; do
 		continue
 	fi
 
-	gm convert "$i" "$2/$b.jpg" || exit 1
+	gm convert -quality $QUALITY -density ${DPI}x${DPI} "$i" "$2/$b.jpg" || exit 1
 	cp "$hocr" "$2/$b.hocr" || exit 1
 done
 
-- 
cgit v1.2.1-24-ge1ad