summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2019-05-08 10:07:03 +0100
committer: Nick White <git@njw.name> 2019-05-08 10:07:03 +0100
commit: e50bd37655e55bf47eb0973e860f79441b0e7c9f (patch)
tree: 3bcebc207254032a4392ba7d7cbf84fd3ce64dc3
parent: aa660900858adec7de2d2e85d2a9b0ae3ee01c4d (diff)
Set DPI for images, and maximally compress jpg (with binarisation it doesn't make much difference)
-rw-r--r-- format-for-hocr-pdf.sh 6
1 files changed, 5 insertions, 1 deletions
diff --git a/format-for-hocr-pdf.sh b/format-for-hocr-pdf.sh
index 89d4dd8..0059f81 100644
--- a/format-for-hocr-pdf.sh
+++ b/format-for-hocr-pdf.sh
@@ -9,6 +9,10 @@ test $# -ne 2 && echo "$usage" && exit 1
# All possible training files to check for
TRAININGS="rescribealphav4 rescribealphav5"
+# Set image compression and dpi
+QUALITY=0
+DPI=600
+
if ! test -d "$1"; then
echo "Error: $1 does not exist"
exit 1
@@ -29,7 +33,7 @@ find "$1" -maxdepth 1 -type f -name '*.binarized.png' | while read i; do
continue
fi
- gm convert "$i" "$2/$b.jpg" || exit 1
+ gm convert -quality $QUALITY -density ${DPI}x${DPI} "$i" "$2/$b.jpg" || exit 1
cp "$hocr" "$2/$b.hocr" || exit 1
done