summary | refs | log | tree | commit | diff
path: root/format-for-hocr-pdf.sh
diff options
context:
space:
mode:
Diffstat (limited to 'format-for-hocr-pdf.sh')
-rw-r--r-- format-for-hocr-pdf.sh 43
1 files changed, 0 insertions, 43 deletions
diff --git a/format-for-hocr-pdf.sh b/format-for-hocr-pdf.sh
deleted file mode 100644
index 0059f81..0000000
--- a/format-for-hocr-pdf.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/sh
-usage="Usage: $0 indir outdir
-
-Creates a new directory with image and hocr files appropriately named
-and formatted for use with the hocr-pdf tool from hocr-tools."
-
-# Usage and error messages go to stderr; any failure exits with status 1.
-test $# -ne 2 && echo "$usage" >&2 && exit 1
-
-# Training names to look for, in increasing priority: if hocr files exist
-# for several of them, the last one listed is used.
-TRAININGS="rescribealphav4 rescribealphav5"
-
-# Set image compression and dpi
-QUALITY=0
-DPI=600
-
-if ! test -d "$1"; then
-  echo "Error: $1 does not exist" >&2
-  exit 1
-fi
-mkdir -p "$2" || exit 1
-
-echo "Copying hocrs and converting pngs from $1 to $2"
-# IFS= and -r keep backslashes and surrounding blanks in file names intact.
-find "$1" -maxdepth 1 -type f -name '*.binarized.png' | while IFS= read -r i; do
-  stem=${i%.binarized.png}  # strip only the literal suffix, never mid-path
-  b=${stem##*/}
-  hocr=""
-  for t in $TRAININGS; do
-    test -f "${stem}_$t.hocr" && hocr="${stem}_$t.hocr"
-  done
-  if test -z "$hocr"; then
-    echo "Warning: no corresponding hocr file found for $i, skipping." >&2
-    continue
-  fi
-  gm convert -quality "$QUALITY" -density "${DPI}x${DPI}" "$i" "$2/$b.jpg" || exit 1
-  cp "$hocr" "$2/$b.hocr" || exit 1
-done || exit 1  # the loop runs in a pipeline subshell; propagate its failure
-
-nhocr=$(find "$2" -type f -name '*hocr' | wc -l)
-njpg=$(find "$2" -type f -name '*jpg' | wc -l)
-echo "Done. There are $nhocr hocr files and $njpg jpg files in $2"