summary refs log tree commit diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-07-15 12:26:14 +0100
committer Nick White <git@njw.name> 2019-07-15 12:26:14 +0100
commit d1b035a9aa977658652621630923e86847dcbb1f (patch)
tree 23f518c5beddbe86be03d673e84a63f1082115fa
parent 85129e39d214f13ce3974133bc17ed1eacae1d02 (diff)
Make fullocrdir.sh only do things that haven't been done before
-rwxr-xr-x fullocrdir.sh 7
1 files changed, 6 insertions, 1 deletions
diff --git a/fullocrdir.sh b/fullocrdir.sh
index bac8121..7be36c5 100755
--- a/fullocrdir.sh
+++ b/fullocrdir.sh
@@ -31,6 +31,8 @@ find "$1" -maxdepth 1 -type f -name '*.jpg' | while read f; do
b=`basename "$f" .jpg`
d=`dirname "$f"`
+ test -f "$d/${b}_bin0.2.png" && echo "Skipping preprocessing for $b; .${b}_bin0.2.png already exists" && continue
+
preprocmulti "$f" "$d/$b" || exit 1
done
@@ -39,7 +41,8 @@ find "$1" -maxdepth 1 -type f -name '*_bin?.?.png' | while read f; do
b=`basename "$f" .png`
d=`dirname "$f"`
- # TODO: ensure to run the correct command here
+ test -f "$d/$b.hocr" && echo "Skipping tesseract for $b; .hocr already exists" && continue
+
tesseract -l $TRAINING "$f" "$d/$b" hocr || exit 1
done
@@ -48,6 +51,8 @@ find "$1" -maxdepth 1 -type f -name '*.hocr' | while read f; do
b=`basename "$f" .hocr`
d=`dirname "$f"`
+ test -f "$d/${b}.conf" && echo "Skipping pgconf for $b; ${b}.conf already exists" && continue
+
pgconf "$f" > "$d/$b.conf" 2>/dev/null || rm -f "$d/$b.conf"
done