From d1b035a9aa977658652621630923e86847dcbb1f Mon Sep 17 00:00:00 2001
From: Nick White
Date: Mon, 15 Jul 2019 12:26:14 +0100
Subject: Make fullocrdir.sh only do things that haven't been done before

---
 fullocrdir.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fullocrdir.sh b/fullocrdir.sh
index bac8121..7be36c5 100755
--- a/fullocrdir.sh
+++ b/fullocrdir.sh
@@ -31,6 +31,8 @@ find "$1" -maxdepth 1 -type f -name '*.jpg' | while read f; do
 	b=`basename "$f" .jpg`
 	d=`dirname "$f"`
 
+	test -f "$d/${b}_bin0.2.png" && echo "Skipping preprocessing for $b; .${b}_bin0.2.png already exists" && continue
+
 	preprocmulti "$f" "$d/$b" || exit 1
 done
 
@@ -39,7 +41,8 @@ find "$1" -maxdepth 1 -type f -name '*_bin?.?.png' | while read f; do
 	b=`basename "$f" .png`
 	d=`dirname "$f"`
 
-	# TODO: ensure to run the correct command here
+	test -f "$d/$b.hocr" && echo "Skipping tesseract for $b; .hocr already exists" && continue
+
 	tesseract -l $TRAINING "$f" "$d/$b" hocr || exit 1
 done
 
@@ -48,6 +51,8 @@ find "$1" -maxdepth 1 -type f -name '*.hocr' | while read f; do
 	b=`basename "$f" .hocr`
 	d=`dirname "$f"`
 
+	test -f "$d/${b}.conf" && echo "Skipping pgconf for $b; ${b}.conf already exists" && continue
+
 	pgconf "$f" > "$d/$b.conf" 2>/dev/null || rm -f "$d/$b.conf"
 done
 
-- 
cgit v1.2.1-24-ge1ad