summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2020-02-13 17:47:59 +0000
committer Nick White <git@njw.name> 2020-02-13 17:47:59 +0000
commit dbadecf183942bab038b926f24b44b8a07732aff (patch)
tree b87b3f9d12f4613a167cedee658dbaa9d2525a27
parent b462f348785facdff1fe6e6bb0ec06aa3a047cc8 (diff)
Add testtraining script
-rwxr-xr-xtesttraining.sh42
1 files changed, 42 insertions, 0 deletions
diff --git a/testtraining.sh b/testtraining.sh
new file mode 100755
index 0000000..93a91fe
--- /dev/null
+++ b/testtraining.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# Evaluate a traineddata file against a directory of ground truth.
+#
+# Arguments:
+#   $1  traineddata file, passed to lstmeval as --model
+#   $2  evaldir, searched recursively for ground truth .txt files, each
+#       expected to have a .png of the same name beside it
+#
+# Prints part of lstmeval's "Char error rate" line on stdout.
+usage="Usage: $0 traineddata evaldir"
+
+# Exactly two arguments are required. The usage text goes to stderr so that
+# a caller capturing stdout never mistakes it for a result.
+if test $# -ne 2; then
+ echo "$usage" >&2
+ exit 1
+fi
+
+evaldir="$2"
+
+# Make box files
+# One box file is generated per ground truth NAME.txt, from NAME.txt and the
+# NAME.png beside it.
+# The loop is the right-hand side of a pipe and so runs in a subshell: an
+# "exit 1" inside it only ends the loop. The "|| exit 1" after "done" is what
+# actually stops the script when a box file could not be generated.
+find "$evaldir" -type f -name '*.txt' | while IFS= read -r i; do
+ b=$(basename "$i" .txt)
+ d=$(dirname "$i")
+
+ # skip generating box files if there's a box or lstmf file already present
+ # (the lstmf step of this script writes ${b}_lstmf.lstmf and then deletes
+ # the box file, so that name has to be checked as well as $b.lstmf)
+ test -f "$d/$b.box" && continue
+ test -f "$d/$b.lstmf" && continue
+ test -f "$d/${b}_lstmf.lstmf" && continue
+
+ if ! python ~/bigboy/othertools/generate_line_box.py -i "$d/$b.png" -t "$i" > "$d/$b.box"; then
+  # the redirection created the box file before python ran; remove it so
+  # that a later run does not treat the failed file as already generated
+  rm -f "$d/$b.box"
+  echo "Error generating box file for $i" >&2
+  exit 1
+ fi
+done || exit 1
+
+
+# Make lstmf files (a combined .box and .png file)
+# The loop is the right-hand side of a pipe and so runs in a subshell: an
+# "exit 1" inside it only ends the loop. The "|| exit 1" after "done" is what
+# actually stops the script when tesseract fails.
+find "$evaldir" -type f -name '*.box' | while IFS= read -r i; do
+ b=$(basename "$i" .box)
+ d=$(dirname "$i")
+
+ # skip generating lstmf files if they're already present
+ test -f "$d/${b}_lstmf.lstmf" && continue
+
+ # Output is written under the base name ${b}_lstmf rather than $b.
+ # NOTE(review): presumably so the blank .txt that lstm.train writes does
+ # not overwrite the ground truth $b.txt - confirm.
+ # tesseract's stderr is discarded, so a failure is reported here instead.
+ if ! tesseract "$d/$b.png" "$d/${b}_lstmf" --psm 6 lstm.train 2>/dev/null; then
+  rm -f "$d/${b}_lstmf.txt"
+  echo "Error generating lstmf file from $d/$b.png" >&2
+  exit 1
+ fi
+ # the lstm.train process creates a blank .txt
+ rm -f "$d/${b}_lstmf.txt"
+ # the box file is no longer needed once the lstmf file exists
+ rm -f "$i"
+done || exit 1
+
+# List every lstmf file under evaldir for lstmeval; the list file is
+# temporary and is removed again once lstmeval has run
+find "$evaldir" -type f -name '*.lstmf' > "$evaldir/list"
+
+# Run lstmeval, capturing stdout and stderr together
+evalout=$(lstmeval --model "$1" --eval_listfile "$evaldir/list" -verbosity 0 2>&1)
+evalstatus=$?
+if test "$evalstatus" -ne 0; then
+ echo "Error in lstmeval: $evalout" >&2
+fi
+# Print fields 7 to 12 of any "Char error rate" line in the captured output
+printf '%s\n' "$evalout" | awk '/Char error rate/ {print $7 " " $8 " " $9 " " $10 " " $11 " " $12}'
+
+rm -f "$evaldir/list"
+
+# Report lstmeval's status; otherwise the rm above would make a failed
+# evaluation look like a success
+exit "$evalstatus"