#!/bin/sh
# testtraining.sh - evaluate a traineddata file against a directory of
# ground truth using lstmeval.
#
# Usage: testtraining.sh traineddata evaldir
#
#   traineddata  model passed to `lstmeval --model`
#   evaldir      directory searched recursively for NAME.txt files; each is
#                expected to have a NAME.png alongside it
#
# Steps:
#   1. For every *txt file, generate NAME.box with generate_line_box.py
#      (skipped if a box or lstmf file for NAME already exists).
#   2. For every *box file, run tesseract with the lstm.train config to
#      produce NAME_lstmf.lstmf, then delete the .box and the blank
#      NAME_lstmf.txt that tesseract leaves behind.
#   3. Write the list of all *lstmf files to evaldir/list, run lstmeval on
#      it, and print fields 7-12 of its "Char error rate" line.
#
# Environment:
#   GENERATE_LINE_BOX  path to generate_line_box.py
#                      (default: $HOME/bigboy/othertools/generate_line_box.py)
#
# Exit status: 0 on success, 1 on usage error or if any tool fails.
#
# Provenance: recovered from commit dbadecf183942bab038b926f24b44b8a07732aff
# ("Add testtraining script", Nick White, Thu, 13 Feb 2020 17:47:59 +0000).

usage="Usage: $0 traineddata evaldir"

# Print a message to stderr and abort. Inside a pipeline this only leaves the
# pipeline's subshell, so every pipeline below is followed by `|| exit 1`.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

test $# -eq 2 || die "$usage"

model="$1"
evaldir="$2"
test -d "$evaldir" || die "$0: $evaldir is not a directory"

boxgen="${GENERATE_LINE_BOX:-$HOME/bigboy/othertools/generate_line_box.py}"
list="$evaldir/list"

# Make box files
find "$evaldir" -type f -name '*txt' | while IFS= read -r txt; do
  b=$(basename "$txt" .txt)
  d=$(dirname "$txt")

  # Skip if a box or lstmf file is already present. Step 2 names its output
  # ${b}_lstmf.lstmf, so that name has to be checked as well as $b.lstmf;
  # otherwise every re-run regenerates box files that are never consumed.
  if test -f "$d/$b.box" ||
    test -f "$d/$b.lstmf" ||
    test -f "$d/${b}_lstmf.lstmf"; then
    continue
  fi

  # stdin is redirected so the tool cannot swallow the file list the loop
  # is reading from.
  if ! python "$boxgen" -i "$d/$b.png" -t "$txt" > "$d/$b.box" < /dev/null; then
    # Do not leave an empty/partial box behind: later runs would treat it
    # as valid and skip regeneration.
    rm -f "$d/$b.box"
    die "$0: failed to generate box file for $txt"
  fi
done || exit 1

# Make lstmf files (a combined .box and .png file)
find "$evaldir" -type f -name '*box' | while IFS= read -r box; do
  b=$(basename "$box" .box)
  d=$(dirname "$box")
  out="$d/${b}_lstmf"

  # skip generating lstmf files if they're already present
  if test -f "$out.lstmf"; then
    continue
  fi

  # tesseract's stderr is deliberately discarded (as in the original script);
  # a failure is reported below instead.
  if ! tesseract "$d/$b.png" "$out" --psm 6 lstm.train 2>/dev/null < /dev/null; then
    # Remove any stray blank .txt so a re-run does not pick it up as
    # ground truth in the box-generation step.
    rm -f "$out.txt"
    die "$0: tesseract lstm.train failed for $d/$b.png"
  fi

  # the lstm.train process creates a blank .txt
  rm -f "$out.txt"
  rm -f "$box"
done || exit 1

# Remove the temporary list file on every exit path from here on.
trap 'rm -f "$list"' EXIT
trap 'exit 1' HUP INT TERM

find "$evaldir" -type f -name '*lstmf' > "$list" || exit 1

# Run lstmeval
evalout=$(lstmeval --model "$model" --eval_listfile "$list" -verbosity 0 2>&1) ||
  die "Error in lstmeval: $evalout"

printf '%s\n' "$evalout" |
  awk '/Char error rate/ {print $7 " " $8 " " $9 " " $10 " " $11 " " $12}'