#!/bin/sh
#
# Evaluate a traineddata model against a directory of ground-truth lines.
#
# Usage: $0 traineddata evaldir
#
#   traineddata  passed to lstmeval as --model
#   evaldir      directory tree searched for <name>.txt files; each is
#                paired with <name>.png in the same directory
#
# Steps:
#   1. For every <name>.txt without a .box or _lstmf.lstmf sibling, run
#      generate_line_box.py (located next to this script) to create
#      <name>.box.
#   2. For every <name>.box without a <name>_lstmf.lstmf sibling, run
#      tesseract in lstm.train mode to create it, then delete the .box.
#   3. List all .lstmf files, run lstmeval on that list, and print fields
#      7-12 of each output line containing "Char error rate".
#
# Exits non-zero if the arguments are wrong or any step fails.

usage="Usage: $0 traineddata evaldir"

if test $# -ne 2; then
  printf '%s\n' "$usage" >&2
  exit 1
fi

model="$1"
evaldir="$2"
here=$(dirname "$0")
listfile="$evaldir/list"

# Make box files
find "$evaldir" -type f -name '*.txt' | while IFS= read -r txt; do
  b=$(basename "$txt" .txt)
  d=$(dirname "$txt")

  # skip generating box files if there's a box or lstmf file already present
  test -f "$d/$b.box" && continue
  test -f "$d/${b}_lstmf.lstmf" && continue

  if ! python "${here}/generate_line_box.py" -i "$d/$b.png" -t "$txt" > "$d/$b.box"; then
    # The redirection creates the .box even when the generator fails; remove
    # it so a later run does not mistake it for a finished box file.
    rm -f "$d/$b.box"
    printf 'Error generating box file for %s\n' "$txt" >&2
    exit 1
  fi
# The loop runs in a subshell (right-hand side of a pipe), so the exit above
# only ends the loop; propagate the failure to the script itself.
done || exit 1

# Make lstmf files (a combined .box and .png file)
find "$evaldir" -type f -name '*.box' | while IFS= read -r box; do
  b=$(basename "$box" .box)
  d=$(dirname "$box")

  # skip generating lstmf files if they're already present
  test -f "$d/${b}_lstmf.lstmf" && continue

  # tesseract's stderr is intentionally discarded, as in the original script
  if ! tesseract "$d/$b.png" "$d/${b}_lstmf" --psm 6 lstm.train 2>/dev/null; then
    printf 'Error generating lstmf file for %s\n' "$d/$b.png" >&2
    exit 1
  fi

  # the lstm.train process creates a blank .txt
  rm -f "$d/${b}_lstmf.txt"
  rm -f "$box"
# Same subshell caveat as the box loop.
done || exit 1

# Remove the list file on every exit path from here on, including errors.
trap 'rm -f "$listfile"' EXIT

find "$evaldir" -type f -name '*.lstmf' > "$listfile" || exit 1

# Run lstmeval
evalout=$(lstmeval --model "$model" --eval_listfile "$listfile" -verbosity 0 2>&1) || {
  printf 'Error in lstmeval: %s\n' "$evalout" >&2
  exit 1
}

printf '%s\n' "$evalout" \
  | awk '/Char error rate/ {print $7 " " $8 " " $9 " " $10 " " $11 " " $12}'