#!/bin/sh
# Calculate the average OCR confidence of every page in a book directory.
#
# Usage: <this script> dir
#
# Two tab-separated files (page name, mean confidence with two decimals)
# are rewritten from scratch inside dir on every run:
#   hocr-avgpgconf  one row per dir/*hocr file (tesseract output)
#   prob-avgpgconf  one row per four-character subdirectory of dir,
#                   averaged over that subdirectory's *prob files
#                   (ocropus output)
#
# Relies on the external `avg-lines` command.

usage="usage: $0 dir

Calculate the average confidence of each page, saving them in
files called hocr-avgpgconf and prob-avgpgconf in the book directory."

if [ $# -ne 1 ]; then
	printf '%s\n' "$usage" >&2
	exit 1
fi

if [ ! -d "$1" ]; then
	printf '%s: not a directory: %s\n' "$0" "$1" >&2
	exit 1
fi

# Training-set suffix that is stripped from hocr file names to recover
# the page name.
TRAININGNAME=rescribealphav4

# average_confidence file...
#
# Run avg-lines on the given files and print the mean of the values it
# reports, formatted with two decimals. The second ':'-separated field of
# each output line is taken as the value, with '%' signs and spaces
# removed (presumably lines look like "name: NN%" -- confirm against
# avg-lines). Prints 0.00 when avg-lines produces no lines, e.g. because
# it failed or no input file exists; its stderr is deliberately discarded
# so such pages do not spam the terminal.
average_confidence() {
	avg-lines "$@" 2>/dev/null \
		| awk -F ':' '{print $2}' \
		| sed 's/%//g;s/ //g' \
		| awk '{total += $1; n++} END{printf("%.2f\n", (n > 0) ? total/n : 0)}'
}

### calculate averages for hocr (tesseract)
o="$1/hocr-avgpgconf"
printf '' > "$o" || exit 1
for i in "$1"/*hocr
do
	# An unmatched glob is left as the literal pattern; skip it instead of
	# recording a bogus page.
	[ -e "$i" ] || continue
	# Page name: drop the .hocr suffix, the training-set tag and any image
	# extension (dots escaped so only a literal ".jpg"/".png" is removed).
	b=$(basename "$i" .hocr | sed 's/_'"$TRAININGNAME"'//; s/\.jpg//g; s/\.png//g')
	# The average is already formatted by awk, so it is copied verbatim.
	printf '%s\t%s\n' "$b" "$(average_confidence "$i")" >> "$o"
done

### calculate averages for prob (ocropus)
o="$1/prob-avgpgconf"
printf '' > "$o" || exit 1
for i in "$1"/????
do
	# Only four-character directories hold per-page *prob files; this also
	# skips the literal pattern left behind by an unmatched glob.
	[ -d "$i" ] || continue
	b=$(basename "$i")
	printf '%s\t%s\n' "$b" "$(average_confidence "$i"/*prob)" >> "$o"
done