diff options
author | Nick White <git@njw.name> | 2019-02-25 17:19:48 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-02-25 17:19:48 +0000 |
commit | 0aec35a060a9f9e1e33c18bf0e2af2aafd6a6257 (patch) | |
tree | 06681e52bf515814728e87c3971b50b178d9b480 /allpgsconf.sh |
Add various helper scripts
Diffstat (limited to 'allpgsconf.sh')
-rw-r--r-- | allpgsconf.sh | 40 |
1 files changed, 40 insertions, 0 deletions
#!/bin/sh
# Write per-page average confidences for a book directory.
#
# For every "$1"/*hocr file, one "name<TAB>average" row is appended to
# "$1/hocr-avgpgconf". For every four-character entry "$1"/????, one row
# computed from that entry's *prob files is appended to
# "$1/prob-avgpgconf". Both output files are truncated before writing.
#
# Requires the external avg-lines command.
# Exit status: 1 on wrong usage or when the argument is not a directory.
usage="usage: $0 dir

Calculate the average confidence of each page, saving them in a file
called hocr-avgpgconf in the book directory."

if test $# -ne 1; then
	# Usage errors are diagnostics, so they go to stderr, not stdout.
	printf '%s\n' "$usage" >&2
	exit 1
fi

if test ! -d "$1"; then
	printf '%s: not a directory: %s\n' "$0" "$1" >&2
	exit 1
fi

# avgconf file...
# Run avg-lines on the given files, take the second ':'-separated field
# of each output line, strip '%' and spaces, and print the mean of those
# values with two decimals. Prints 0.00 when avg-lines produces no lines
# (its stderr is discarded on purpose), so callers always get a number.
avgconf() {
	avg-lines "$@" 2>/dev/null \
		| awk -F ':' '{print $2}' \
		| sed 's/%//g;s/ //g' \
		| awk '{total += $1; n++}
			END{if(n > 0) {printf("%.2f\n", total/n)} else {print "0.00"}}'
}

### calculate averages for hocr (tesseract)
out="$1/hocr-avgpgconf"

: > "$out"

for page in "$1"/*hocr
do
	# An unmatched glob stays as the literal pattern; skip it rather
	# than recording a bogus "*hocr" row.
	test -e "$page" || continue

	name=$(basename "$page" .hocr)

	printf '%s\t%s\n' "$name" "$(avgconf "$page")" >> "$out"
done

### calculate averages for prob (ocropus)
out="$1/prob-avgpgconf"

: > "$out"

for entry in "$1"/????
do
	# Same unmatched-glob guard as above.
	test -e "$entry" || continue

	name=$(basename "$entry")

	# *prob is left unquoted so it expands to every prob file in the entry.
	printf '%s\t%s\n' "$name" "$(avgconf "$entry"/*prob)" >> "$out"
done