#!/bin/sh
# allpgsconf.sh - per-page average OCR confidence for a book directory.
#
# Recovered from commit 0aec35a060a9f9e1e33c18bf0e2af2aafd6a6257
# ("Add various helper scripts", Nick White, Mon 25 Feb 2019).
#
# usage: allpgsconf.sh dir
#
# Writes one "page<TAB>average" line per page to:
#   dir/hocr-avgpgconf  from each dir/*hocr file            (tesseract)
#   dir/prob-avgpgconf  from the *prob files in each dir/???? directory
#                                                            (ocropus)
# Both output files are truncated before being rewritten.
#
# Requires the external `avg-lines` command on PATH.
# NOTE(review): avg-lines is presumably expected to print one
# "name: NN%" line per text line; only the text after the first ':' is
# used here - confirm against that tool.
#
# Exit status: 1 on bad usage, missing directory, missing avg-lines or an
# unwritable output file; otherwise that of the last page recorded.

usage="usage: $0 dir

Calculate the average confidence of each page, saving them in files
called hocr-avgpgconf and prob-avgpgconf in the book directory."

if test $# -ne 1; then
	printf '%s\n' "$usage" >&2
	exit 1
fi

dir=$1

if ! test -d "$dir"; then
	printf '%s: %s: not a directory\n' "$0" "$dir" >&2
	exit 1
fi

# Errors from avg-lines are deliberately discarded below, so a missing
# tool would otherwise silently turn every page into 0.00.
if ! command -v avg-lines >/dev/null 2>&1; then
	printf '%s: avg-lines not found in PATH\n' "$0" >&2
	exit 1
fi

# page_average FILE...
# Print the mean of the confidences avg-lines reports for FILE..., with
# two decimals. Prints nothing when avg-lines yields no lines at all.
page_average() {
	# stderr is dropped on purpose: a page without usable input is
	# reported by record_page instead of by avg-lines' own messages.
	avg-lines "$@" 2>/dev/null \
	| awk -F ':' '{print $2}' \
	| sed 's/%//g;s/ //g' \
	| awk '{total += $1; n++} END{if(n > 0) {printf("%.2f\n", total/n)}}'
}

# record_page OUTFILE PAGE AVERAGE
# Append "PAGE<TAB>AVERAGE" to OUTFILE. An empty AVERAGE is recorded as
# 0.00 so every page keeps exactly one line, with a warning on stderr.
record_page() {
	if test -z "$3"; then
		printf 'warning: no confidence data for %s; recording 0.00\n' \
			"$2" >&2
	fi
	# AVERAGE is already formatted by awk, so it is copied verbatim
	# rather than being re-parsed by the shell's locale-dependent %f.
	printf '%s\t%s\n' "$2" "${3:-0.00}" >> "$1"
}

### calculate averages for hocr (tesseract)
o="$dir/hocr-avgpgconf"

: > "$o" || exit 1

for i in "$dir"/*hocr
do
	# An unmatched glob is left as the literal pattern; skip it.
	test -e "$i" || continue

	b=$(basename "$i" .hocr)
	pgavg=$(page_average "$i")

	record_page "$o" "$b" "$pgavg"
done

### calculate averages for prob (ocropus)
o="$dir/prob-avgpgconf"

: > "$o" || exit 1

for i in "$dir"/????
do
	# Only four-character directories are pages; this also skips the
	# literal "????" left behind by an unmatched glob.
	test -d "$i" || continue

	b=$(basename "$i")
	pgavg=$(page_average "$i"/*prob)

	record_page "$o" "$b" "$pgavg"
done