summary refs log tree commit diff
path: root/allpgsconf.sh
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-02-25 17:19:48 +0000
committer Nick White <git@njw.name> 2019-02-25 17:19:48 +0000
commit 0aec35a060a9f9e1e33c18bf0e2af2aafd6a6257 (patch)
tree 06681e52bf515814728e87c3971b50b178d9b480 /allpgsconf.sh
Add various helper scripts
Diffstat (limited to 'allpgsconf.sh')
-rw-r--r-- allpgsconf.sh 40
1 files changed, 40 insertions, 0 deletions
diff --git a/allpgsconf.sh b/allpgsconf.sh
new file mode 100644
index 0000000..60ea4d6
--- /dev/null
+++ b/allpgsconf.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+usage="usage: $0 dir
+
+Calculate the average confidence of each page, saving them in files
+called hocr-avgpgconf and prob-avgpgconf in the book directory."
+test $# -eq 1 || { printf '%s\n' "$usage" >&2; exit 1; } # usage errors go to stderr
+
+### calculate averages for hocr (tesseract)
+o="$1/hocr-avgpgconf"
+
+for i in "$1"/*hocr
+do
+ # an unmatched glob stays literal; skip it rather than record a bogus page
+ test -e "$i" || continue
+ b=$(basename "$i" .hocr)
+ # average field 2 (':'-separated) of each avg-lines output line, with '%' and spaces stripped; avg-lines errors are discarded
+ pgavg=$(avg-lines "$i" 2>/dev/null \
+ | awk -F ':' '{print $2}' \
+ | sed 's/%//g;s/ //g' \
+ | awk '{total += $1; n++} END{if(n > 0) {printf("%.2f\n", total/n)}}')
+ # awk has already formatted the value, so print it as a string; a page with no readings is recorded as 0.00
+ printf '%s\t%s\n' "$b" "${pgavg:-0.00}"
+done > "$o"
+
+### calculate averages for prob (ocropus)
+o="$1/prob-avgpgconf"
+
+for i in "$1"/????
+do
+ # only a directory can contain *prob files; this also skips the literal pattern left by an unmatched glob
+ test -d "$i" || continue
+ b=$(basename "$i")
+ # average field 2 (':'-separated) of each avg-lines output line, with '%' and spaces stripped; avg-lines errors are discarded
+ pgavg=$(avg-lines "$i"/*prob 2>/dev/null \
+ | awk -F ':' '{print $2}' \
+ | sed 's/%//g;s/ //g' \
+ | awk '{total += $1; n++} END{if(n > 0) {printf("%.2f\n", total/n)}}')
+ # awk has already formatted the value, so print it as a string; a page with no readings is recorded as 0.00
+ printf '%s\t%s\n' "$b" "${pgavg:-0.00}"
+done > "$o"