summary refs log tree commit diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-05-14 17:34:50 +0100
committer Nick White <git@njw.name> 2019-05-14 17:34:50 +0100
commit 28091bd3a22de816aa12c8e96636384d9a516fea (patch)
tree ab0a107aa5821a95509a011c681f09cfb870b0c0
parent 52aa98a276666976930c0e73f5e2be794762e821 (diff)
Add bookgraphv2, to go hand in hand with fullocrdir
-rw-r--r-- bookgraphv2.sh 39
1 files changed, 39 insertions, 0 deletions
diff --git a/bookgraphv2.sh b/bookgraphv2.sh
new file mode 100644
index 0000000..9984ede
--- /dev/null
+++ b/bookgraphv2.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+# Runs pgconf on every *hocr file directly in bookdir/best and has gnuplot draw the results to bookdir/<bookdir name>_bookgraph.png.
+usage="$0 bookdir
+
+Creates a graph showing the average confidence of each page in a book."
+
+test $# -ne 1 && echo "$usage" >&2 && exit 1
+
+if ! command -v pgconf > /dev/null ; then
+ echo "Error: no pgconf tool found" >&2
+ exit 1
+fi
+
+# Scratch files (t: page data, gp: gnuplot script); the trap removes them on exit, after gnuplot has finished with them.
+t=$(mktemp) || exit 1
+trap 'rm -f "$t" "$gp"' EXIT
+gp=$(mktemp) || exit 1
+# One "page<TAB>confidence" row per file, sorted by page number: find order is arbitrary and gnuplot joins points in file order.
+find "$1/best" -maxdepth 1 -type f -name '*hocr' | while IFS= read -r i; do
+ c=$(pgconf "$i")
+ n=$(basename "$i" .hocr | sed 's/_.*//')
+ printf '%s\t%d\n' "$n" "$c"
+done | sort -n > "$t"
+b=$(basename "$1")
+# Build the gnuplot script; in the format below %% and \\ are printf escapes for a literal % and \, and $1 is gnuplot's column 1.
+printf '
+
+set style data lines
+set title "Book Confidence for %s"
+set xlabel "Page Number"
+set ylabel "Page Confidence %%"
+set mxtics
+set terminal png truecolor size 4600,1700
+set output "%s"
+plot "%s" using 1:2 with lines title "Confidence",\\
+ "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle
+quit
+' "$b" "$1/${b}_bookgraph.png" "$t" > "$gp"
+gnuplot "$gp"