summary refs log tree commit diff
path: root/bookgraph.sh
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-02-25 17:19:48 +0000
committer Nick White <git@njw.name> 2019-02-25 17:19:48 +0000
commit 0aec35a060a9f9e1e33c18bf0e2af2aafd6a6257 (patch)
tree 06681e52bf515814728e87c3971b50b178d9b480 /bookgraph.sh
Add various helper scripts
Diffstat (limited to 'bookgraph.sh')
-rw-r--r-- bookgraph.sh 34
1 files changed, 34 insertions, 0 deletions
diff --git a/bookgraph.sh b/bookgraph.sh
new file mode 100644
index 0000000..8113a5e
--- /dev/null
+++ b/bookgraph.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+usage="$0 bookdir
+
+Creates a graph showing the average confidence of each page in a book.
+This relies on the hocr-avgpgconf file being present, which is generated
+by the allpgsconf.sh script"
+
+test $# -ne 1 && echo "$usage" && exit 1
+# Both per-page confidence files are plotted, so refuse to start without them.
+! test -f "$1/hocr-avgpgconf" && echo "No $1/hocr-avgpgconf file found; run allpgsconf.sh before running this" && exit 1
+! test -f "$1/prob-avgpgconf" && echo "No $1/prob-avgpgconf file found; run allpgsconf.sh before running this" && exit 1
+# $t holds hocr-avgpgconf with the _sometext name suffix stripped, $gp the generated gnuplot script.
+t=$(mktemp) || exit 1
+gp=$(mktemp) || { rm -f "$t"; exit 1; }
+trap 'rm -f "$t" "$gp"' EXIT    # clean up on every exit path, not just success
+trap 'exit 1' HUP INT TERM      # route signals through the EXIT trap as well
+# NOTE: this expects book file naming to be in the format nnnn_sometext
+# A literal tab is spliced in because POSIX sed does not treat \t inside [...] as a tab.
+sed "s/_[^$(printf '\t')]*//g" < "$1/hocr-avgpgconf" > "$t"
+b=$(basename "$1")
+printf 'set style data lines\n' >> "$gp"
+printf 'set title "Book Confidence for %s"\n' "$b" >> "$gp"
+printf 'set xlabel "Page Number"\n' >> "$gp"
+printf 'set ylabel "Page confidence"\n' >> "$gp"
+printf 'set mxtics\n' >> "$gp"
+printf 'set terminal png truecolor size 4600,1700\n' >> "$gp"
+printf 'set output "%s"\n' "$1/bookgraph.png" >> "$gp"
+printf 'plot "%s" using 1:2 with lines title "Tesseract",\\\n' "$t" >> "$gp"
+printf ' "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle,\\\n' >> "$gp"
+printf ' "%s" using 1:2 with lines title "Ocropus",\\\n' "$1/prob-avgpgconf" >> "$gp"
+printf ' "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle\n' >> "$gp"
+printf 'quit\n' >> "$gp"
+# The script's exit status is gnuplot's; the EXIT trap removes the temp files.
+gnuplot "$gp"