#!/bin/sh
# Plot per-page confidence and a scaled word count for one book.
#
# Usage: <script> bookdir
#
# Reads every regular file matching '*hocr' directly inside bookdir/best,
# records one "page<TAB>confidence<TAB>words" row per file, and asks gnuplot
# to render the rows to bookdir/<name>_bookgraph.png, where <name> is the
# basename of the resolved bookdir path.
#
# Requires: pgconf, gnuplot, and a find/readlink supporting -maxdepth / -f.
# Exit status: 0 on success, 1 on usage errors, missing tools, missing
# input, or gnuplot failure.

usage="$0 bookdir

Creates a graph showing the average confidence and a (scaled)
word count of each page in a book. The word count is scaled
(divided by 10 and plus 50) to make it easy to compare to
the confidence, by generally occupying a similar scale."

if [ "$#" -ne 1 ]; then
  printf '%s\n' "$usage" >&2
  exit 1
fi

# Check for both external tools up front so no work is wasted.
for tool in pgconf gnuplot; do
  if ! command -v "$tool" > /dev/null; then
    printf 'Error: no %s tool found\n' "$tool" >&2
    exit 1
  fi
done

if [ ! -d "$1/best" ]; then
  printf 'Error: %s/best is not a directory\n' "$1" >&2
  exit 1
fi

# t holds the tab-separated plot data, gp the generated gnuplot script.
t=$(mktemp) || exit 1
gp=$(mktemp) || { rm -f -- "$t"; exit 1; }
# Remove the temporary files on every exit path, including interruption.
trap 'rm -f -- "$t" "$gp"' EXIT
trap 'exit 1' HUP INT TERM

# One row per hocr file. find returns files in arbitrary order, and the data
# is plotted "with lines", so the rows are sorted numerically by their first
# field (the page identifier) to keep the line running left to right.
find "$1/best" -maxdepth 1 -type f -name '*hocr' | while IFS= read -r i; do
  # pgconf's output is formatted with %d below, so it is expected to print an
  # integer (presumably the page's average confidence -- confirm against pgconf).
  c=$(pgconf "$i")
  # Page identifier: file name without ".hocr" and without anything from the
  # first underscore onwards.
  n=$(basename "$i" .hocr | sed 's/_.*//')
  # Word count: number of lines mentioning ocrx_word.
  w=$(grep -c ocrx_word -- "$i")
  printf '%s\t%d\t%d\n' "$n" "$c" "$w"
done | sort -n > "$t"

if [ ! -s "$t" ]; then
  printf 'Error: no hocr files found in %s/best\n' "$1" >&2
  exit 1
fi

# Use the resolved directory name for the title and the output file name, so
# that arguments such as "." or a symlink still yield a meaningful name.
r=$(readlink -f "$1")
b=$(basename "$r")

# In the printf format, %% yields a literal % and \\ yields the single
# backslash gnuplot uses for line continuation; $1 and $3 are gnuplot column
# references, protected from the shell by the single quotes.
printf '
set style data lines
set title "Book Confidence for %s"
set xlabel "Page Number"
set ylabel "Page Confidence %%"
set mxtics
set terminal png truecolor size 4600,1700
set output "%s"
plot "%s" using 1:2 with lines title "Confidence",\\
"" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle,\\
"" using 1:($3/10+50) with lines title "Number of words (scaled)"
quit
' "$b" "$1/${b}_bookgraph.png" "$t" > "$gp"

if ! gnuplot "$gp"; then
  printf 'Error: gnuplot failed to create %s\n' "$1/${b}_bookgraph.png" >&2
  exit 1
fi