From ef82770cbefb4ceaf894d0f37cd8468c9054b86a Mon Sep 17 00:00:00 2001
From: Nick White
Date: Wed, 5 Jun 2019 17:37:23 +0100
Subject: Rename bookgraphv2.sh to the canonical bookgraph

Add word count to the graph. Use a scaled figure so it's easy to
compare with the confidence.
---
 bookgraph      | 45 +++++++++++++++++++++++++++++++++++++++++++++
 bookgraphv2.sh | 40 ----------------------------------------
 2 files changed, 45 insertions(+), 40 deletions(-)
 create mode 100755 bookgraph
 delete mode 100644 bookgraphv2.sh

diff --git a/bookgraph b/bookgraph
new file mode 100755
index 0000000..8010bdc
--- /dev/null
+++ b/bookgraph
@@ -0,0 +1,45 @@
+#!/bin/sh
+usage="$0 bookdir
+
+Creates a graph showing the average confidence and a (scaled) word
+count of each page in a book. The word count is scaled (divided by
+10 and plus 50) to make it easy to compare to the confidence, by
+generally occupying a similar scale."
+
+test $# -ne 1 && echo "$usage" && exit 1
+
+if ! command -v pgconf > /dev/null ; then
+	echo "Error: no pgconf tool found"
+	exit 1
+fi
+
+t=`mktemp`
+
+find "$1/best" -maxdepth 1 -type f -name '*hocr' | while read i; do
+	c=`pgconf "$i"`
+	n=`basename "$i" .hocr | sed 's/_.*//'`
+	w=`grep ocrx_word "$i" | wc -l`
+	printf '%s\t%d\t%d\n' "$n" "$c" "$w" >> "$t"
+done
+
+r=`readlink -f "$1"`
+b=`basename "$r"`
+
+gp=`mktemp`
+printf '
+
+set style data lines
+set title "Book Confidence for %s"
+set xlabel "Page Number"
+set ylabel "Page Confidence %%"
+set mxtics
+set terminal png truecolor size 4600,1700
+set output "%s"
+plot "%s" using 1:2 with lines title "Confidence",\\
+	"" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle,\\
+	"" using 1:($3/10+50) with lines title "Number of words (scaled)"
+quit
+' "$b" "$1/${b}_bookgraph.png" "$t" >> "$gp"
+gnuplot "$gp"
+
+rm -f "$t" "$gp"
diff --git a/bookgraphv2.sh b/bookgraphv2.sh
deleted file mode 100644
index 3f6cfaa..0000000
--- a/bookgraphv2.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/sh
-usage="$0 bookdir
-
-Creates a graph showing the average confidence of each page in a book."
-
-test $# -ne 1 && echo "$usage" && exit 1
-
-if ! command -v pgconf > /dev/null ; then
-	echo "Error: no pgconf tool found"
-	exit 1
-fi
-
-t=`mktemp`
-
-find "$1/best" -maxdepth 1 -type f -name '*hocr' | while read i; do
-	c=`pgconf "$i"`
-	n=`basename "$i" .hocr | sed 's/_.*//'`
-	printf '%s\t%d\n' "$n" "$c" >> "$t"
-done
-
-r=`readlink -f "$1"`
-b=`basename "$r"`
-
-gp=`mktemp`
-printf '
-
-set style data lines
-set title "Book Confidence for %s"
-set xlabel "Page Number"
-set ylabel "Page Confidence %%"
-set mxtics
-set terminal png truecolor size 4600,1700
-set output "%s"
-plot "%s" using 1:2 with lines title "Confidence",\\
-	"" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle
-quit
-' "$b" "$1/${b}_bookgraph.png" "$t" >> "$gp"
-gnuplot "$gp"
-
-rm -f "$t" "$gp"
-- 
cgit v1.2.1-24-ge1ad