summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-06-05 17:37:23 +0100
committer Nick White <git@njw.name> 2019-06-05 17:37:23 +0100
commit ef82770cbefb4ceaf894d0f37cd8468c9054b86a (patch)
tree b1390f2d1d792bee5eb24e229296516ec2e0e662
parent f7ecb8b610e1cc2029b588c8cdc4c073ea4faca5 (diff)
Rename bookgraphv2.sh to the canonical bookgraph
Add word count to the graph. Use a scaled figure so it's easy to compare with the confidence.
-rwxr-xr-x [-rw-r--r--] bookgraph (renamed from bookgraphv2.sh) 11
1 files changed, 8 insertions, 3 deletions
diff --git a/bookgraphv2.sh b/bookgraph
index 3f6cfaa..8010bdc 100644..100755
--- a/bookgraphv2.sh
+++ b/bookgraph
@@ -1,7 +1,10 @@
#!/bin/sh
usage="$0 bookdir
-Creates a graph showing the average confidence of each page in a book."
+Creates a graph showing the average confidence and a (scaled) word
+count of each page in a book. The word count is scaled (divided by
+10 and plus 50) to make it easy to compare to the confidence, by
+generally occupying a similar scale."
test $# -ne 1 && echo "$usage" && exit 1
@@ -15,7 +18,8 @@ t=`mktemp`
find "$1/best" -maxdepth 1 -type f -name '*hocr' | while read i; do
c=`pgconf "$i"`
n=`basename "$i" .hocr | sed 's/_.*//'`
- printf '%s\t%d\n' "$n" "$c" >> "$t"
+ w=`grep ocrx_word "$i" | wc -l`
+ printf '%s\t%d\t%d\n' "$n" "$c" "$w" >> "$t"
done
r=`readlink -f "$1"`
@@ -32,7 +36,8 @@ set mxtics
set terminal png truecolor size 4600,1700
set output "%s"
plot "%s" using 1:2 with lines title "Confidence",\\
- "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle
+ "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle,\\
+ "" using 1:($3/10+50) with lines title "Number of words (scaled)"
quit
' "$b" "$1/${b}_bookgraph.png" "$t" >> "$gp"
gnuplot "$gp"