diff options
author | Nick White <git@njw.name> | 2019-06-05 17:37:23 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-06-05 17:37:23 +0100 |
commit | ef82770cbefb4ceaf894d0f37cd8468c9054b86a (patch) | |
tree | b1390f2d1d792bee5eb24e229296516ec2e0e662 | |
parent | f7ecb8b610e1cc2029b588c8cdc4c073ea4faca5 (diff) |
Rename bookgraphv2.sh to the canonical bookgraph
Add word count to the graph. Use a scaled figure so it's easy to compare with the confidence.
-rwxr-xr-x[-rw-r--r--] | bookgraph (renamed from bookgraphv2.sh) | 11 |
1 files changed, 8 insertions, 3 deletions
```diff
diff --git a/bookgraphv2.sh b/bookgraph
index 3f6cfaa..8010bdc 100644..100755
--- a/bookgraphv2.sh
+++ b/bookgraph
@@ -1,7 +1,10 @@
 #!/bin/sh
 usage="$0 bookdir
 
-Creates a graph showing the average confidence of each page in a book."
+Creates a graph showing the average confidence and a (scaled) word
+count of each page in a book. The word count is scaled (divided by
+10 and plus 50) to make it easy to compare to the confidence, by
+generally occupying a similar scale."
 
 test $# -ne 1 && echo "$usage" && exit 1
 
@@ -15,7 +18,8 @@ t=`mktemp`
 find "$1/best" -maxdepth 1 -type f -name '*hocr' | while read i; do
 	c=`pgconf "$i"`
 	n=`basename "$i" .hocr | sed 's/_.*//'`
-	printf '%s\t%d\n' "$n" "$c" >> "$t"
+	w=`grep ocrx_word "$i" | wc -l`
+	printf '%s\t%d\t%d\n' "$n" "$c" "$w" >> "$t"
 done
 
 r=`readlink -f "$1"`
@@ -32,7 +36,8 @@ set mxtics
 set terminal png truecolor size 4600,1700
 set output "%s"
 plot "%s" using 1:2 with lines title "Confidence",\\
-	"" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle
+	"" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle,\\
+	"" using 1:($3/10+50) with lines title "Number of words (scaled)"
 quit
 ' "$b" "$1/${b}_bookgraph.png" "$t" >> "$gp"
 gnuplot "$gp"
```