blob: 8010bdc0ba287fb5b2bcb5eab76720bcf205031a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
|
#!/bin/sh
# Draws a PNG graph of per-page confidence and (scaled) word count for a book.
#
# Arguments:
#   $1 - book directory; the hOCR files are read from "$1/best".
# Outputs:
#   "$1/<name>_bookgraph.png", where <name> is the basename of the resolved
#   book directory path.
# Requires:
#   pgconf  - run once per hOCR file; its output is formatted with %d, so it
#             is expected to print a single integer confidence value.
#   gnuplot - renders the graph.
# Exit status:
#   1 on bad usage, a missing tool, or no data; otherwise gnuplot's status.
usage="$0 bookdir
Creates a graph showing the average confidence and a (scaled) word
count of each page in a book. The word count is scaled (divided by
10 and plus 50) to make it easy to compare to the confidence, by
generally occupying a similar scale."

if test $# -ne 1; then
  echo "$usage" >&2
  exit 1
fi

for tool in pgconf gnuplot; do
  if ! command -v "$tool" > /dev/null; then
    echo "Error: no $tool tool found" >&2
    exit 1
  fi
done

# Temporary data file and gnuplot script; removed on every exit path.
t=$(mktemp) || exit 1
gp=$(mktemp) || { rm -f "$t"; exit 1; }
trap 'rm -f "$t" "$gp"' EXIT
# Turn signals into a normal exit so the EXIT trap runs in every sh.
trap 'exit 1' HUP INT TERM

# One row per page: page number, confidence, word count.
# The page number is the file name up to the first underscore.
# Rows are sorted numerically by page number because gnuplot's "lines" style
# joins points in the order they appear in the file, and find's order is
# arbitrary.
find "$1/best" -maxdepth 1 -type f -name '*hocr' | while IFS= read -r i; do
  c=$(pgconf "$i")
  n=$(basename "$i" .hocr | sed 's/_.*//')
  w=$(grep -c ocrx_word "$i")
  printf '%s\t%d\t%d\n' "$n" "$c" "$w"
done | sort -n > "$t"

if ! test -s "$t"; then
  echo "Error: no hocr files found in $1/best" >&2
  exit 1
fi

# Name the graph after the resolved directory so "." or a symlink still
# yields a meaningful title and file name.
r=$(readlink -f "$1")
b=$(basename "$r")

# printf fills in the title, the output path and the data file path.
# %% and \\ become a literal % and a gnuplot line-continuation backslash.
printf '
set style data lines
set title "Book Confidence for %s"
set xlabel "Page Number"
set ylabel "Page Confidence %%"
set mxtics
set terminal png truecolor size 4600,1700
set output "%s"
plot "%s" using 1:2 with lines title "Confidence",\\
"" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle,\\
"" using 1:($3/10+50) with lines title "Number of words (scaled)"
quit
' "$b" "$1/${b}_bookgraph.png" "$t" > "$gp"

# Last command: its status becomes the script's exit status.
gnuplot "$gp"
|