summaryrefslogtreecommitdiff
path: root/bookgraph.sh
blob: 8113a5e1ae3867832732206fcb8539f9a180e013 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/sh
# bookgraph.sh - plot the average confidence of every page in a book.
#
# Arguments:
#   $1 - book directory. It must contain the files hocr-avgpgconf and
#        prob-avgpgconf (both produced by allpgsconf.sh).
# Outputs:
#   Writes $1/bookgraph.png via gnuplot. The hocr-avgpgconf series is
#   titled "Tesseract" and the prob-avgpgconf series "Ocropus".
# Exit status:
#   1 on bad usage, missing input or failure to prepare the data;
#   otherwise the exit status of gnuplot.
usage="$0 bookdir

Creates a graph showing the average confidence of each page in a book.
This relies on the hocr-avgpgconf and prob-avgpgconf files being present,
which are generated by the allpgsconf.sh script"

if test $# -ne 1; then
  printf '%s\n' "$usage" >&2
  exit 1
fi

# Both data files are plotted below, so refuse to start without either one.
for conf in hocr-avgpgconf prob-avgpgconf; do
  if ! test -f "$1/$conf"; then
    printf '%s\n' "No $1/$conf file found; run allpgsconf.sh before running this" >&2
    exit 1
  fi
done

t=$(mktemp) || exit 1
gp=$(mktemp) || { rm -f -- "$t"; exit 1; }
# Remove the temporary files on every exit path. Signals are routed through
# 'exit' because not every sh runs the EXIT trap when killed by a signal.
trap 'rm -f -- "$t" "$gp"' EXIT
trap 'exit 1' HUP INT TERM

# NOTE: this expects book file naming to be in the format nnnn_sometext
# Strip everything from each underscore up to the next tab, leaving the
# leading page number. A literal tab is used because "\t" inside a bracket
# expression is not portable across sed implementations.
tab=$(printf '\t')
sed "s/_[^${tab}]*//g" < "$1/hocr-avgpgconf" > "$t" || exit 1

b=$(basename "$1")

# Build the gnuplot script. "%%d" yields a literal "%d" for gnuplot's own
# sprintf, and the single-quoted formats keep "$1" as gnuplot's column
# reference rather than the shell's first argument.
{
  printf 'set style data lines\n'
  printf 'set title "Book Confidence for %s"\n' "$b"
  printf 'set xlabel "Page Number"\n'
  printf 'set ylabel "Page confidence"\n'
  printf 'set mxtics\n'
  printf 'set terminal png truecolor size 4600,1700\n'
  printf 'set output "%s"\n' "$1/bookgraph.png"
  printf 'plot "%s" using 1:2 with lines title "Tesseract",\\\n' "$t"
  printf '     "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle,\\\n'
  printf '     "%s" using 1:2 with lines title "Ocropus",\\\n' "$1/prob-avgpgconf"
  printf '     "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle\n'
  printf 'quit\n'
} > "$gp" || exit 1

# Last command on purpose: its status becomes the script's exit status, so a
# gnuplot failure is visible to the caller (the EXIT trap still cleans up).
gnuplot "$gp"