summaryrefslogtreecommitdiff
path: root/bookgraph.sh
blob: 8926d665275464145b36e7d1233f0e58b6277137 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/sh
# bookgraph.sh - plot the average confidence of each page in a book.
#
# Usage: bookgraph.sh bookdir
#
# Reads  bookdir/hocr-avgpgconf (plotted as "Tesseract") and
#        bookdir/prob-avgpgconf (plotted as "Ocropus"),
# writes bookdir/<basename of bookdir>_bookgraph.png via gnuplot.
#
# Exit status: 1 on a usage error, a missing input file, or a failure to
# prepare the temporary files; otherwise the exit status of gnuplot.
usage="$0 bookdir

Creates a graph showing the average confidence of each page in a book.
This relies on the hocr-avgpgconf and prob-avgpgconf files being present,
which are generated by the allpgsconf.sh script."

if test $# -ne 1; then
  echo "$usage" >&2
  exit 1
fi

# Both confidence files must exist before anything is created.
for f in hocr-avgpgconf prob-avgpgconf; do
  if ! test -f "$1/$f"; then
    echo "No $1/$f file found; run allpgsconf.sh before running this" >&2
    exit 1
  fi
done

# Turn the common termination signals into a normal exit so that the EXIT
# trap installed below also runs when the script is interrupted.
trap 'exit 1' HUP INT TERM

t=$(mktemp) || { echo "Could not create temporary file" >&2; exit 1; }
trap 'rm -f "$t"' EXIT
gp=$(mktemp) || { echo "Could not create temporary file" >&2; exit 1; }
trap 'rm -f "$t" "$gp"' EXIT

# NOTE: this expects book file naming to be in the format nnnn_sometext
# Strip everything from the underscore up to the next tab, leaving the
# leading number in the first column. A literal tab is used because "\t"
# inside a bracket expression is only understood as a tab by GNU sed.
tab=$(printf '\t')
sed "s/_[^${tab}]*//g" < "$1/hocr-avgpgconf" > "$t" || exit 1

b=$(basename "$1")

# Write the gnuplot script. In the printf format, "\\\\" yields the single
# backslash gnuplot needs for line continuation and "%%d" yields a literal
# "%d" for gnuplot's own sprintf; the $1 inside single quotes is gnuplot's
# column reference, not a shell parameter.
printf '

set style data lines
set title "Book Confidence for %s"
set xlabel "Page Number"
set ylabel "Page confidence"
set mxtics
set terminal png truecolor size 4600,1700
set output "%s"
plot "%s" using 1:2 with lines title "Tesseract",\\
     "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle,\\
     "%s" using 1:2 with lines title "Ocropus",\\
     "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle
quit
' "$b" "$1/${b}_bookgraph.png" "$t" "$1/prob-avgpgconf" > "$gp" || exit 1

# gnuplot's status becomes the script's status; the EXIT trap removes the
# temporary files without altering it.
gnuplot "$gp"
exit $?