#!/bin/sh
# Help text shown when the script is called incorrectly. The script writes
# one result file per OCR engine, so both file names are listed.
usage="usage: $0 dir
Calculate the average confidence of each page, saving them in files
called hocr-avgpgconf and prob-avgpgconf in the book directory."

# Exactly one argument (the book directory) is required. Diagnostics go to
# stderr so they never get mixed into captured or piped output.
if test $# -ne 1
then
	printf '%s\n' "$usage" >&2
	exit 1
fi

# Fail early with a clear message instead of letting every later
# redirection into the directory fail one by one.
if test ! -d "$1"
then
	printf '%s: not a directory: %s\n' "$0" "$1" >&2
	exit 1
fi
### calculate averages for hocr (tesseract)
# Writes one "<page><TAB><average>" line per hocr file found directly in
# the book directory ($1) to $1/hocr-avgpgconf, where <page> is the file
# name with a trailing ".hocr" removed.
o="$1/hocr-avgpgconf"
# Create or truncate the output file so a re-run starts from scratch.
: > "$o"
for i in "$1"/*hocr
do
	# When nothing matches, the shell leaves the pattern unexpanded;
	# skip it rather than record a bogus "*hocr" page.
	test -e "$i" || continue
	b=$(basename "$i" .hocr)
	# Take the second ':'-separated field of every avg-lines output line,
	# strip '%' and spaces, and average the resulting numbers.
	# NOTE(review): presumably avg-lines prints "<name>: <conf>%" per
	# line - confirm against that tool. Its stderr is discarded, so a
	# failing run simply yields no average.
	pgavg=$(avg-lines "$i" 2>/dev/null \
	| awk -F ':' '{print $2}' \
	| sed 's/%//g;s/ //g' \
	| awk '{total += $1; n++} END{if(n > 0) {printf("%.2f\n", total/n)}}')
	# A page that produced no confidence lines is recorded as 0.00; make
	# that explicit instead of handing printf an empty numeric argument.
	printf '%s\t%.2f\n' "$b" "${pgavg:-0}" >> "$o"
done
### calculate averages for prob (ocropus)
# Writes one "<page><TAB><average>" line per four-character-named
# subdirectory of the book directory ($1) to $1/prob-avgpgconf, where
# <page> is the subdirectory name.
o="$1/prob-avgpgconf"
# Create or truncate the output file so a re-run starts from scratch.
: > "$o"
for i in "$1"/????
do
	# When nothing matches, the shell leaves the pattern unexpanded, and
	# a four-character regular file cannot contain prob files; skip both
	# rather than record a bogus page.
	test -d "$i" || continue
	b=$(basename "$i")
	# Take the second ':'-separated field of every avg-lines output line
	# across all *prob files of the page, strip '%' and spaces, and
	# average the resulting numbers.
	# NOTE(review): presumably avg-lines prints "<name>: <conf>%" per
	# line - confirm against that tool. Its stderr is discarded, so a
	# page without prob files simply yields no average.
	pgavg=$(avg-lines "$i"/*prob 2>/dev/null \
	| awk -F ':' '{print $2}' \
	| sed 's/%//g;s/ //g' \
	| awk '{total += $1; n++} END{if(n > 0) {printf("%.2f\n", total/n)}}')
	# A page that produced no confidence lines is recorded as 0.00; make
	# that explicit instead of handing printf an empty numeric argument.
	printf '%s\t%.2f\n' "$b" "${pgavg:-0}" >> "$o"
done