summaryrefslogtreecommitdiff
path: root/allpgsconf.sh
blob: 82b24f0a9705beb1f1f2e49ced8584b9414619e6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/sh
# Write per-page average OCR confidence summaries for one book directory.
usage="usage: $0 dir

Calculate the average confidence of each page, saving them in files
called hocr-avgpgconf and prob-avgpgconf in the book directory."

# Exactly one argument (the book directory) is required. The usage text is
# a diagnostic in this case, so it is written to stderr rather than stdout.
if test $# -ne 1
then
	echo "$usage" >&2
	exit 1
fi

# Training name; "_$TRAININGNAME" is stripped from hocr file names when
# deriving the page name written to the output file.
TRAININGNAME=rescribealphav4

### calculate averages for hocr (tesseract)
# Output: one "page<TAB>average" line per hocr file found in the directory.
o="$1/hocr-avgpgconf"

# Start from an empty output file; stop if it cannot be created, since every
# later append would fail as well.
printf '' > "$o" || exit 1

for i in "$1"/*hocr
do
	# When nothing matches, the shell leaves the pattern as a literal word;
	# skip it instead of recording a bogus page.
	test -e "$i" || continue

	# Page name: file name without the .hocr suffix, the training name and
	# any .jpg/.png image extension (dots escaped so only a literal "."
	# followed by the extension is removed).
	b=$(basename "$i" .hocr | sed 's/_'"$TRAININGNAME"'//; s/\.jpg//g; s/\.png//g')

	# Take the second ':'-separated field of every avg-lines output line,
	# drop '%' signs and spaces, and average the values.
	# NOTE(review): presumably avg-lines prints one "name: NN%" line per
	# text line - confirm against the tool. Its stderr is discarded, so a
	# failing avg-lines simply yields an empty average.
	pgavg=$(avg-lines "$i" 2>/dev/null \
	| awk -F ':' '{print $2}' \
	| sed 's/%//g;s/ //g' \
	| awk '{total += $1; n++} END{if(n > 0) {printf("%.2f\n", total/n)}}')

	# An empty average (no lines, or avg-lines failed) is recorded as 0.00.
	printf '%s\t%.2f\n' "$b" "${pgavg:-0}" >> "$o"
done

### calculate averages for prob (ocropus)
# Output: one "page<TAB>average" line per four-character page directory.
o="$1/prob-avgpgconf"

# Start from an empty output file; stop if it cannot be created, since every
# later append would fail as well.
printf '' > "$o" || exit 1

for i in "$1"/????
do
	# Only directories can hold *prob files. This also skips the literal
	# "????" word the shell leaves behind when nothing matches, and any
	# four-character regular file in the book directory.
	test -d "$i" || continue

	b=$(basename "$i")

	# Take the second ':'-separated field of every avg-lines output line,
	# drop '%' signs and spaces, and average the values.
	# NOTE(review): presumably avg-lines prints one "name: NN%" line per
	# text line - confirm against the tool. Its stderr is discarded, so a
	# directory without *prob files simply yields an empty average.
	pgavg=$(avg-lines "$i"/*prob 2>/dev/null \
	| awk -F ':' '{print $2}' \
	| sed 's/%//g;s/ //g' \
	| awk '{total += $1; n++} END{if(n > 0) {printf("%.2f\n", total/n)}}')

	# An empty average (no data, or avg-lines failed) is recorded as 0.00.
	printf '%s\t%.2f\n' "$b" "${pgavg:-0}" >> "$o"
done