From a3ce33af0b4e7ba9b8463443c3ebf4a7797d160a Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 26 Mar 2019 18:05:10 +0000 Subject: Make book graph scripts more robust to dodgy page filenames, and name bookgraph better --- allpgsconf.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'allpgsconf.sh') diff --git a/allpgsconf.sh b/allpgsconf.sh index 60ea4d6..82b24f0 100644 --- a/allpgsconf.sh +++ b/allpgsconf.sh @@ -5,6 +5,8 @@ Calculate the average confidence of each page, saving them in a file called hocr-avgpgconf in the book directory." test $# -ne 1 && echo "$usage" && exit 1 +TRAININGNAME=rescribealphav4 + ### calculate averages for hocr (tesseract) o="$1/hocr-avgpgconf" @@ -12,7 +14,7 @@ printf '' > "$o" for i in "$1"/*hocr do - b=`basename "$i" .hocr` + b=`basename "$i" .hocr | sed 's/_'"$TRAININGNAME"'//; s/.jpg//g; s/.png//g'` pgavg=`avg-lines "$i" 2>/dev/null \ | awk -F ':' '{print $2}' \ -- cgit v1.2.1-24-ge1ad