From a3ce33af0b4e7ba9b8463443c3ebf4a7797d160a Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 26 Mar 2019 18:05:10 +0000 Subject: Make book graph scripts more robust to dodgy page filenames, and name bookgraph better --- allpgsconf.sh | 4 +++- bookgraph.sh | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/allpgsconf.sh b/allpgsconf.sh index 60ea4d6..82b24f0 100644 --- a/allpgsconf.sh +++ b/allpgsconf.sh @@ -5,6 +5,8 @@ Calculate the average confidence of each page, saving them in a file called hocr-avgpgconf in the book directory." test $# -ne 1 && echo "$usage" && exit 1 +TRAININGNAME=rescribealphav4 + ### calculate averages for hocr (tesseract) o="$1/hocr-avgpgconf" @@ -12,7 +14,7 @@ printf '' > "$o" for i in "$1"/*hocr do - b=`basename "$i" .hocr` + b=`basename "$i" .hocr | sed 's/_'"$TRAININGNAME"'//; s/.jpg//g; s/.png//g'` pgavg=`avg-lines "$i" 2>/dev/null \ | awk -F ':' '{print $2}' \ diff --git a/bookgraph.sh b/bookgraph.sh index a4b1eff..8926d66 100644 --- a/bookgraph.sh +++ b/bookgraph.sh @@ -31,7 +31,7 @@ plot "%s" using 1:2 with lines title "Tesseract",\\ "%s" using 1:2 with lines title "Ocropus",\\ "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle quit -' "$b" "$1/bookgraph.png" "$t" "$1/prob-avgpgconf" >> "$gp" +' "$b" "$1/${b}_bookgraph.png" "$t" "$1/prob-avgpgconf" >> "$gp" gnuplot "$gp" rm -f "$t" "$gp" -- cgit v1.2.1-24-ge1ad