diff options
author | Nick White <git@njw.name> | 2019-03-26 18:05:10 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-03-26 18:05:10 +0000 |
commit | a3ce33af0b4e7ba9b8463443c3ebf4a7797d160a (patch) | |
tree | 3f78cfb7892739023481b86019d26aed2e955bdc | |
parent | a2648f87a504395567c5f0aa24e11cf46cb8baee (diff) |
Make book graph scripts more robust to dodgy page filenames, and name bookgraph better
-rw-r--r-- | allpgsconf.sh | 4 | ||||
-rw-r--r-- | bookgraph.sh | 2 |
2 files changed, 4 insertions, 2 deletions
diff --git a/allpgsconf.sh b/allpgsconf.sh index 60ea4d6..82b24f0 100644 --- a/allpgsconf.sh +++ b/allpgsconf.sh @@ -5,6 +5,8 @@ Calculate the average confidence of each page, saving them in a file called hocr-avgpgconf in the book directory." test $# -ne 1 && echo "$usage" && exit 1 +TRAININGNAME=rescribealphav4 + ### calculate averages for hocr (tesseract) o="$1/hocr-avgpgconf" @@ -12,7 +14,7 @@ printf '' > "$o" for i in "$1"/*hocr do - b=`basename "$i" .hocr` + b=`basename "$i" .hocr | sed 's/_'"$TRAININGNAME"'//; s/.jpg//g; s/.png//g'` pgavg=`avg-lines "$i" 2>/dev/null \ | awk -F ':' '{print $2}' \ diff --git a/bookgraph.sh b/bookgraph.sh index a4b1eff..8926d66 100644 --- a/bookgraph.sh +++ b/bookgraph.sh @@ -31,7 +31,7 @@ plot "%s" using 1:2 with lines title "Tesseract",\\ "%s" using 1:2 with lines title "Ocropus",\\ "" using 1:2:(sprintf("%%d", $1)) with labels point pt 2 notitle quit -' "$b" "$1/bookgraph.png" "$t" "$1/prob-avgpgconf" >> "$gp" +' "$b" "$1/${b}_bookgraph.png" "$t" "$1/prob-avgpgconf" >> "$gp" gnuplot "$gp" rm -f "$t" "$gp" |