diff options
author | Nick White <git@njw.name> | 2019-03-26 18:05:10 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-03-26 18:05:10 +0000 |
commit | a3ce33af0b4e7ba9b8463443c3ebf4a7797d160a (patch) | |
tree | 3f78cfb7892739023481b86019d26aed2e955bdc /allpgsconf.sh | |
parent | a2648f87a504395567c5f0aa24e11cf46cb8baee (diff) |
Make book graph scripts more robust to dodgy page filenames, and name bookgraph better
Diffstat (limited to 'allpgsconf.sh')
-rw-r--r-- | allpgsconf.sh | 4 |
1 files changed, 3 insertions, 1 deletions
diff --git a/allpgsconf.sh b/allpgsconf.sh
index 60ea4d6..82b24f0 100644
--- a/allpgsconf.sh
+++ b/allpgsconf.sh
@@ -5,6 +5,8 @@ Calculate the average confidence of each page, saving them in a file
 called hocr-avgpgconf in the book directory."
 
 test $# -ne 1 && echo "$usage" && exit 1
 
+TRAININGNAME=rescribealphav4
+
 ### calculate averages for hocr (tesseract)
 o="$1/hocr-avgpgconf"
@@ -12,7 +14,7 @@ printf '' > "$o"
 for i in "$1"/*hocr
 do
- b=`basename "$i" .hocr`
+ b=`basename "$i" .hocr | sed 's/_'"$TRAININGNAME"'//; s/.jpg//g; s/.png//g'`
  pgavg=`avg-lines "$i" 2>/dev/null \
  | awk -F ':' '{print $2}' \
  |