summary | refs | log | tree | commit | diff
path: root/allpgsconf.sh
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-03-26 18:05:10 +0000
committer Nick White <git@njw.name> 2019-03-26 18:05:10 +0000
commit a3ce33af0b4e7ba9b8463443c3ebf4a7797d160a (patch)
tree 3f78cfb7892739023481b86019d26aed2e955bdc /allpgsconf.sh
parent a2648f87a504395567c5f0aa24e11cf46cb8baee (diff)
Make book graph scripts more robust to dodgy page filenames, and name bookgraph better
Diffstat (limited to 'allpgsconf.sh')
-rw-r--r-- allpgsconf.sh 4
1 files changed, 3 insertions, 1 deletions
diff --git a/allpgsconf.sh b/allpgsconf.sh
index 60ea4d6..82b24f0 100644
--- a/allpgsconf.sh
+++ b/allpgsconf.sh
@@ -5,6 +5,8 @@ Calculate the average confidence of each page, saving them in a file
called hocr-avgpgconf in the book directory."
test $# -ne 1 && echo "$usage" && exit 1
+TRAININGNAME=rescribealphav4
+
### calculate averages for hocr (tesseract)
o="$1/hocr-avgpgconf"
@@ -12,7 +14,7 @@ printf '' > "$o"
for i in "$1"/*hocr
do
- b=`basename "$i" .hocr`
+ b=`basename "$i" .hocr | sed 's/_'"$TRAININGNAME"'//; s/.jpg//g; s/.png//g'`
pgavg=`avg-lines "$i" 2>/dev/null \
| awk -F ':' '{print $2}' \