summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2019-05-08 19:51:06 +0100
committer: Nick White <git@njw.name> 2019-05-08 19:51:06 +0100
commit: 58e51317883ebb4c66b92e6c1cc807e937f780cb (patch)
tree: 9b8c213cc831d0ec12d88cb9483438df4705893f
parent: d05e5268f2e362e04b77aea1eadb1b900387999f (diff)
Ensure dir-to-pdf saves to dirname.pdf not dirname/.pdf, and handle all different naming conventions
-rwxr-xr-x dir-to-pdf.sh 23
1 file changed, 19 insertions, 4 deletions
diff --git a/dir-to-pdf.sh b/dir-to-pdf.sh
index 345d908..a0dc95f 100755
--- a/dir-to-pdf.sh
+++ b/dir-to-pdf.sh
@@ -44,7 +44,7 @@ if ! test -d "$1"; then
fi
if test $# -eq 2 ; then
- outfile="$2"
+ outfile=`echo "$2" | sed 's/\/$//'`
else
outfile="$1.pdf"
fi
@@ -58,12 +58,27 @@ fi
mkdir -p "$tmpdir" || exit 1
echo "Copying hocrs and converting pngs from $1 to $tmpdir"
-find "$1" -maxdepth 1 -type f -name '*.unpapered.png' | while read i; do
- b=`basename "$i" .unpapered.png`
+n=`find "$1" -maxdepth 1 -type f -name '*.unpapered.png' | wc -l`
+if test $n -gt 0 ; then
+ imgsuffix=".unpapered.png"
+ hocrsuffix="_unpapered_"
+else
+ n=`find "$1" -maxdepth 1 -type f -name '*.binarized.png' | wc -l`
+ if test $n -gt 0 ; then
+ imgsuffix=".binarized.png"
+ hocrsuffix="_"
+ else
+ echo "Error: no pages found"
+ exit 1
+ fi
+fi
+
+find "$1" -maxdepth 1 -type f -name '*'"$imgsuffix" | while read i; do
+ b=`basename "$i" "$imgsuffix"`
hocr=""
for t in $TRAININGS; do
- n=`echo "$i" | sed "s/.unpapered.png/_unpapered_$t.hocr/"`
+ n=`echo "$i" | sed "s/${imgsuffix}/${hocrsuffix}$t.hocr/"`
test -f "$n" && hocr="$n"
done
if test -z "$hocr"; then