From 58e51317883ebb4c66b92e6c1cc807e937f780cb Mon Sep 17 00:00:00 2001 From: Nick White Date: Wed, 8 May 2019 19:51:06 +0100 Subject: Ensure dir-to-pdf saves to dirname.pdf not dirname/.pdf, and handle all different naming conventions --- dir-to-pdf.sh | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/dir-to-pdf.sh b/dir-to-pdf.sh index 345d908..a0dc95f 100755 --- a/dir-to-pdf.sh +++ b/dir-to-pdf.sh @@ -44,7 +44,7 @@ if ! test -d "$1"; then fi if test $# -eq 2 ; then - outfile="$2" + outfile=`echo "$2" | sed 's/\/$//'` else outfile="$1.pdf" fi @@ -58,12 +58,27 @@ fi mkdir -p "$tmpdir" || exit 1 echo "Copying hocrs and converting pngs from $1 to $tmpdir" -find "$1" -maxdepth 1 -type f -name '*.unpapered.png' | while read i; do - b=`basename "$i" .unpapered.png` +n=`find "$1" -maxdepth 1 -type f -name '*.unpapered.png' | wc -l` +if test $n -gt 0 ; then + imgsuffix=".unpapered.png" + hocrsuffix="_unpapered_" +else + n=`find "$1" -maxdepth 1 -type f -name '*.binarized.png' | wc -l` + if test $n -gt 0 ; then + imgsuffix=".binarized.png" + hocrsuffix="_" + else + echo "Error: no pages found" + exit 1 + fi +fi + +find "$1" -maxdepth 1 -type f -name '*'"$imgsuffix" | while read i; do + b=`basename "$i" "$imgsuffix"` hocr="" for t in $TRAININGS; do - n=`echo "$i" | sed "s/.unpapered.png/_unpapered_$t.hocr/"` + n=`echo "$i" | sed "s/${imgsuffix}/${hocrsuffix}$t.hocr/"` test -f "$n" && hocr="$n" done if test -z "$hocr"; then -- cgit v1.2.1-24-ge1ad