From 2d9e3c01298194f248c619f88f018e960768394d Mon Sep 17 00:00:00 2001
From: Nick White
Date: Mon, 15 Jul 2019 14:25:08 +0100
Subject: Ensure eebotopdf.sh uses a /tmp dir for tmp files

---
Review notes (this area is ignored by git am / git apply):
 - The hocr existence check now looks in "$t"; basename strips the
   directory, so the unqualified "$b.hocr" was tested against the
   current directory and every jpg in "$t" was deleted.
 - The final cleanup removes the whole mktemp directory instead of
   globbing "$root"*hocr in the current directory, which matched
   nothing and left "$t" behind.
 - Because of these edits the blob id on the "index" line no longer
   describes the post-image exactly.
 - Loop bodies are assumed to be tab-indented; verify against the
   target file before applying.

 eebotopdf.sh | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/eebotopdf.sh b/eebotopdf.sh
index 1be00f7..4143571 100644
--- a/eebotopdf.sh
+++ b/eebotopdf.sh
@@ -22,25 +22,28 @@ fi
 
 root=`basename "$3" .pdf`
 
+t=`mktemp -d`
+test $? -ne 0 && exit 1
+
 # extract images to png, then convert to jpg, as originals aren't jpg
 # but hocr-pdf requires them
 echo "Extracting images from original PDF"
-pdfimages -png "$1" "$root" || exit 1
-for i in "$root"*png; do
+pdfimages -png "$1" "$t/$root" || exit 1
+for i in "$t/$root"*png; do
 	b=`basename "$i" .png`
-	$convert "$i" "$b.jpg" || exit 1
+	$convert "$i" "$t/$b.jpg" || exit 1
 	rm "$i"
 done
 
 echo "Extracting text from XML"
-eeboxmltohocr "$2" "$root" || exit 1
+eeboxmltohocr "$2" "$t/$root" || exit 1
 
 # remove any images that don't have a corresponding hocr
-for i in *jpg; do
+for i in "$t/"*jpg; do
 	b=`basename "$i" .jpg`
-	test -f "$b.hocr" || rm "$i"
+	test -f "$t/$b.hocr" || rm "$i"
 done
 
 echo "Combining images and text into PDF"
-hocr-pdf . > "$3" || exit 1
-rm "$root"*jpg "$root"*hocr
+hocr-pdf "$t" > "$3" || exit 1
+rm -r "$t"
-- 
cgit v1.2.1-24-ge1ad