summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-07-15 14:25:08 +0100
committer Nick White <git@njw.name> 2019-07-15 14:25:08 +0100
commit 2d9e3c01298194f248c619f88f018e960768394d (patch)
tree b8383101888699f3089a12ac64277a7942e0e42d
parent f86013d23ac9c778dc93cd2bceb1753845fe9910 (diff)
Ensure eebotopdf.sh uses a /tmp dir for tmp files
-rw-r--r-- eebotopdf.sh 17
1 files changed, 10 insertions, 7 deletions
diff --git a/eebotopdf.sh b/eebotopdf.sh
index 1be00f7..4143571 100644
--- a/eebotopdf.sh
+++ b/eebotopdf.sh
@@ -22,25 +22,28 @@ fi
root=`basename "$3" .pdf`
+t=`mktemp -d`
+test $? -ne 0 && exit 1
+
# extract images to png, then convert to jpg, as originals aren't jpg
# but hocr-pdf requires them
echo "Extracting images from original PDF"
-pdfimages -png "$1" "$root" || exit 1
-for i in "$root"*png; do
+pdfimages -png "$1" "$t/$root" || exit 1
+for i in "$t/$root"*png; do
b=`basename "$i" .png`
- $convert "$i" "$b.jpg" || exit 1
+ $convert "$i" "$t/$b.jpg" || exit 1
rm "$i"
done
echo "Extracting text from XML"
-eeboxmltohocr "$2" "$root" || exit 1
+eeboxmltohocr "$2" "$t/$root" || exit 1
# remove any images that don't have a corresponding hocr
-for i in *jpg; do
+for i in "$t/"*jpg; do
b=`basename "$i" .jpg`
test -f "$b.hocr" || rm "$i"
done
echo "Combining images and text into PDF"
-hocr-pdf . > "$3" || exit 1
-rm "$root"*jpg "$root"*hocr
+hocr-pdf "$t" > "$3" || exit 1
+rm "$t/$root"*jpg "$root"*hocr