diff options
author | Nick White <git@njw.name> | 2019-07-15 14:25:08 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-07-15 14:25:08 +0100 |
commit | 2d9e3c01298194f248c619f88f018e960768394d (patch) | |
tree | b8383101888699f3089a12ac64277a7942e0e42d /eebotopdf.sh | |
parent | f86013d23ac9c778dc93cd2bceb1753845fe9910 (diff) |
Ensure eebotopdf.sh uses a /tmp dir for tmp files
Diffstat (limited to 'eebotopdf.sh')
-rw-r--r-- | eebotopdf.sh | 17 |
1 files changed, 10 insertions, 7 deletions
```diff
diff --git a/eebotopdf.sh b/eebotopdf.sh
index 1be00f7..4143571 100644
--- a/eebotopdf.sh
+++ b/eebotopdf.sh
@@ -22,25 +22,28 @@ fi
 
 root=`basename "$3" .pdf`
 
+t=`mktemp -d`
+test $? -ne 0 && exit 1
+
 # extract images to png, then convert to jpg, as originals aren't jpg
 # but hocr-pdf requires them
 echo "Extracting images from original PDF"
-pdfimages -png "$1" "$root" || exit 1
-for i in "$root"*png; do
+pdfimages -png "$1" "$t/$root" || exit 1
+for i in "$t/$root"*png; do
 	b=`basename "$i" .png`
-	$convert "$i" "$b.jpg" || exit 1
+	$convert "$i" "$t/$b.jpg" || exit 1
 	rm "$i"
 done
 
 echo "Extracting text from XML"
-eeboxmltohocr "$2" "$root" || exit 1
+eeboxmltohocr "$2" "$t/$root" || exit 1
 
 # remove any images that don't have a corresponding hocr
-for i in *jpg; do
+for i in "$t/"*jpg; do
 	b=`basename "$i" .jpg`
 	test -f "$b.hocr" || rm "$i"
 done
 
 echo "Combining images and text into PDF"
-hocr-pdf . > "$3" || exit 1
-rm "$root"*jpg "$root"*hocr
+hocr-pdf "$t" > "$3" || exit 1
+rm "$t/$root"*jpg "$root"*hocr
```