summary refs log tree commit diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-05-08 13:46:16 +0100
committer Nick White <git@njw.name> 2019-05-08 13:46:16 +0100
commit 9673976e3a563ba3ebf183c7f18df2ae5c64b141 (patch)
tree 012f7f41f6dc541f8384938ca105ed3c786c6fb2
parent 710ff20cdb4fd435f95e0f0fd6cacb8838aaf3c9 (diff)
Use sane page numbering for erara scraper
-rw-r--r-- scrape-erara.sh 8
1 file changed, 5 insertions, 3 deletions
diff --git a/scrape-erara.sh b/scrape-erara.sh
index 11754d6..c2da6f2 100644
--- a/scrape-erara.sh
+++ b/scrape-erara.sh
@@ -29,15 +29,17 @@ fi
# Note that this loses page numbering.
pgids=`echo "$iiifmanifest" | sed 's/"/\n/g' | awk -F '/' '/i3f/ {print $7}' | sort | uniq`
+pgnum=0
for i in $pgids; do
test $i -eq $bookid && continue # skip book id, which is not a real page id
- pgname=`printf '%s_%s' "$bookid" "$i"`
- echo "Downloading page $i to ${pgname}.jpg"
+ pgnum=`expr $pgnum + 1`
+ pgname=`printf '%s_%04d' "$bookid" "$pgnum"`
+ echo "Downloading page id $i to ${pgname}.jpg"
pgurl="https://www.e-rara.ch/zut/i3f/v21/${i}/full/full/0/native.jpg"
curl -s -f "$pgurl" > "${pgname}.jpg"
if test $? -ne 0; then
- echo "Error downloading page $i: $pgurl"
+ echo "Error downloading page id $i (number ${pgnum}): $pgurl"
rm -f "${pgname}.jpg"
fi
done