diff options
author | Nick White <git@njw.name> | 2019-05-08 13:46:16 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-05-08 13:46:16 +0100 |
commit | 9673976e3a563ba3ebf183c7f18df2ae5c64b141 (patch) | |
tree | 012f7f41f6dc541f8384938ca105ed3c786c6fb2 | |
parent | 710ff20cdb4fd435f95e0f0fd6cacb8838aaf3c9 (diff) |
Use sane page numbering for erara scraper
-rw-r--r-- | scrape-erara.sh | 8 |
1 file changed, 5 insertions, 3 deletions
```diff
diff --git a/scrape-erara.sh b/scrape-erara.sh
index 11754d6..c2da6f2 100644
--- a/scrape-erara.sh
+++ b/scrape-erara.sh
@@ -29,15 +29,17 @@ fi
 # Note that this loses page numbering.
 pgids=`echo "$iiifmanifest" | sed 's/"/\n/g' | awk -F '/' '/i3f/ {print $7}' | sort | uniq`
 
+pgnum=0
 for i in $pgids; do
 	test $i -eq $bookid && continue # skip book id, which is not a real page id
 
-	pgname=`printf '%s_%s' "$bookid" "$i"`
-	echo "Downloading page $i to ${pgname}.jpg"
+	pgnum=`expr $pgnum + 1`
+	pgname=`printf '%s_%04d' "$bookid" "$pgnum"`
+	echo "Downloading page id $i to ${pgname}.jpg"
 	pgurl="https://www.e-rara.ch/zut/i3f/v21/${i}/full/full/0/native.jpg"
 	curl -s -f "$pgurl" > "${pgname}.jpg"
 	if test $? -ne 0; then
-		echo "Error downloading page $i: $pgurl"
+		echo "Error downloading page id $i (number ${pgnum}): $pgurl"
 		rm -f "${pgname}.jpg"
 	fi
 done
```