From 9673976e3a563ba3ebf183c7f18df2ae5c64b141 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Wed, 8 May 2019 13:46:16 +0100
Subject: Use sane page numbering for erara scraper

---
 scrape-erara.sh | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/scrape-erara.sh b/scrape-erara.sh
index 11754d6..c2da6f2 100644
--- a/scrape-erara.sh
+++ b/scrape-erara.sh
@@ -29,15 +29,17 @@ fi
 # Note that this loses page numbering.
 pgids=`echo "$iiifmanifest" | sed 's/"/\n/g' | awk -F '/' '/i3f/ {print $7}' | sort | uniq`
 
+pgnum=0
 for i in $pgids; do
 	test $i -eq $bookid && continue # skip book id, which is not a real page id
 
-	pgname=`printf '%s_%s' "$bookid" "$i"`
-	echo "Downloading page $i to ${pgname}.jpg"
+	pgnum=`expr $pgnum + 1`
+	pgname=`printf '%s_%04d' "$bookid" "$pgnum"`
+	echo "Downloading page id $i to ${pgname}.jpg"
 	pgurl="https://www.e-rara.ch/zut/i3f/v21/${i}/full/full/0/native.jpg"
 	curl -s -f "$pgurl" > "${pgname}.jpg"
 	if test $? -ne 0; then
-		echo "Error downloading page $i: $pgurl"
+		echo "Error downloading page id $i (number ${pgnum}): $pgurl"
 		rm -f "${pgname}.jpg"
 	fi
 done
-- 
cgit v1.2.1-24-ge1ad