diff options
| -rw-r--r-- | scrape-bnf.sh | 22 | ||||
| -rwxr-xr-x | scrape-erara.sh | 2 | 
2 files changed, 19 insertions, 5 deletions
| diff --git a/scrape-bnf.sh b/scrape-bnf.sh index 677e4d4..f939832 100644 --- a/scrape-bnf.sh +++ b/scrape-bnf.sh @@ -1,17 +1,31 @@  #!/bin/sh -usage="Usage: $0 bnfurl" +usage="Usage: $0 bnfurl + +bnfurl: The book index page, e.g. +        https://gallica.bnf.fr/ark:/12148/bpt6k6468158v + +Downloads all pages from a book on e-rara.com, saving them to the +current directory."  test $# -ne 1 && echo "$usage" && exit 1 -bookid=`echo "$1" |awk -F / '{printf("%s/%s\n", $5, $6)}'` +bookid=`echo "$1" |awk -F / '{printf("%s/%s\n", $5, $6)}'|sed 's/\..*//g'`  bookid_name=`echo "$bookid" | sed 's/\//_/'` -html=`curl -s "https://gallica.bnf.fr/ark:/${bookid}"` +html=`curl -f -s "https://gallica.bnf.fr/ark:/${bookid}"` +if test $? -ne 0 ; then +	echo "Error: Failed to download book index: https://gallica.bnf.fr/ark:/${bookid}" +	exit 1 +fi  pagenum=`echo "$html" | sed 's/.*nbTotalVues\\\"://g' | sed 's/,.*//'`  for i in `seq "$pagenum"`; do  	pgname=`printf "%s_%03d" "${bookid_name}" "${i}"`  	echo "Downloading page $i of $pagenum to ${pgname}.jpg" -	curl -s "https://gallica.bnf.fr/iiif/ark:/${bookid}/f${i}/full/full/0/native.jpg" > "${pgname}.jpg" +	curl -f -s "https://gallica.bnf.fr/iiif/ark:/${bookid}/f${i}/full/full/0/native.jpg" > "${pgname}.jpg" +	if test $? -ne 0 ; then +		echo "Failed to download page ${pgname}: https://gallica.bnf.fr/iiif/ark:/${bookid}/f${i}/full/full/0/native.jpg" +		rm -f "${pgname}.jpg" +	fi  done diff --git a/scrape-erara.sh b/scrape-erara.sh index c2da6f2..c66a73b 100755 --- a/scrape-erara.sh +++ b/scrape-erara.sh @@ -5,7 +5,7 @@ eraraurl: The book index page, e.g.            https://www.e-rara.ch/zut/doi/10.3931/e-rara-10416  Downloads all pages from a book on e-rara.com, saving them to the -current directory. " +current directory."  test $# -ne 1 && echo "$usage" && exit 1 | 
