summary | refs | log | tree | commit | diff
path: root/scrape-bnf.sh
diff options
context:
space:
mode:
Diffstat (limited to 'scrape-bnf.sh')
-rw-r--r-- scrape-bnf.sh 19
1 files changed, 12 insertions, 7 deletions
diff --git a/scrape-bnf.sh b/scrape-bnf.sh
index f939832..a0c49ae 100644
--- a/scrape-bnf.sh
+++ b/scrape-bnf.sh
@@ -4,13 +4,12 @@ usage="Usage: $0 bnfurl
bnfurl: The book index page, e.g.
https://gallica.bnf.fr/ark:/12148/bpt6k6468158v
-Downloads all pages from a book on e-rara.com, saving them to the
-current directory."
+Downloads all pages from a book on bnf.fr"
test $# -ne 1 && echo "$usage" && exit 1
bookid=`echo "$1" |awk -F / '{printf("%s/%s\n", $5, $6)}'|sed 's/\..*//g'`
-bookid_name=`echo "$bookid" | sed 's/\//_/'`
+bookname=`echo "$bookid" | sed 's/\//_/'`
html=`curl -f -s "https://gallica.bnf.fr/ark:/${bookid}"`
if test $? -ne 0 ; then
@@ -20,12 +19,18 @@ fi
pagenum=`echo "$html" | sed 's/.*nbTotalVues\\\"://g' | sed 's/,.*//'`
+mkdir -p "$bookname"
+if test $? -ne 0 ; then
+ echo "Failed to mkdir $bookname"
+ exit 1
+fi
+
for i in `seq "$pagenum"`; do
- pgname=`printf "%s_%03d" "${bookid_name}" "${i}"`
- echo "Downloading page $i of $pagenum to ${pgname}.jpg"
- curl -f -s "https://gallica.bnf.fr/iiif/ark:/${bookid}/f${i}/full/full/0/native.jpg" > "${pgname}.jpg"
+ pgname=`printf '%04d' "${i}"`
+ echo "Downloading page $i of $pagenum to $bookname/$pgname.jpg"
+ curl -f -s "https://gallica.bnf.fr/iiif/ark:/${bookid}/f${i}/full/full/0/native.jpg" > "$bookname/$pgname.jpg"
if test $? -ne 0 ; then
echo "Failed to download page ${pgname}: https://gallica.bnf.fr/iiif/ark:/${bookid}/f${i}/full/full/0/native.jpg"
- rm -f "${pgname}.jpg"
+ rm -f "$bookname/$pgname.jpg"
fi
done