summaryrefslogtreecommitdiff
path: root/scrape-bnf.sh
blob: 677e4d4070ce1c265376dadad55d587c082617ee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#!/bin/sh
# scrape-bnf.sh - download every page image of a Gallica (gallica.bnf.fr)
# document into the current directory.
#
# Usage: scrape-bnf.sh bnfurl
#   bnfurl  URL of the document. When split on "/", its 5th and 6th fields
#           are used as the ark identifier, e.g.
#           https://gallica.bnf.fr/ark:/12148/bpt6k1234567
#
# Output: one file per page, named <field5>_<field6>_NNN.jpg.
# Exit status: 0 if every page was fetched, 1 on usage error, on failure to
# read the document page / page count, or if any page download failed.
# Requires: curl, awk, sed.

usage="Usage: $0 bnfurl"

# Print a message on stderr and abort.
die() {
	printf '%s\n' "$*" >&2
	exit 1
}

test $# -eq 1 || die "$usage"

# Fields 5 and 6 of the "/"-separated URL form the ark id ("naan/name").
bookid=$(printf '%s\n' "$1" | awk -F / '{printf("%s/%s\n", $5, $6)}')
case "$bookid" in
	/* | */) die "Could not find an ark identifier in '$1'
$usage" ;;
esac

# Same id with the "/" replaced by "_", used as the file name prefix.
# bookid contains exactly one "/" because awk split the URL on "/".
bookid_name="${bookid%%/*}_${bookid#*/}"

# -f: treat HTTP errors as failures instead of parsing an error page.
html=$(curl -fsS "https://gallica.bnf.fr/ark:/${bookid}") \
	|| die "Could not fetch https://gallica.bnf.fr/ark:/${bookid}"

# The page embeds escaped JSON containing  nbTotalVues\":<count>,
# Only the first line that carries the key is considered; everything up to
# the key and everything from the following comma onwards is removed.
pagenum=$(printf '%s\n' "$html" \
	| sed -n '/nbTotalVues\\":/{s/.*nbTotalVues\\"://;s/,.*//;p;q;}')

case "$pagenum" in
	'' | *[!0-9]*) die "Could not determine the page count for ${bookid}" ;;
esac

status=0
i=1
while [ "$i" -le "$pagenum" ]; do
	pgname=$(printf '%s_%03d' "${bookid_name}" "${i}")
	echo "Downloading page $i of $pagenum to ${pgname}.jpg"
	if ! curl -fsS -o "${pgname}.jpg" \
		"https://gallica.bnf.fr/iiif/ark:/${bookid}/f${i}/full/full/0/native.jpg"; then
		printf 'Failed to download page %s\n' "$i" >&2
		# Do not leave a truncated or bogus image behind.
		rm -f -- "${pgname}.jpg"
		status=1
	fi
	i=$((i + 1))
done

exit "$status"