#!/bin/sh
usage="Usage: $0 bsburl
bsburl: The book index page, e.g.
https://reader.digitale-sammlungen.de/de/fs1/object/display/bsb11274872_00005.html
Downloads all pages of a book from digitale-sammlungen.de.
Note that for some books the download of the book index page may
fail at first. The reason is not entirely clear; it is probably a
caching issue on the server, so try again in a few minutes and it
will often work."
test $# -ne 1 && echo "$usage" && exit 1
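# Extract the book identifier (e.g. bsb11274872) from the URL path.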
bookid=`echo "$1" | awk -F / '{printf("%s\n", $8)}'|sed 's/_.*//g'`
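# Download the METS metadata for the book, which lists the page images.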
xml=`curl -f -s "http://daten.digitale-sammlungen.de/~db/mets/${bookid}_mets.xml"`
if test $? -ne 0 ; then
    echo "Error downloading book index page: http://daten.digitale-sammlungen.de/~db/mets/${bookid}_mets.xml"
    exit 1
fi
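# Collect the page image URLs from the mets:FLocat entries (the copies under images/150).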
urls=`echo "$xml" | grep mets:FLocat | grep 'images/150' | awk -F '"' '{print $2}'`
mkdir -p "$bookid"
if test $? -ne 0 ; then
    echo "Failed to mkdir $bookid"
    exit 1
fi
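# Download each page image into the book directory, named by its page number.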
echo "$urls" | while read i; do
pgnum=`echo "$i" | awk -F '_' '{print $2}'`
echo "Downloading page $pgnum to $bookid/$pgnum.jpg"
curl -f -s "$i" > "${bookid}/${pgnum}.jpg"
if test $? -ne 0 ; then
echo "Download failed for page ${pgnum}: $i"
rm -f "${bookid}/${pgnum}.jpg"
fi
done