diff options
| author | Nick White <git@njw.name> | 2020-09-28 15:55:14 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2020-09-28 15:55:14 +0100 | 
| commit | d1efeb891e11c13fd0748fa7ee3b7e4c3c946d63 (patch) | |
| tree | 29551d5587f40f303767e8390000823396f9597c | |
| parent | 251c0ea75d6ad28dbea7ad38bf15349a3b108d9e (diff) | |
Add ability to pass -service to choose which download type to use, plus add a -bookdir flag to set download directory
| -rw-r--r-- | cmd/iiifdownloader/main.go | 83 | 
1 files changed, 72 insertions, 11 deletions
diff --git a/cmd/iiifdownloader/main.go b/cmd/iiifdownloader/main.go index a1fba4f..593a93e 100644 --- a/cmd/iiifdownloader/main.go +++ b/cmd/iiifdownloader/main.go @@ -25,8 +25,8 @@ Currently supports the following IIIF using services:  - BNF's Gallica   example url: https://gallica.bnf.fr/ark:/12148/bpt6k6468158v  - BSB / MDZ       example url: https://reader.digitale-sammlungen.de//de/fs1/object/display/bsb10132387_00005.html  - DFG Viewer      example url: http://dfg-viewer.de/show?set%%5Bmets%%5D=http%%3A%%2F%%2Fdaten.digitale-sammlungen.de%%2F~db%%2Fmets%%2Fbsb11274872_mets.xml&cHash=fd18451ee968c125ab2bdbfd3717eae6 -- IIIF Manifest (TODO) -- METS Manifest (TODO) +- IIIF Manifest   example url: https://iiif.bodleian.ox.ac.uk/iiif/manifest/441db95d-cdff-472e-bb2d-b46f043db82d.json +- METS Manifest   example url: https://daten.digitale-sammlungen.de/~db/mets/bsb10132387_mets.xml  ` @@ -305,7 +305,27 @@ func sanitiseUrl(u string) string {  	return s  } +// detectService finds which service to use based on the +// url passed to it. +func detectService(url string) string { +	switch { +	case strings.HasSuffix(url, "/manifest"): +		return "iiifmanifest" +	case strings.HasSuffix(url, "mets.xml"): +		return "mets" +	case strings.HasPrefix(url, bnfPrefix): +		return "bnf" +	case strings.HasPrefix(url, bsbPrefix): +		return "bsb" +	case strings.HasPrefix(url, dfgPrefix): +		return "dfg" +	} +	return "" +} +  func main() { +	service := flag.String("service", "", "Force use of a specific service rather than autodetecting based on the URL (choose one of: bnf, bsb, mets, iiifmanifest)") +	dir := flag.String("bookdir", "", "Save book pages to this directory")  	forcemets := flag.Bool("mets", false, "Force METS metadata to be used (BSB / MDZ only)")  	flag.Usage = func() {  		fmt.Fprintf(flag.CommandLine.Output(), usage) @@ -326,9 +346,28 @@ func main() {  	var pgUrls []string  	var noPgNums bool  	var err error +	var useservice string -	switch { -	case strings.HasPrefix(u, bnfPrefix): +	if *dir != "" { +		bookdir = *dir +	} + +	if *service == "" { +		useservice = detectService(u) +	} else { +		useservice = *service +	} + +	switch *service { +	case "iiifmanifest": +		if bookdir == "" { +			bookdir = "iiifbook" +		} +		pgUrls, err = parseMets(u) +	} + +	switch useservice { +	case "bnf":  		f := strings.Split(u[len(bnfPrefix):], "/")  		if len(f) < 2 {  			log.Fatalln("Failed to extract BNF book ID from URL") @@ -341,7 +380,9 @@ func main() {  			lastpart = f[1][0:dot]  		}  		bookid := f[0] + "/" + lastpart -		bookdir = f[0] + "-" + lastpart +		if bookdir == "" { +			bookdir = f[0] + "-" + lastpart +		}  		pgurlStart = "https://gallica.bnf.fr/iiif/ark:/" + bookid + "/f"  		pgurlEnd = "/full/full/0/native.jpg" @@ -351,13 +392,15 @@ func main() {  		// the missing ones in less good quality from an alternative URL.  		pgurlAltStart = "https://gallica.bnf.fr/ark:/" + bookid + "/f"  		pgurlAltEnd = ".highres" -	case strings.HasPrefix(u, bsbPrefix): +	case "bsb":  		f := strings.Split(u[len(bsbPrefix):], "_")  		if len(f) < 2 { -			log.Fatalln("Failed to extract BNF book ID from URL") +			log.Fatalln("Failed to extract BSB book ID from URL")  		}  		bookid := f[0] -		bookdir = bookid +		if bookdir == "" { +			bookdir = bookid +		}  		iiifurl := "https://api.digitale-sammlungen.de/iiif/presentation/v2/" + bookid + "/manifest"  		if *forcemets { @@ -369,7 +412,7 @@ func main() {  		if err != nil {  			log.Fatalf("Error parsing manifest url %s: %v\n", iiifurl, err)  		} -	case strings.HasPrefix(u, dfgPrefix): +	case "dfg":  		// dfg can have a url encoded mets url in several parts of the viewer url  		metsNames := []string{"set[mets]", "tx_dlf[id]"}  		var metsurl string @@ -396,14 +439,32 @@ func main() {  		b := path.Base(metsurl)  		f := strings.Split(b, "_") -		bookdir = f[0] +		if bookdir == "" { +			bookdir = f[0] +		}  		pgUrls, err = parseMets(metsurl)  		if err != nil {  			log.Fatalf("Error parsing mets url %s: %v\n", metsurl, err)  		} +	case "iiifmanifest": +		if bookdir == "" { +			bookdir = "iiifbook" +		} +		pgUrls, err = parseIIIFManifest(u) +		if err != nil { +			log.Fatalf("Error parsing iiif manifest url %s: %v\n", u, err) +		} +	case "mets": +		if bookdir == "" { +			bookdir = "metsbook" +		} +		pgUrls, err = parseMets(u) +		if err != nil { +			log.Fatalf("Error parsing mets url %s: %v\n", u, err) +		}  	default: -		log.Fatalln("Error: generic IIIF downloading not supported yet") +		log.Fatalln("Error: failed to autodetect service type, or invalid service type given; specify with the -service flag")  	}  	err = os.MkdirAll(bookdir, 0777)  | 
