From e292f759acc8b46523c7ac0cdf44b301b6d9eb73 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Tue, 8 Sep 2020 15:48:13 +0100
Subject: Sanitise URLs so that // in url doesn't cause issues (bsb site can spew these)

---
 cmd/iiifdownloader/main.go | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/cmd/iiifdownloader/main.go b/cmd/iiifdownloader/main.go
index 57247cc..55b72b2 100644
--- a/cmd/iiifdownloader/main.go
+++ b/cmd/iiifdownloader/main.go
@@ -21,8 +21,9 @@ const usage = `Usage: iiifdownloader url
 Downloads all pages from a IIIF server.
 
 Currently supports the following IIIF using services:
-- BNF's Gallica (any book or page URL should work)
-- BSB / MDZ / DFG
+- BNF's Gallica
+- BSB / MDZ
+- DFG Viewer
 `
 
 const bnfPrefix = `https://gallica.bnf.fr/ark:/`
@@ -294,6 +295,14 @@ func dlNoPgNums(bookdir, pgurlStart, pgurlEnd, pgurlAltStart, pgurlAltEnd string
 	}
 }
 
+func sanitiseUrl(u string) string {
+	var s string
+	s = strings.Replace(u, "//", "/", -1)
+	s = strings.Replace(s, "https:/", "https://", -1)
+	s = strings.Replace(s, "http:/", "http://", -1)
+	return s
+}
+
 func main() {
 	flag.Usage = func() {
 		fmt.Fprintf(flag.CommandLine.Output(), usage)
@@ -306,7 +315,7 @@ func main() {
 		return
 	}
 
-	u := flag.Arg(0)
+	u := sanitiseUrl(flag.Arg(0))
 	var bookdir string
 	var pgurlStart, pgurlEnd string
 
-- 
cgit v1.2.1-24-ge1ad