From 5a1240823f6dfa9e4ac4e6d56af9bcf9c621f2e2 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 8 Sep 2020 16:18:13 +0100 Subject: Improve urlToPgName so it can be used by BNF too --- cmd/iiifdownloader/main.go | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/cmd/iiifdownloader/main.go b/cmd/iiifdownloader/main.go index 11ecd0f..4b4df5b 100644 --- a/cmd/iiifdownloader/main.go +++ b/cmd/iiifdownloader/main.go @@ -12,6 +12,7 @@ import ( "net/url" "os" "path" + "strconv" "strings" ) @@ -162,7 +163,7 @@ func urlToPgName(u string) string { safe := strings.Replace(u, "/", "_", -1) b := path.Base(u) - if b != "default.jpg" { + if b != "default.jpg" && b != "native.jpg" { return b } @@ -185,7 +186,14 @@ func urlToPgName(u string) string { pgnum = numpart[len(numpart)-4:] } - return pgnum + ".jpg" + pgnum = strings.Replace(pgnum, "f", "", 1) + + pgnumint, err := strconv.Atoi(pgnum) + if err != nil { + return pgnum + ".jpg" + } + + return fmt.Sprintf("%04d.jpg", pgnumint) } // dlPage downloads a page url to bookdir. @@ -193,6 +201,12 @@ func dlPage(bookdir, u string) error { name := urlToPgName(u) fn := path.Join(bookdir, name) + _, err := os.Stat(fn) + if err == nil || os.IsExist(err) { + fmt.Printf("Skipping already present page %s\n", fn) + return nil + } + fmt.Printf("Downloading page %s to %s\n", u, fn) resp, err := http.Get(u) @@ -215,9 +229,6 @@ func dlPage(bookdir, u string) error { return fmt.Errorf("Error writing file %s: %v\n", fn, err) } - resp.Body.Close() - f.Close() - return nil } @@ -229,16 +240,19 @@ func dlNoPgNums(bookdir, pgurlStart, pgurlEnd, pgurlAltStart, pgurlAltEnd string for { pgnum++ - fmt.Printf("Downloading page %d\n", pgnum) + u := fmt.Sprintf("%s%d%s", pgurlStart, pgnum, pgurlEnd) + + name := urlToPgName(u) + fn := path.Join(bookdir, name) - fn := path.Join(bookdir, fmt.Sprintf("%04d.jpg", pgnum)) _, err := os.Stat(fn) if err == nil || os.IsExist(err) { fmt.Printf("Skipping already present page %d\n", pgnum) continue } - u := fmt.Sprintf("%s%d%s", pgurlStart, pgnum, pgurlEnd) + fmt.Printf("Downloading page %s to %s\n", u, fn) + resp, err := http.Get(u) if err != nil { return fmt.Errorf("Error downloading page %d, %s: %v\n", pgnum, u, err) @@ -307,9 +321,9 @@ func dlNoPgNums(bookdir, pgurlStart, pgurlEnd, pgurlAltStart, pgurlAltEnd string // but enough for us for now. func sanitiseUrl(u string) string { var s string - s = strings.Replace(u, "//", "/", -1) - s = strings.Replace(s, "https:/", "https://", -1) - s = strings.Replace(s, "http:/", "http://", -1) + s = strings.ReplaceAll(u, "//", "/") + s = strings.Replace(s, "https:/", "https://", 1) + s = strings.Replace(s, "http:/", "http://", 1) return s } -- cgit v1.2.1-24-ge1ad