diff options
author | Nick White <git@njw.name> | 2020-09-08 16:18:13 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2020-09-08 16:18:13 +0100 |
commit | 5a1240823f6dfa9e4ac4e6d56af9bcf9c621f2e2 (patch) | |
tree | 4c687b449e068c072379b56118390fe032796bac | |
parent | 270c7a209efc854e15b19bf9849f91ffb3a28b8d (diff) |
Improve urlToPgName so it can be used by BNF too
-rw-r--r-- | cmd/iiifdownloader/main.go | 36 |
1 file changed, 25 insertions, 11 deletions
diff --git a/cmd/iiifdownloader/main.go b/cmd/iiifdownloader/main.go index 11ecd0f..4b4df5b 100644 --- a/cmd/iiifdownloader/main.go +++ b/cmd/iiifdownloader/main.go @@ -12,6 +12,7 @@ import ( "net/url" "os" "path" + "strconv" "strings" ) @@ -162,7 +163,7 @@ func urlToPgName(u string) string { safe := strings.Replace(u, "/", "_", -1) b := path.Base(u) - if b != "default.jpg" { + if b != "default.jpg" && b != "native.jpg" { return b } @@ -185,7 +186,14 @@ func urlToPgName(u string) string { pgnum = numpart[len(numpart)-4:] } - return pgnum + ".jpg" + pgnum = strings.Replace(pgnum, "f", "", 1) + + pgnumint, err := strconv.Atoi(pgnum) + if err != nil { + return pgnum + ".jpg" + } + + return fmt.Sprintf("%04d.jpg", pgnumint) } // dlPage downloads a page url to bookdir. @@ -193,6 +201,12 @@ func dlPage(bookdir, u string) error { name := urlToPgName(u) fn := path.Join(bookdir, name) + _, err := os.Stat(fn) + if err == nil || os.IsExist(err) { + fmt.Printf("Skipping already present page %s\n", fn) + return nil + } + fmt.Printf("Downloading page %s to %s\n", u, fn) resp, err := http.Get(u) @@ -215,9 +229,6 @@ func dlPage(bookdir, u string) error { return fmt.Errorf("Error writing file %s: %v\n", fn, err) } - resp.Body.Close() - f.Close() - return nil } @@ -229,16 +240,19 @@ func dlNoPgNums(bookdir, pgurlStart, pgurlEnd, pgurlAltStart, pgurlAltEnd string for { pgnum++ - fmt.Printf("Downloading page %d\n", pgnum) + u := fmt.Sprintf("%s%d%s", pgurlStart, pgnum, pgurlEnd) + + name := urlToPgName(u) + fn := path.Join(bookdir, name) - fn := path.Join(bookdir, fmt.Sprintf("%04d.jpg", pgnum)) _, err := os.Stat(fn) if err == nil || os.IsExist(err) { fmt.Printf("Skipping already present page %d\n", pgnum) continue } - u := fmt.Sprintf("%s%d%s", pgurlStart, pgnum, pgurlEnd) + fmt.Printf("Downloading page %s to %s\n", u, fn) + resp, err := http.Get(u) if err != nil { return fmt.Errorf("Error downloading page %d, %s: %v\n", pgnum, u, err) @@ -307,9 +321,9 @@ func dlNoPgNums(bookdir, 
pgurlStart, pgurlEnd, pgurlAltStart, pgurlAltEnd string // but enough for us for now. func sanitiseUrl(u string) string { var s string - s = strings.Replace(u, "//", "/", -1) - s = strings.Replace(s, "https:/", "https://", -1) - s = strings.Replace(s, "http:/", "http://", -1) + s = strings.ReplaceAll(u, "//", "/") + s = strings.Replace(s, "https:/", "https://", 1) + s = strings.Replace(s, "http:/", "http://", 1) return s } |