summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2020-09-08 16:18:13 +0100
committer: Nick White <git@njw.name> 2020-09-08 16:18:13 +0100
commit: 5a1240823f6dfa9e4ac4e6d56af9bcf9c621f2e2 (patch)
tree: 4c687b449e068c072379b56118390fe032796bac
parent: 270c7a209efc854e15b19bf9849f91ffb3a28b8d (diff)
Improve urlToPgName so it can be used by BNF too
-rw-r--r-- cmd/iiifdownloader/main.go 36
1 files changed, 25 insertions, 11 deletions
diff --git a/cmd/iiifdownloader/main.go b/cmd/iiifdownloader/main.go
index 11ecd0f..4b4df5b 100644
--- a/cmd/iiifdownloader/main.go
+++ b/cmd/iiifdownloader/main.go
@@ -12,6 +12,7 @@ import (
"net/url"
"os"
"path"
+ "strconv"
"strings"
)
@@ -162,7 +163,7 @@ func urlToPgName(u string) string {
safe := strings.Replace(u, "/", "_", -1)
b := path.Base(u)
- if b != "default.jpg" {
+ if b != "default.jpg" && b != "native.jpg" {
return b
}
@@ -185,7 +186,14 @@ func urlToPgName(u string) string {
pgnum = numpart[len(numpart)-4:]
}
- return pgnum + ".jpg"
+ pgnum = strings.Replace(pgnum, "f", "", 1)
+
+ pgnumint, err := strconv.Atoi(pgnum)
+ if err != nil {
+ return pgnum + ".jpg"
+ }
+
+ return fmt.Sprintf("%04d.jpg", pgnumint)
}
// dlPage downloads a page url to bookdir.
@@ -193,6 +201,12 @@ func dlPage(bookdir, u string) error {
name := urlToPgName(u)
fn := path.Join(bookdir, name)
+ _, err := os.Stat(fn)
+ if err == nil || os.IsExist(err) {
+ fmt.Printf("Skipping already present page %s\n", fn)
+ return nil
+ }
+
fmt.Printf("Downloading page %s to %s\n", u, fn)
resp, err := http.Get(u)
@@ -215,9 +229,6 @@ func dlPage(bookdir, u string) error {
return fmt.Errorf("Error writing file %s: %v\n", fn, err)
}
- resp.Body.Close()
- f.Close()
-
return nil
}
@@ -229,16 +240,19 @@ func dlNoPgNums(bookdir, pgurlStart, pgurlEnd, pgurlAltStart, pgurlAltEnd string
for {
pgnum++
- fmt.Printf("Downloading page %d\n", pgnum)
+ u := fmt.Sprintf("%s%d%s", pgurlStart, pgnum, pgurlEnd)
+
+ name := urlToPgName(u)
+ fn := path.Join(bookdir, name)
- fn := path.Join(bookdir, fmt.Sprintf("%04d.jpg", pgnum))
_, err := os.Stat(fn)
if err == nil || os.IsExist(err) {
fmt.Printf("Skipping already present page %d\n", pgnum)
continue
}
- u := fmt.Sprintf("%s%d%s", pgurlStart, pgnum, pgurlEnd)
+ fmt.Printf("Downloading page %s to %s\n", u, fn)
+
resp, err := http.Get(u)
if err != nil {
return fmt.Errorf("Error downloading page %d, %s: %v\n", pgnum, u, err)
@@ -307,9 +321,9 @@ func dlNoPgNums(bookdir, pgurlStart, pgurlEnd, pgurlAltStart, pgurlAltEnd string
// but enough for us for now.
func sanitiseUrl(u string) string {
var s string
- s = strings.Replace(u, "//", "/", -1)
- s = strings.Replace(s, "https:/", "https://", -1)
- s = strings.Replace(s, "http:/", "http://", -1)
+ s = strings.ReplaceAll(u, "//", "/")
+ s = strings.Replace(s, "https:/", "https://", 1)
+ s = strings.Replace(s, "http:/", "http://", 1)
return s
}