summaryrefslogtreecommitdiff
path: root/cmd/iiifdownloader/main.go
diff options
context:
space:
mode:
authorNick White <git@njw.name>2020-09-07 17:43:05 +0100
committerNick White <git@njw.name>2020-09-07 17:43:05 +0100
commit30486c55b5b226f4a099ae1511dfea38dd9acfa9 (patch)
tree31258dba33c2a808f2804853cb6f0f31aab02620 /cmd/iiifdownloader/main.go
parent8b5644941e89698b8f78f17e64a5d563c075af0e (diff)
[iiifdownloader] Split out NoPgNums downloading to its own function
Diffstat (limited to 'cmd/iiifdownloader/main.go')
-rw-r--r--cmd/iiifdownloader/main.go160
1 files changed, 84 insertions, 76 deletions
diff --git a/cmd/iiifdownloader/main.go b/cmd/iiifdownloader/main.go
index 3572dae..9dfc067 100644
--- a/cmd/iiifdownloader/main.go
+++ b/cmd/iiifdownloader/main.go
@@ -59,6 +59,88 @@ func parseMets(url string) ([]string, error) {
return urls, nil
}
+// dlNoPgNums downloads all pages, starting from zero, until either
+// a 404 is returned, or identical files are returned for two subsequent
+// pages (the latter being the behaviour of BNF's server).
+func dlNoPgNums(bookdir, pgurlStart, pgurlEnd, pgurlAltStart, pgurlAltEnd string) error {
+ pgnum := 0
+ for {
+ pgnum++
+
+ fmt.Printf("Downloading page %d\n", pgnum)
+
+ fn := path.Join(bookdir, fmt.Sprintf("%04d.jpg", pgnum))
+ _, err := os.Stat(fn)
+ if err == nil || os.IsExist(err) {
+ fmt.Printf("Skipping already present page %d\n", pgnum)
+ continue
+ }
+
+ u := fmt.Sprintf("%s%d%s", pgurlStart, pgnum, pgurlEnd)
+ resp, err := http.Get(u)
+ if err != nil {
+ return fmt.Errorf("Error downloading page %d, %s: %v\n", pgnum, u, err)
+ }
+ defer resp.Body.Close()
+ switch {
+ case resp.StatusCode == http.StatusNotFound:
+ fmt.Printf("Got 404, assuming end of pages, for page %d, %s\n", pgnum, u)
+ return nil
+ case resp.StatusCode != http.StatusOK:
+ fmt.Printf("Error downloading page %d, %s: HTTP Code %s\n", pgnum, u, resp.Status)
+
+ if pgurlAltStart == "" && pgurlAltEnd == "" {
+ return fmt.Errorf("No alternative URL to try, book failed (or ended, hopefully)")
+ }
+
+ fmt.Printf("Trying to redownload page %d at lower quality\n", pgnum)
+ u = fmt.Sprintf("%s%d%s", pgurlAltStart, pgnum, pgurlAltEnd)
+ resp, err = http.Get(u)
+ if err != nil {
+ return fmt.Errorf("Error downloading page %d, %s: %v\n", pgnum, u, err)
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ return fmt.Errorf("Error downloading page %d, %s: HTTP Code %s\n", pgnum, u, resp.Status)
+ }
+ }
+
+ f, err := os.Create(fn)
+ defer f.Close()
+ if err != nil {
+ return fmt.Errorf("Error creating file %s: %v\n", fn, err)
+ }
+ _, err = io.Copy(f, resp.Body)
+ if err != nil {
+ return fmt.Errorf("Error writing file %s: %v\n", fn, err)
+ }
+
+ // Close once finished with, as defer won't trigger until the end of the function
+ resp.Body.Close()
+ f.Close()
+
+ // Check that the last two downloaded files aren't identical, as this
+ // can happen when there are no more pages to download.
+ if pgnum == 1 {
+ continue
+ }
+ fn2 := path.Join(bookdir, fmt.Sprintf("%04d.jpg", pgnum-1))
+ identical, err := filesAreIdentical(fn, fn2)
+ if err != nil {
+ return fmt.Errorf("Error checking for files being identical: %v\n", err)
+ }
+ if identical {
+ fmt.Println("Last 2 pages were identical, looks like it's the end of the book")
+ err = os.Remove(fn)
+ if err != nil {
+ return fmt.Errorf("Error removing dupilicate page %d: %v", fn, err)
+ }
+ return nil
+ }
+ }
+}
+
func main() {
flag.Usage = func() {
fmt.Fprintf(flag.CommandLine.Output(), usage)
@@ -110,6 +192,7 @@ func main() {
log.Fatalln("Failed to extract BNF book ID from URL")
}
bookid := f[0]
+ bookdir = bookid
metsurl := "https://daten.digitale-sammlungen.de/~db/mets/" + bookid + "_mets.xml"
pgUrls, err = parseMets(metsurl)
@@ -128,81 +211,6 @@ func main() {
if len(pgUrls) > 0 {
fmt.Printf("I'll do something proper with these urls: %v\n", pgUrls)
} else if noPgNums {
- pgnum := 0
- for {
- pgnum++
-
- fmt.Printf("Downloading page %d\n", pgnum)
-
- fn := path.Join(bookdir, fmt.Sprintf("%04d.jpg", pgnum))
- _, err = os.Stat(fn)
- if err == nil || os.IsExist(err) {
- fmt.Printf("Skipping already present page %d\n", pgnum)
- continue
- }
-
- u := fmt.Sprintf("%s%d%s", pgurlStart, pgnum, pgurlEnd)
- resp, err := http.Get(u)
- if err != nil {
- log.Fatalf("Error downloading page %d, %s: %v\n", pgnum, u, err)
- }
- defer resp.Body.Close()
- switch {
- case resp.StatusCode == http.StatusNotFound:
- fmt.Printf("Got 404, assuming end of pages, for page %d, %s\n", pgnum, u)
- return
- case resp.StatusCode != http.StatusOK:
- fmt.Printf("Error downloading page %d, %s: HTTP Code %s\n", pgnum, u, resp.Status)
-
- if pgurlAltStart == "" && pgurlAltEnd == "" {
- log.Fatalln("No alternative URL to try, book failed (or ended, hopefully)")
- }
-
- fmt.Printf("Trying to redownload page %d at lower quality\n", pgnum)
- u = fmt.Sprintf("%s%d%s", pgurlAltStart, pgnum, pgurlAltEnd)
- resp, err = http.Get(u)
- if err != nil {
- log.Fatalf("Error downloading page %d, %s: %v\n", pgnum, u, err)
- }
- defer resp.Body.Close()
-
- if resp.StatusCode != http.StatusOK {
- log.Fatalf("Error downloading page %d, %s: HTTP Code %s\n", pgnum, u, resp.Status)
- }
- }
-
- f, err := os.Create(fn)
- defer f.Close()
- if err != nil {
- log.Fatalf("Error creating file %s: %v\n", fn, err)
- }
- _, err = io.Copy(f, resp.Body)
- if err != nil {
- log.Fatalf("Error writing file %s: %v\n", fn, err)
- }
-
- // Close once finished with, as defer won't trigger until the end of the function
- resp.Body.Close()
- f.Close()
-
- // Check that the last two downloaded files aren't identical, as this
- // can happen when there are no more pages to download.
- if pgnum == 1 {
- continue
- }
- fn2 := path.Join(bookdir, fmt.Sprintf("%04d.jpg", pgnum-1))
- identical, err := filesAreIdentical(fn, fn2)
- if err != nil {
- log.Fatalf("Error checking for files being identical: %v\n", err)
- }
- if identical {
- fmt.Println("Last 2 pages were identical, looks like it's the end of the book")
- err = os.Remove(fn)
- if err != nil {
- log.Fatalf("Error removing dupilicate page %d: %v", fn, err)
- }
- return
- }
- }
+ dlNoPgNums(bookdir, pgurlStart, pgurlEnd, pgurlAltStart, pgurlAltEnd)
}
}