diff options
author | Nick White <git@njw.name> | 2021-02-05 17:15:51 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2021-02-05 17:15:51 +0000 |
commit | 11470933e4fd379b4aefa4e2bab33662a72791c2 (patch) | |
tree | 8607e7739989ff63032b9ce10a8bf8553ecc6eb6 /internal/pipeline/get.go | |
parent | 3e7da751b3ca917adb79674eac4ef2a3267e3984 (diff) | |
parent | a8c7481f0dc02bbda3b3a07091e9d61f6eb728b2 (diff) |
Merge branch 'master' of ssh://ssh.phx.nearlyfreespeech.net/home/public/bookpipeline
Diffstat (limited to 'internal/pipeline/get.go')
-rw-r--r-- | internal/pipeline/get.go | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go new file mode 100644 index 0000000..6c5b92c --- /dev/null +++ b/internal/pipeline/get.go @@ -0,0 +1,96 @@ +// Copyright 2019 Nick White. +// Use of this source code is governed by the GPLv3 +// license that can be found in the LICENSE file. + +package pipeline + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strings" +) + +func DownloadBestPages(dir string, name string, conn Pipeliner, pluspngs bool) error { + key := filepath.Join(name, "best") + fn := filepath.Join(dir, "best") + err := conn.Download(conn.WIPStorageId(), key, fn) + if err != nil { + return fmt.Errorf("Failed to download 'best' file: %v", err) + } + f, err := os.Open(fn) + if err != nil { + return fmt.Errorf("Failed to open best file: %v", err) + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + key = filepath.Join(name, s.Text()) + fn = filepath.Join(dir, s.Text()) + conn.Log("Downloading file", key) + err = conn.Download(conn.WIPStorageId(), key, fn) + if err != nil { + return fmt.Errorf("Failed to download file %s: %v", key, err) + } + } + + if !pluspngs { + return nil + } + + s = bufio.NewScanner(f) + for s.Scan() { + imgname := strings.Replace(s.Text(), ".hocr", ".png", 1) + key = filepath.Join(name, imgname) + fn = filepath.Join(dir, imgname) + conn.Log("Downloading file", key) + err = conn.Download(conn.WIPStorageId(), key, fn) + if err != nil { + return fmt.Errorf("Failed to download file %s: %v", key, err) + } + } + return nil +} + +func DownloadPdfs(dir string, name string, conn Pipeliner) error { + for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} { + key := filepath.Join(name, name+suffix) + fn := filepath.Join(dir, name+suffix) + err := conn.Download(conn.WIPStorageId(), key, fn) + if err != nil { + return fmt.Errorf("Failed to download PDF %s: %v", key, err) + } + } + return nil +} + +func DownloadAnalyses(dir string, name string, conn Pipeliner) error { + for _, a := range []string{"conf", "graph.png"} { + key := filepath.Join(name, a) + fn := filepath.Join(dir, a) + err := conn.Download(conn.WIPStorageId(), key, fn) + if err != nil { + return fmt.Errorf("Failed to download analysis file %s: %v", key, err) + } + } + return nil +} + +func DownloadAll(dir string, name string, conn Pipeliner) error { + objs, err := conn.ListObjects(conn.WIPStorageId(), name) + if err != nil { + return fmt.Errorf("Failed to get list of files for book", name, err) + } + for _, i := range objs { + base := filepath.Base(i) + fn := filepath.Join(dir, base) + conn.Log("Downloading", i) + err = conn.Download(conn.WIPStorageId(), i, fn) + if err != nil { + return fmt.Errorf("Failed to download file %s: %v", i, err) + } + } + return nil +} |