From a1de8862a091f9584220db40671a0d43346c4519 Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 9 Nov 2020 18:29:56 +0000 Subject: [rescribe] Local only combo tool basically now working. Testing is still minimal. --- internal/pipeline/get.go | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 internal/pipeline/get.go (limited to 'internal/pipeline/get.go') diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go new file mode 100644 index 0000000..8492d99 --- /dev/null +++ b/internal/pipeline/get.go @@ -0,0 +1,58 @@ +// Copyright 2019 Nick White. +// Use of this source code is governed by the GPLv3 +// license that can be found in the LICENSE file. + +package pipeline + +import ( + "bufio" + "fmt" + "os" + "path/filepath" +) + +func DownloadBestPages(name string, conn Pipeliner) error { + fn := filepath.Join(name, "best") + err := conn.Download(conn.WIPStorageId(), fn, fn) + if err != nil { + return fmt.Errorf("Failed to download 'best' file: %v", err) + } + f, err := os.Open(fn) + if err != nil { + return fmt.Errorf("Failed to open best file: %v", err) + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + fn = filepath.Join(name, s.Text()) + err = conn.Download(conn.WIPStorageId(), fn, fn) + if err != nil { + return fmt.Errorf("Failed to download file %s: %v", fn, err) + } + } + + return nil +} + +func DownloadPdfs(name string, conn Pipeliner) error { + for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} { + fn := filepath.Join(name, name+suffix) + err := conn.Download(conn.WIPStorageId(), fn, fn) + if err != nil { + return fmt.Errorf("Failed to download PDF %s: %v", fn, err) + } + } + return nil +} + +func DownloadAnalyses(name string, conn Pipeliner) error { + for _, a := range []string{"conf", "graph.png"} { + fn := filepath.Join(name, a) + err := conn.Download(conn.WIPStorageId(), fn, fn) + if err != nil { + return fmt.Errorf("Failed to download analysis file %s: %v", fn, err) + } + } + return nil +} -- cgit v1.2.1-24-ge1ad From 198f8215f8dd0460608abcd03fa49451462c9d11 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 10 Nov 2020 10:41:15 +0000 Subject: [getpipelinebook] Rewrite to use internal package functions --- internal/pipeline/get.go | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'internal/pipeline/get.go') diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go index 8492d99..6949062 100644 --- a/internal/pipeline/get.go +++ b/internal/pipeline/get.go @@ -9,9 +9,10 @@ import ( "fmt" "os" "path/filepath" + "strings" ) -func DownloadBestPages(name string, conn Pipeliner) error { +func DownloadBestPages(name string, conn Pipeliner, pluspngs bool) error { fn := filepath.Join(name, "best") err := conn.Download(conn.WIPStorageId(), fn, fn) if err != nil { @@ -26,12 +27,27 @@ func DownloadBestPages(name string, conn Pipeliner) error { s := bufio.NewScanner(f) for s.Scan() { fn = filepath.Join(name, s.Text()) + conn.Log("Downloading file", fn) err = conn.Download(conn.WIPStorageId(), fn, fn) if err != nil { return fmt.Errorf("Failed to download file %s: %v", fn, err) } } + if !pluspngs { + return nil + } + + s = bufio.NewScanner(f) + for s.Scan() { + txtfn := filepath.Join(name, s.Text()) + fn = strings.Replace(txtfn, ".hocr", ".png", 1) + conn.Log("Downloading file", fn) + err = conn.Download(conn.WIPStorageId(), fn, fn) + if err != nil { + return fmt.Errorf("Failed to download file", fn, err) + } + } return nil } @@ -56,3 +72,18 @@ func DownloadAnalyses(name string, conn Pipeliner) error { } return nil } + +func DownloadAll(name string, conn Pipeliner) error { + objs, err := conn.ListObjects(conn.WIPStorageId(), name) + if err != nil { + return fmt.Errorf("Failed to get list of files for book", name, err) + } + for _, i := range objs { + conn.Log("Downloading", i) + err = conn.Download(conn.WIPStorageId(), i, i) + if err != nil { + return fmt.Errorf("Failed to download file", i, err) + } + } + return nil +} -- cgit v1.2.1-24-ge1ad From 068ad0b666705a49ab22d7b48cd6a7d67b37f234 Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 7 Dec 2020 16:53:58 +0000 Subject: [rescribe] Allow saving of results to somewhere other than a directory named after the book being processed --- internal/pipeline/get.go | 53 +++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 23 deletions(-) (limited to 'internal/pipeline/get.go') diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go index 6949062..6c5b92c 100644 --- a/internal/pipeline/get.go +++ b/internal/pipeline/get.go @@ -12,9 +12,10 @@ import ( "strings" ) -func DownloadBestPages(name string, conn Pipeliner, pluspngs bool) error { - fn := filepath.Join(name, "best") - err := conn.Download(conn.WIPStorageId(), fn, fn) +func DownloadBestPages(dir string, name string, conn Pipeliner, pluspngs bool) error { + key := filepath.Join(name, "best") + fn := filepath.Join(dir, "best") + err := conn.Download(conn.WIPStorageId(), key, fn) if err != nil { return fmt.Errorf("Failed to download 'best' file: %v", err) } @@ -26,11 +27,12 @@ func DownloadBestPages(name string, conn Pipeliner, pluspngs bool) error { s := bufio.NewScanner(f) for s.Scan() { - fn = filepath.Join(name, s.Text()) - conn.Log("Downloading file", fn) - err = conn.Download(conn.WIPStorageId(), fn, fn) + key = filepath.Join(name, s.Text()) + fn = filepath.Join(dir, s.Text()) + conn.Log("Downloading file", key) + err = conn.Download(conn.WIPStorageId(), key, fn) if err != nil { - return fmt.Errorf("Failed to download file %s: %v", fn, err) + return fmt.Errorf("Failed to download file %s: %v", key, err) } } @@ -40,49 +42,54 @@ func DownloadBestPages(name string, conn Pipeliner, pluspngs bool) error { s = bufio.NewScanner(f) for s.Scan() { - txtfn := filepath.Join(name, s.Text()) - fn = strings.Replace(txtfn, ".hocr", ".png", 1) - conn.Log("Downloading file", fn) - err = conn.Download(conn.WIPStorageId(), fn, fn) + imgname := strings.Replace(s.Text(), ".hocr", ".png", 1) + key = filepath.Join(name, imgname) + fn = filepath.Join(dir, imgname) + conn.Log("Downloading file", key) + err = conn.Download(conn.WIPStorageId(), key, fn) if err != nil { - return fmt.Errorf("Failed to download file", fn, err) + return fmt.Errorf("Failed to download file %s: %v", key, err) } } return nil } -func DownloadPdfs(name string, conn Pipeliner) error { +func DownloadPdfs(dir string, name string, conn Pipeliner) error { for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} { - fn := filepath.Join(name, name+suffix) - err := conn.Download(conn.WIPStorageId(), fn, fn) + key := filepath.Join(name, name+suffix) + fn := filepath.Join(dir, name+suffix) + err := conn.Download(conn.WIPStorageId(), key, fn) if err != nil { - return fmt.Errorf("Failed to download PDF %s: %v", fn, err) + return fmt.Errorf("Failed to download PDF %s: %v", key, err) } } return nil } -func DownloadAnalyses(name string, conn Pipeliner) error { +func DownloadAnalyses(dir string, name string, conn Pipeliner) error { for _, a := range []string{"conf", "graph.png"} { - fn := filepath.Join(name, a) - err := conn.Download(conn.WIPStorageId(), fn, fn) + key := filepath.Join(name, a) + fn := filepath.Join(dir, a) + err := conn.Download(conn.WIPStorageId(), key, fn) if err != nil { - return fmt.Errorf("Failed to download analysis file %s: %v", fn, err) + return fmt.Errorf("Failed to download analysis file %s: %v", key, err) } } return nil } -func DownloadAll(name string, conn Pipeliner) error { +func DownloadAll(dir string, name string, conn Pipeliner) error { objs, err := conn.ListObjects(conn.WIPStorageId(), name) if err != nil { return fmt.Errorf("Failed to get list of files for book", name, err) } for _, i := range objs { + base := filepath.Base(i) + fn := filepath.Join(dir, base) conn.Log("Downloading", i) - err = conn.Download(conn.WIPStorageId(), i, i) + err = conn.Download(conn.WIPStorageId(), i, fn) if err != nil { - return fmt.Errorf("Failed to download file", i, err) + return fmt.Errorf("Failed to download file %s: %v", i, err) } } return nil -- cgit v1.2.1-24-ge1ad