From 2c040f73ce7bbba480c441a0433fc8b4d6449254 Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 9 Nov 2020 18:57:21 +0000 Subject: Add a couple of things that should not be forgotten --- cmd/getpipelinebook/main.go | 2 ++ 1 file changed, 2 insertions(+) (limited to 'cmd/getpipelinebook') diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go index 03e709b..ef13db5 100644 --- a/cmd/getpipelinebook/main.go +++ b/cmd/getpipelinebook/main.go @@ -17,6 +17,8 @@ import ( "rescribe.xyz/bookpipeline" ) +// TODO: use internal/pipeline/get.go functions + const usage = `Usage: getpipelinebook [-c conn] [-a] [-graph] [-pdf] [-png] [-v] bookname Downloads the pipeline results for a book. -- cgit v1.2.1-24-ge1ad From 198f8215f8dd0460608abcd03fa49451462c9d11 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 10 Nov 2020 10:41:15 +0000 Subject: [getpipelinebook] Rewrite to use internal package functions --- cmd/getpipelinebook/main.go | 100 ++++++++------------------------------------ 1 file changed, 18 insertions(+), 82 deletions(-) (limited to 'cmd/getpipelinebook') diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go index ef13db5..5116414 100644 --- a/cmd/getpipelinebook/main.go +++ b/cmd/getpipelinebook/main.go @@ -6,18 +6,16 @@ package main import ( - "bufio" "flag" "fmt" "log" "os" "path/filepath" - "strings" "rescribe.xyz/bookpipeline" -) -// TODO: use internal/pipeline/get.go functions + "rescribe.xyz/bookpipeline/internal/pipeline" +) const usage = `Usage: getpipelinebook [-c conn] [-a] [-graph] [-pdf] [-png] [-v] bookname @@ -35,28 +33,6 @@ func (w NullWriter) Write(p []byte) (n int, err error) { return len(p), nil } -type Pipeliner interface { - MinimalInit() error - ListObjects(bucket string, prefix string) ([]string, error) - Download(bucket string, key string, fn string) error - Upload(bucket string, key string, path string) error - CheckQueue(url string, timeout int64) (bookpipeline.Qmsg, error) - AddToQueue(url string, msg string) error - DelFromQueue(url string, handle string) error - WIPStorageId() string -} - -func getpdfs(conn Pipeliner, l *log.Logger, bookname string) { - for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} { - fn := filepath.Join(bookname, bookname+suffix) - l.Println("Downloading PDF", fn) - err := conn.Download(conn.WIPStorageId(), fn, fn) - if err != nil { - log.Printf("Failed to download %s: %s\n", fn, err) - } - } -} - func main() { all := flag.Bool("a", false, "Get all files for book") conntype := flag.String("c", "aws", "connection type ('aws' or 'local')") @@ -85,7 +61,7 @@ func main() { verboselog = log.New(n, "", log.LstdFlags) } - var conn Pipeliner + var conn pipeline.MinPipeliner switch *conntype { case "aws": conn = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog} @@ -111,18 +87,10 @@ func main() { if *all { verboselog.Println("Downloading all files for", bookname) - objs, err := conn.ListObjects(conn.WIPStorageId(), bookname) + err = pipeline.DownloadAll(bookname, conn) if err != nil { - log.Fatalln("Failed to get list of files for book", bookname, err) - } - for _, i := range objs { - verboselog.Println("Downloading", i) - err = conn.Download(conn.WIPStorageId(), i, i) - if err != nil { - log.Fatalln("Failed to download file", i, err) - } + log.Fatalln(err) } - return } if *binarisedpdf { @@ -153,61 +121,29 @@ func main() { } if *pdf { - getpdfs(conn, verboselog, bookname) + verboselog.Println("Downloading PDFs") + pipeline.DownloadPdfs(bookname, conn) } if *binarisedpdf || *colourpdf || *graph || *pdf { return } - verboselog.Println("Downloading best file") - fn := filepath.Join(bookname, "best") - err = conn.Download(conn.WIPStorageId(), fn, fn) + verboselog.Println("Downloading best pages") + err = pipeline.DownloadBestPages(bookname, conn, *png) if err != nil { - log.Fatalln("Failed to download 'best' file", err) - } - f, err := os.Open(fn) - if err != nil { - log.Fatalln("Failed to open best file", err) - } - defer f.Close() - - if *png { - verboselog.Println("Downloading png files") - s := bufio.NewScanner(f) - for s.Scan() { - txtfn := filepath.Join(bookname, s.Text()) - fn = strings.Replace(txtfn, ".hocr", ".png", 1) - verboselog.Println("Downloading file", fn) - err = conn.Download(conn.WIPStorageId(), fn, fn) - if err != nil { - log.Fatalln("Failed to download file", fn, err) - } - } - return + log.Fatalln(err) } - verboselog.Println("Downloading HOCR files") - s := bufio.NewScanner(f) - for s.Scan() { - fn = filepath.Join(bookname, s.Text()) - verboselog.Println("Downloading file", fn) - err = conn.Download(conn.WIPStorageId(), fn, fn) - if err != nil { - log.Fatalln("Failed to download file", fn, err) - } + verboselog.Println("Downloading PDFs") + pipeline.DownloadPdfs(bookname, conn) + if err != nil { + log.Fatalln(err) } - verboselog.Println("Downloading PDF files") - getpdfs(conn, verboselog, bookname) - - verboselog.Println("Downloading analysis files") - for _, a := range []string{"conf", "graph.png"} { - fn = filepath.Join(bookname, a) - verboselog.Println("Downloading file", fn) - err = conn.Download(conn.WIPStorageId(), fn, fn) - if err != nil { - log.Fatalln("Failed to download file", fn, err) - } + verboselog.Println("Downloading analyses") + err = pipeline.DownloadAnalyses(bookname, conn) + if err != nil { + log.Fatalln(err) } } -- cgit v1.2.1-24-ge1ad From 068ad0b666705a49ab22d7b48cd6a7d67b37f234 Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 7 Dec 2020 16:53:58 +0000 Subject: [rescribe] Allow saving of results to somewhere other than a directory named after the book being processed --- cmd/getpipelinebook/main.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'cmd/getpipelinebook') diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go index 5116414..ccedd72 100644 --- a/cmd/getpipelinebook/main.go +++ b/cmd/getpipelinebook/main.go @@ -87,7 +87,7 @@ func main() { if *all { verboselog.Println("Downloading all files for", bookname) - err = pipeline.DownloadAll(bookname, conn) + err = pipeline.DownloadAll(bookname, bookname, conn) if err != nil { log.Fatalln(err) } @@ -122,7 +122,7 @@ func main() { if *pdf { verboselog.Println("Downloading PDFs") - pipeline.DownloadPdfs(bookname, conn) + pipeline.DownloadPdfs(bookname, bookname, conn) } if *binarisedpdf || *colourpdf || *graph || *pdf { @@ -130,19 +130,19 @@ func main() { } verboselog.Println("Downloading best pages") - err = pipeline.DownloadBestPages(bookname, conn, *png) + err = pipeline.DownloadBestPages(bookname, bookname, conn, *png) if err != nil { log.Fatalln(err) } verboselog.Println("Downloading PDFs") - pipeline.DownloadPdfs(bookname, conn) + pipeline.DownloadPdfs(bookname, bookname, conn) if err != nil { log.Fatalln(err) } verboselog.Println("Downloading analyses") - err = pipeline.DownloadAnalyses(bookname, conn) + err = pipeline.DownloadAnalyses(bookname, bookname, conn) if err != nil { log.Fatalln(err) } -- cgit v1.2.1-24-ge1ad