summary refs log tree commit diff
path: root/bookpipeline
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-09-02 10:15:08 +0100
committer Nick White <git@njw.name> 2019-09-02 10:15:08 +0100
commit e23fde072f7bfcef95ae4d042c266c15737b8da7 (patch)
tree 335a4390abf1c86cf8da8c2bc27cefb27cda1ac2 /bookpipeline
parent bd557cbeb752c6296f3a74cc2c7aea5c13bb55fa (diff)
Add initial getpipelinebook cmd (untested)
Diffstat (limited to 'bookpipeline')
-rw-r--r-- bookpipeline/cmd/getpipelinebook/main.go 121
1 files changed, 121 insertions, 0 deletions
diff --git a/bookpipeline/cmd/getpipelinebook/main.go b/bookpipeline/cmd/getpipelinebook/main.go
new file mode 100644
index 0000000..9d0e35e
--- /dev/null
+++ b/bookpipeline/cmd/getpipelinebook/main.go
@@ -0,0 +1,121 @@
+package main
+
+import (
+ "bufio"
+ "flag"
+ "fmt"
+ "log"
+ "os"
+ "path/filepath"
+
+ "rescribe.xyz/go.git/bookpipeline"
+)
+
+// usage is the help text written by the flag.Usage function that main
+// installs, ahead of the per-flag defaults.
+const usage = "Usage: getpipelinebook [-a] [-v] bookname\n\nDownloads the pipeline results for a book.\n"
+
+// NullWriter is a writer that throws away everything written to it. main
+// hands one to log.New so that non-verbose runs produce no log output.
+type NullWriter bool
+
+// Write discards p and reports all of it as written, with a nil error.
+func (NullWriter) Write(p []byte) (int, error) {
+	return len(p), nil
+}
+
+// Pipeliner is the set of methods a value must provide to serve as the cloud
+// connection in main, which assigns a *bookpipeline.AwsConn to it. This
+// command itself only calls Init, ListObjects, Download and WIPStorageId.
+type Pipeliner interface {
+	Init() error
+	WIPStorageId() string
+	ListObjects(bucket, prefix string) ([]string, error)
+	Download(bucket, key, fn string) error
+	Upload(bucket, key, path string) error
+	CheckQueue(url string) (bookpipeline.Qmsg, error)
+	AddToQueue(url, msg string) error
+	DelFromQueue(url, handle string) error
+}
+
+// main downloads the pipeline results for the book named by the first
+// positional argument into a local directory of the same name.
+//
+// With -a, every object listed under the book name in the WIP storage is
+// downloaded. Otherwise the book's "best" file is fetched, followed by each
+// file it names (one per line) and the "conf" and "graph.png" analysis files.
+// -v turns on progress logging to stdout. Any failure is fatal.
+func main() {
+	all := flag.Bool("a", false, "Get all files for book, not just hOCR and analysis files")
+	verbose := flag.Bool("v", false, "Verbose")
+	flag.Usage = func() {
+		// usage is plain text, not a format string, so Fprint is the right call.
+		fmt.Fprint(flag.CommandLine.Output(), usage)
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	if flag.NArg() < 1 {
+		flag.Usage()
+		return
+	}
+
+	// Without -v the logger still exists but writes to a NullWriter, so the
+	// Println calls below need no conditionals.
+	var verboselog *log.Logger
+	if *verbose {
+		verboselog = log.New(os.Stdout, "", log.LstdFlags)
+	} else {
+		var n NullWriter
+		verboselog = log.New(n, "", log.LstdFlags)
+	}
+
+	var conn Pipeliner = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog}
+
+	verboselog.Println("Setting up AWS session")
+	err := conn.Init()
+	if err != nil {
+		log.Fatalln("Error setting up cloud connection:", err)
+	}
+	verboselog.Println("Finished setting up AWS session")
+
+	bookname := flag.Arg(0)
+
+	err = os.MkdirAll(bookname, 0755)
+	if err != nil {
+		log.Fatalln("Failed to create directory", bookname, err)
+	}
+
+	if *all {
+		verboselog.Println("Downloading all files for", bookname)
+		objs, err := conn.ListObjects(conn.WIPStorageId(), bookname)
+		if err != nil {
+			log.Fatalln("Failed to get list of files for book", bookname, err)
+		}
+		for _, i := range objs {
+			verboselog.Println("Downloading", i)
+			// The object key doubles as the local destination path.
+			err = conn.Download(conn.WIPStorageId(), i, i)
+			if err != nil {
+				log.Fatalln("Failed to download file", i, err)
+			}
+		}
+		return
+	}
+
+	verboselog.Println("Downloading best file")
+	bestfn := filepath.Join(bookname, "best")
+	err = conn.Download(conn.WIPStorageId(), bestfn, bestfn)
+	if err != nil {
+		log.Fatalln("Failed to download 'best' file", err)
+	}
+	f, err := os.Open(bestfn)
+	if err != nil {
+		log.Fatalln("Failed to open best file", err)
+	}
+	defer f.Close()
+
+	verboselog.Println("Downloading HOCR files")
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		// Each line of the best file names one file under the book's prefix.
+		fn := filepath.Join(bookname, s.Text())
+		verboselog.Println("Downloading file", fn)
+		err = conn.Download(conn.WIPStorageId(), fn, fn)
+		if err != nil {
+			log.Fatalln("Failed to download file", fn, err)
+		}
+	}
+	// Scan also returns false on a read error; without this check a
+	// truncated list would be treated as a complete download.
+	if err = s.Err(); err != nil {
+		log.Fatalln("Failed to read best file", bestfn, err)
+	}
+
+	analyses := []string{"conf", "graph.png"}
+	verboselog.Println("Downloading analysis files")
+	for _, a := range analyses {
+		fn := filepath.Join(bookname, a)
+		verboselog.Println("Downloading file", fn)
+		err = conn.Download(conn.WIPStorageId(), fn, fn)
+		if err != nil {
+			log.Fatalln("Failed to download file", fn, err)
+		}
+	}
+}