summary | refs | log | tree | commit | diff
path: root/cmd/getpipelinebook
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2019-10-08 12:52:33 +0100
committer: Nick White <git@njw.name> 2019-10-08 12:52:33 +0100
commit: 7482157a03ed3e9d7f45e54a126b391001f34948 (patch)
tree: 52f87b9ca159fe4c04a0349de95ea9de82692b3c /cmd/getpipelinebook
parent: d43c11bf653bfe3c1ad1ed277f1ec08bf155cf98 (diff)
Separate out bookpipeline from catch-all go.git repo, and rename to rescribe.xyz/bookpipeline
The dependencies from the go.git repo will follow in due course.
Diffstat (limited to 'cmd/getpipelinebook')
-rw-r--r-- cmd/getpipelinebook/main.go 122
1 files changed, 122 insertions, 0 deletions
diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go
new file mode 100644
index 0000000..9e900bf
--- /dev/null
+++ b/cmd/getpipelinebook/main.go
@@ -0,0 +1,122 @@
+package main
+
+import (
+ "bufio"
+ "flag"
+ "fmt"
+ "log"
+ "os"
+ "path/filepath"
+
+ "rescribe.xyz/bookpipeline"
+)
+
+// usage is the help text that the flag.Usage override in main prints
+// ahead of the flag defaults.
+const usage = "Usage: getpipelinebook [-a] [-v] bookname\n\nDownloads the pipeline results for a book.\n"
+
+// NullWriter is a writer that throws away everything written to it.
+// main hands it to log.New so that verbose messages are discarded when
+// -v is not given.
+type NullWriter bool
+
+// Write reports all of p as written without storing or forwarding it.
+// It never returns an error.
+func (NullWriter) Write(p []byte) (int, error) {
+	written := len(p)
+	return written, nil
+}
+
+// Pipeliner is the set of operations this command requires of a
+// pipeline connection; main stores a *bookpipeline.AwsConn in it.
+// Of these methods, main itself calls only Init, ListObjects, Download
+// and WIPStorageId.
+type Pipeliner interface {
+	Init() error
+	ListObjects(bucket, prefix string) ([]string, error)
+	Download(bucket, key, fn string) error
+	Upload(bucket, key, path string) error
+	CheckQueue(url string, timeout int64) (bookpipeline.Qmsg, error)
+	AddToQueue(url, msg string) error
+	DelFromQueue(url, handle string) error
+	WIPStorageId() string
+}
+
+// main downloads the pipeline results for the book named by the first
+// non-flag argument into a local directory of the same name.
+//
+// With -a, every object that ListObjects returns for the book name is
+// downloaded. Otherwise the "best" file is fetched, each of its lines
+// is treated as the name of a further file to fetch from the book's
+// directory, and finally the "conf" and "graph.png" analysis files are
+// fetched. Any failure is reported through log.Fatalln, which ends the
+// program.
+func main() {
+	all := flag.Bool("a", false, "Get all files for book, not just hOCR and analysis files")
+	verbose := flag.Bool("v", false, "Verbose")
+	flag.Usage = func() {
+		// usage is plain text rather than a format string, so Fprint is
+		// used instead of Fprintf.
+		fmt.Fprint(flag.CommandLine.Output(), usage)
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	if flag.NArg() < 1 {
+		flag.Usage()
+		return
+	}
+
+	// Verbose messages go to stdout when -v is given; otherwise they are
+	// written to a NullWriter and so discarded.
+	var verboselog *log.Logger
+	if *verbose {
+		verboselog = log.New(os.Stdout, "", log.LstdFlags)
+	} else {
+		var n NullWriter
+		verboselog = log.New(n, "", log.LstdFlags)
+	}
+
+	var conn Pipeliner = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog}
+
+	verboselog.Println("Setting up AWS session")
+	if err := conn.Init(); err != nil {
+		log.Fatalln("Error setting up cloud connection:", err)
+	}
+	verboselog.Println("Finished setting up AWS session")
+
+	bookname := flag.Arg(0)
+
+	// Every Download call below passes the object key as the local path
+	// too, so the book's directory is created before anything is fetched.
+	if err := os.MkdirAll(bookname, 0755); err != nil {
+		log.Fatalln("Failed to create directory", bookname, err)
+	}
+
+	if *all {
+		verboselog.Println("Downloading all files for", bookname)
+		objs, err := conn.ListObjects(conn.WIPStorageId(), bookname)
+		if err != nil {
+			log.Fatalln("Failed to get list of files for book", bookname, err)
+		}
+		for _, i := range objs {
+			verboselog.Println("Downloading", i)
+			if err := conn.Download(conn.WIPStorageId(), i, i); err != nil {
+				log.Fatalln("Failed to download file", i, err)
+			}
+		}
+		return
+	}
+
+	// NOTE(review): filepath.Join uses the OS path separator, and the
+	// result is used both as the object key and as the local path.
+	// Confirm this is intended on platforms where the separator is not "/".
+	verboselog.Println("Downloading best file")
+	fn := filepath.Join(bookname, "best")
+	if err := conn.Download(conn.WIPStorageId(), fn, fn); err != nil {
+		log.Fatalln("Failed to download 'best' file", err)
+	}
+	f, err := os.Open(fn)
+	if err != nil {
+		log.Fatalln("Failed to open best file", err)
+	}
+	defer f.Close()
+
+	// Each line of the best file names one file inside the book directory.
+	verboselog.Println("Downloading HOCR files")
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		fn = filepath.Join(bookname, s.Text())
+		verboselog.Println("Downloading file", fn)
+		if err := conn.Download(conn.WIPStorageId(), fn, fn); err != nil {
+			log.Fatalln("Failed to download file", fn, err)
+		}
+	}
+	// Scan returns false on a read error as well as at end of file;
+	// without this check a failed read would silently skip the rest of
+	// the listed files.
+	if err := s.Err(); err != nil {
+		log.Fatalln("Failed to read best file", err)
+	}
+
+	analyses := []string{"conf", "graph.png"}
+	verboselog.Println("Downloading analysis files")
+	for _, a := range analyses {
+		fn = filepath.Join(bookname, a)
+		verboselog.Println("Downloading file", fn)
+		if err := conn.Download(conn.WIPStorageId(), fn, fn); err != nil {
+			log.Fatalln("Failed to download file", fn, err)
+		}
+	}
+}