summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2020-04-14 12:09:20 +0100
committer: Nick White <git@njw.name> 2020-04-14 12:09:20 +0100
commit: 1f7329ea444738f540d89567b1d7f2d737dd9c31 (patch)
tree: 4599bdd35cc769714c15a1217ddcf6d046655848
parent: 2f5f587dca00ce508b4877a0f2ed049b6937112f (diff)
Add getbests tool that was previously in the utils repo
-rw-r--r-- cmd/getbests/main.go 72
1 files changed, 72 insertions, 0 deletions
diff --git a/cmd/getbests/main.go b/cmd/getbests/main.go
new file mode 100644
index 0000000..9eca0d8
--- /dev/null
+++ b/cmd/getbests/main.go
@@ -0,0 +1,72 @@
+// Copyright 2020 Nick White.
+// Use of this source code is governed by the GPLv3
+// license that can be found in the LICENSE file.
+
+// getbests downloads every 'best' file from a set of OCRed books
+// stored on cloud infrastructure.
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "strings"
+
+ "rescribe.xyz/bookpipeline"
+)
+
+// usage is the help text that flag.Usage prints before the flag defaults.
+const usage = `Usage: getbests
+
+Downloads every 'best' file from a set of OCRed books. This is
+useful for statistics.
+`
+
+// NullWriter is a sink that accepts any data and throws it away, so that
+// a logger pointed at it produces no output.
+type NullWriter bool
+
+// Write reports the whole of p as written, with a nil error, without
+// storing or forwarding any of it.
+func (NullWriter) Write(p []byte) (n int, err error) {
+	n = len(p)
+	return n, nil
+}
+
+// Pipeliner is the set of cloud storage operations this tool calls:
+// setting up the connection, naming the bucket to read from, listing
+// object keys and downloading an object.
+type Pipeliner interface {
+	// Init is called once before any other method; main treats a
+	// non-nil error as fatal.
+	Init() error
+	// WIPStorageId supplies the bucket identifier that main passes to
+	// ListObjects and Download.
+	WIPStorageId() string
+	// ListObjects returns object keys from bucket; presumably only those
+	// starting with prefix (main passes an empty prefix) - confirm
+	// against the implementation.
+	ListObjects(bucket, prefix string) ([]string, error)
+	// Download fetches key from bucket; fn is presumably the local file
+	// name to write to - confirm against the implementation.
+	Download(bucket, key, fn string) error
+}
+
+// main connects to the cloud storage backend, lists every object key in
+// the WIP storage bucket, and downloads each key whose final
+// slash-separated component is exactly "best". The destination name
+// passed to Download is the key's first component with "-best" appended.
+// Any failure terminates the program via log.Fatalln.
+func main() {
+	flag.Usage = func() {
+		// Fprint rather than Fprintf: usage is plain text, not a format
+		// string, so a '%' in it must never be interpreted as a verb.
+		fmt.Fprint(flag.CommandLine.Output(), usage)
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	// The connection's logger writes to a NullWriter, so its output is
+	// discarded.
+	var n NullWriter
+	verboselog := log.New(n, "", log.LstdFlags)
+
+	var conn Pipeliner = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog}
+
+	if err := conn.Init(); err != nil {
+		log.Fatalln("Error setting up cloud connection:", err)
+	}
+
+	log.Println("Getting list of all available objects to filter through")
+	objs, err := conn.ListObjects(conn.WIPStorageId(), "")
+	if err != nil {
+		log.Fatalln("Failed to get list of files", err)
+	}
+
+	log.Println("Downloading all best files found")
+	for _, key := range objs {
+		// strings.Split always returns at least one element for a
+		// non-empty separator, so indexing the last part is safe.
+		parts := strings.Split(key, "/")
+		if parts[len(parts)-1] != "best" {
+			continue
+		}
+		if err := conn.Download(conn.WIPStorageId(), key, parts[0]+"-best"); err != nil {
+			log.Fatalln("Failed to download file", key, err)
+		}
+	}
+}