From 1f7329ea444738f540d89567b1d7f2d737dd9c31 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 14 Apr 2020 12:09:20 +0100 Subject: Add getbests tool that was previously in the utils repo --- cmd/getbests/main.go | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 cmd/getbests/main.go (limited to 'cmd') diff --git a/cmd/getbests/main.go b/cmd/getbests/main.go new file mode 100644 index 0000000..9eca0d8 --- /dev/null +++ b/cmd/getbests/main.go @@ -0,0 +1,72 @@ +// Copyright 2020 Nick White. +// Use of this source code is governed by the GPLv3 +// license that can be found in the LICENSE file. + +// getbests downloads every 'best' file from a set of OCRed books +// stored on cloud infrastructure +package main + +import ( + "flag" + "fmt" + "log" + "strings" + + "rescribe.xyz/bookpipeline" +) + +const usage = `Usage: getbests + +Downloads every 'best' file from a set of OCRed books. This is +useful for statistics. +` + +// null writer to enable non-verbose logging to be discarded +type NullWriter bool + +func (w NullWriter) Write(p []byte) (n int, err error) { + return len(p), nil +} + +type Pipeliner interface { + Init() error + ListObjects(bucket string, prefix string) ([]string, error) + Download(bucket string, key string, fn string) error + WIPStorageId() string +} + +func main() { + flag.Usage = func() { + fmt.Fprintf(flag.CommandLine.Output(), usage) + flag.PrintDefaults() + } + flag.Parse() + + var n NullWriter + verboselog := log.New(n, "", log.LstdFlags) + + var conn Pipeliner + conn = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog} + + err := conn.Init() + if err != nil { + log.Fatalln("Error setting up cloud connection:", err) + } + + log.Println("Getting list of all available objects to filter through") + objs, err := conn.ListObjects(conn.WIPStorageId(), "") + if err != nil { + log.Fatalln("Failed to get list of files", err) + } + + log.Println("Downloading all best files found") + for _, i := range objs { + parts := strings.Split(i, "/") + if parts[len(parts) - 1] == "best" { + err = conn.Download(conn.WIPStorageId(), i, parts[0] + "-best") + if err != nil { + log.Fatalln("Failed to download file", i, err) + } + } + } +} -- cgit v1.2.1-24-ge1ad