summaryrefslogtreecommitdiff
path: root/cmd/getpipelinebook/main.go
blob: 9e900bf8f9f14fd3e8d2450607c87410d6ccacae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package main

import (
	"bufio"
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"

	"rescribe.xyz/bookpipeline"
)

// usage is the help text printed ahead of the flag defaults.
const usage = `Usage: getpipelinebook [-a] [-v] bookname

Downloads the pipeline results for a book.
`

// NullWriter is a writer that throws away everything written to it.
// It lets non-verbose runs keep a usable logger whose output is discarded.
type NullWriter bool

// Write reports all of p as written without storing it anywhere.
// It never returns an error.
func (NullWriter) Write(p []byte) (n int, err error) {
	n = len(p)
	return n, nil
}

// Pipeliner is the set of storage and queue operations this command
// requires of its cloud connection. main only calls Init, ListObjects,
// Download and WIPStorageId; the remaining methods are part of the
// required method set but are not called in this file.
type Pipeliner interface {
	// Init is called once by main before any other method.
	Init() error
	// ListObjects is called by main with the book name as prefix; every
	// returned string is then passed to Download.
	ListObjects(bucket, prefix string) ([]string, error)
	// Download is called by main with the same string for key and fn
	// (presumably the remote object name and the local file path).
	Download(bucket, key, fn string) error
	Upload(bucket, key, path string) error
	CheckQueue(url string, timeout int64) (bookpipeline.Qmsg, error)
	AddToQueue(url, msg string) error
	DelFromQueue(url, handle string) error
	// WIPStorageId supplies the value main passes as the bucket argument.
	WIPStorageId() string
}

// main parses the command line, sets up the cloud connection and
// downloads the pipeline results for the named book into a local
// directory with the same name as the book.
//
// Without -a it fetches the book's "best" file, every file named on a
// line of that file, and the "conf" and "graph.png" analysis files.
// With -a it fetches every object that ListObjects returns for the book
// name. -v sends progress messages to stdout; otherwise they are
// discarded. Any failure is fatal and reported through the standard
// logger.
func main() {
	all := flag.Bool("a", false, "Get all files for book, not just hOCR and analysis files")
	verbose := flag.Bool("v", false, "Verbose")
	flag.Usage = func() {
		// usage is plain text, not a format string, so print it verbatim.
		fmt.Fprint(flag.CommandLine.Output(), usage)
		flag.PrintDefaults()
	}
	flag.Parse()

	if flag.NArg() < 1 {
		flag.Usage()
		return
	}

	var verboselog *log.Logger
	if *verbose {
		verboselog = log.New(os.Stdout, "", log.LstdFlags)
	} else {
		var n NullWriter
		verboselog = log.New(n, "", log.LstdFlags)
	}

	var conn Pipeliner = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog}

	verboselog.Println("Setting up AWS session")
	err := conn.Init()
	if err != nil {
		log.Fatalln("Error setting up cloud connection:", err)
	}
	verboselog.Println("Finished setting up AWS session")

	bookname := flag.Arg(0)

	err = os.MkdirAll(bookname, 0755)
	if err != nil {
		log.Fatalln("Failed to create directory", bookname, err)
	}

	if *all {
		verboselog.Println("Downloading all files for", bookname)
		objs, err := conn.ListObjects(conn.WIPStorageId(), bookname)
		if err != nil {
			log.Fatalln("Failed to get list of files for book", bookname, err)
		}
		for _, i := range objs {
			verboselog.Println("Downloading", i)
			err = conn.Download(conn.WIPStorageId(), i, i)
			if err != nil {
				log.Fatalln("Failed to download file", i, err)
			}
		}
		return
	}

	verboselog.Println("Downloading best file")
	bestfn := filepath.Join(bookname, "best")
	err = conn.Download(conn.WIPStorageId(), bestfn, bestfn)
	if err != nil {
		log.Fatalln("Failed to download 'best' file", err)
	}
	f, err := os.Open(bestfn)
	if err != nil {
		log.Fatalln("Failed to open best file", err)
	}
	defer f.Close()

	verboselog.Println("Downloading HOCR files")
	s := bufio.NewScanner(f)
	for s.Scan() {
		name := s.Text()
		if name == "" {
			// Joining an empty name yields the book directory itself,
			// which is not a file that can be downloaded.
			continue
		}
		fn := filepath.Join(bookname, name)
		verboselog.Println("Downloading file", fn)
		err = conn.Download(conn.WIPStorageId(), fn, fn)
		if err != nil {
			log.Fatalln("Failed to download file", fn, err)
		}
	}
	// Scan returns false on read errors and over-long lines as well as at
	// EOF; without this check a truncated list would look like success.
	if err = s.Err(); err != nil {
		log.Fatalln("Failed to read best file", err)
	}

	analyses := []string{"conf", "graph.png"}
	verboselog.Println("Downloading analysis files")
	for _, a := range analyses {
		fn := filepath.Join(bookname, a)
		verboselog.Println("Downloading file", fn)
		err = conn.Download(conn.WIPStorageId(), fn, fn)
		if err != nil {
			log.Fatalln("Failed to download file", fn, err)
		}
	}
}