summaryrefslogtreecommitdiff
path: root/bookpipeline/cmd/getpipelinebook/main.go
blob: 9d0e35e342cfb9f61eb1e0784f496fa28f68c3bd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
package main

import (
	"bufio"
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"

	"rescribe.xyz/go.git/bookpipeline"
)

// usage is the help text that flag.Usage prints ahead of the flag defaults.
const usage = "Usage: getpipelinebook [-a] [-v] bookname\n\nDownloads the pipeline results for a book.\n"

// NullWriter is a writer that throws away everything written to it.
// main hands it to the verbose logger when -v is not given, so that
// verbose messages are discarded.
type NullWriter bool

// Write reports the whole of p as written without storing it anywhere
// and never returns an error.
func (NullWriter) Write(p []byte) (int, error) {
	discarded := len(p)
	return discarded, nil
}

// Pipeliner is the set of storage and queue operations this command is
// written against. main assigns a *bookpipeline.AwsConn to it and only
// calls Init, ListObjects, Download and WIPStorageId; the remaining
// methods are part of the contract but unused in this file.
type Pipeliner interface {
	// Init prepares the connection; main calls it once before any
	// other method.
	Init() error

	// ListObjects returns names found in bucket for the given prefix.
	// NOTE(review): presumably a plain key-prefix match - confirm
	// against the implementation.
	ListObjects(bucket, prefix string) ([]string, error)

	// Download fetches key from bucket; fn is used by main as the
	// local file path to write to.
	Download(bucket, key, fn string) error

	// Upload is presumably the inverse of Download, storing the local
	// file at path under key - confirm against the implementation.
	Upload(bucket, key, path string) error

	// CheckQueue, AddToQueue and DelFromQueue operate on the queue
	// identified by url; their exact semantics are defined by the
	// implementation.
	CheckQueue(url string) (bookpipeline.Qmsg, error)
	AddToQueue(url, msg string) error
	DelFromQueue(url, handle string) error

	// WIPStorageId returns the bucket identifier main passes to
	// ListObjects and Download.
	WIPStorageId() string
}

// main parses the command line, sets up the cloud connection and
// downloads the pipeline results for the named book into a local
// directory of the same name.
//
// With -a, every object that ListObjects returns for the book name is
// downloaded. Otherwise only the "best" file, the files it names (one
// per line), and the "conf" and "graph.png" analysis files are
// fetched. Any failure is fatal and ends the program via log.Fatalln.
func main() {
	all := flag.Bool("a", false, "Get all files for book, not just hOCR and analysis files")
	verbose := flag.Bool("v", false, "Verbose")
	flag.Usage = func() {
		// usage is plain text, not a format string, so print it verbatim.
		fmt.Fprint(flag.CommandLine.Output(), usage)
		flag.PrintDefaults()
	}
	flag.Parse()

	if flag.NArg() < 1 {
		flag.Usage()
		return
	}

	// Verbose messages go to stdout with -v and are discarded otherwise.
	var verboselog *log.Logger
	if *verbose {
		verboselog = log.New(os.Stdout, "", log.LstdFlags)
	} else {
		var n NullWriter
		verboselog = log.New(n, "", log.LstdFlags)
	}

	var conn Pipeliner = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog}

	verboselog.Println("Setting up AWS session")
	err := conn.Init()
	if err != nil {
		log.Fatalln("Error setting up cloud connection:", err)
	}
	verboselog.Println("Finished setting up AWS session")

	bookname := flag.Arg(0)

	// All downloads in the default mode land inside this directory.
	err = os.MkdirAll(bookname, 0755)
	if err != nil {
		log.Fatalln("Failed to create directory", bookname, err)
	}

	if *all {
		verboselog.Println("Downloading all files for", bookname)
		objs, err := conn.ListObjects(conn.WIPStorageId(), bookname)
		if err != nil {
			log.Fatalln("Failed to get list of files for book", bookname, err)
		}
		for _, obj := range objs {
			verboselog.Println("Downloading", obj)
			// The object key doubles as the local file path.
			err = conn.Download(conn.WIPStorageId(), obj, obj)
			if err != nil {
				log.Fatalln("Failed to download file", obj, err)
			}
		}
		return
	}

	verboselog.Println("Downloading best file")
	best := filepath.Join(bookname, "best")
	err = conn.Download(conn.WIPStorageId(), best, best)
	if err != nil {
		log.Fatalln("Failed to download 'best' file", err)
	}
	f, err := os.Open(best)
	if err != nil {
		log.Fatalln("Failed to open best file", err)
	}
	defer f.Close()

	verboselog.Println("Downloading HOCR files")
	s := bufio.NewScanner(f)
	for s.Scan() {
		name := s.Text()
		if name == "" {
			// An empty line would make Join collapse to the book
			// directory itself, which is not a downloadable file.
			continue
		}
		fn := filepath.Join(bookname, name)
		verboselog.Println("Downloading file", fn)
		err = conn.Download(conn.WIPStorageId(), fn, fn)
		if err != nil {
			log.Fatalln("Failed to download file", fn, err)
		}
	}
	// Scan returning false can mean a read error rather than EOF; do
	// not silently carry on with a truncated file list.
	if err := s.Err(); err != nil {
		log.Fatalln("Failed to read best file", best, err)
	}

	analyses := []string{"conf", "graph.png"}
	verboselog.Println("Downloading analysis files")
	for _, a := range analyses {
		fn := filepath.Join(bookname, a)
		verboselog.Println("Downloading file", fn)
		err = conn.Download(conn.WIPStorageId(), fn, fn)
		if err != nil {
			log.Fatalln("Failed to download file", fn, err)
		}
	}
}