summaryrefslogtreecommitdiff
path: root/cmd/booktopipeline/main.go
blob: f8e123d5bb8e9ffcab49fa6f1b87106bb366e95f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package main

import (
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"

	"rescribe.xyz/bookpipeline"
)

// usage is the help text that flag.Usage prints ahead of the flag defaults.
const usage = `Usage: booktopipeline [-t training] [-prebinarised] [-v] bookdir [bookname]

Uploads the book in bookdir to the S3 'inprogress' bucket and adds it
to the 'preprocess' SQS queue, or the 'wipeonly' queue if the
prebinarised flag is set.

If bookname is omitted the last part of the bookdir is used.
`

// Pipeliner lists the cloud operations this command relies on: setting up
// the connection, looking up queue and storage identifiers, uploading files
// and queueing a message.
type Pipeliner interface {
	// Init sets up the connection; main calls it before any other method.
	Init() error

	// PreQueueId returns the identifier of the queue main uses when the
	// prebinarised flag is not set.
	PreQueueId() string
	// WipeQueueId returns the identifier of the queue main uses when the
	// prebinarised flag is set.
	WipeQueueId() string
	// WIPStorageId returns the identifier main passes to Upload as bucket.
	WIPStorageId() string

	// AddToQueue adds msg to the queue identified by url.
	AddToQueue(url, msg string) error
	// Upload stores the local file at path under key in bucket.
	Upload(bucket, key, path string) error
}

// NullWriter is a writer that throws away whatever is written to it, so
// that non-verbose log output can be discarded.
type NullWriter bool

// Write reports every byte of p as written without keeping any of them.
// It never returns an error.
func (NullWriter) Write(p []byte) (int, error) {
	discarded := len(p)
	return discarded, nil
}

// verboselog receives progress messages. main points it at stdout when the
// -v flag is set and at a NullWriter otherwise.
var verboselog *log.Logger

// fileWalk is a channel onto which the paths of walked files are sent.
type fileWalk chan string

// Walk has the signature filepath.Walk expects of its callback. An error
// passed in by the walk is returned unchanged; directories are skipped;
// the path of anything else is sent on the channel.
func (fw fileWalk) Walk(path string, info os.FileInfo, err error) error {
	switch {
	case err != nil:
		return err
	case info.IsDir():
		return nil
	}
	fw <- path
	return nil
}

// main parses the command line, uploads every non-directory file found
// under bookdir to the work-in-progress storage, and then adds the book to
// a queue: the wipe queue when -prebinarised is set, the preprocess queue
// otherwise.
//
// Positional arguments are bookdir and an optional bookname; when bookname
// is omitted the last element of bookdir is used. Any other argument count
// prints the usage text and returns.
func main() {
	verbose := flag.Bool("v", false, "Verbose")
	wipeonly := flag.Bool("prebinarised", false, "Prebinarised: only preprocessing will be to wipe")
	training := flag.String("t", "", "Training to use (training filename without the .traineddata part)")

	flag.Usage = func() {
		// Fprint, not Fprintf: usage is plain text rather than a format string.
		fmt.Fprint(flag.CommandLine.Output(), usage)
		flag.PrintDefaults()
	}
	flag.Parse()
	// The usage text documents one or two positional arguments.
	if flag.NArg() < 1 || flag.NArg() > 2 {
		flag.Usage()
		return
	}

	bookdir := flag.Arg(0)
	bookname := filepath.Base(bookdir)
	// A second positional argument, when present, overrides the default name.
	if flag.NArg() > 1 {
		bookname = flag.Arg(1)
	}

	if *verbose {
		verboselog = log.New(os.Stdout, "", log.LstdFlags)
	} else {
		var n NullWriter
		verboselog = log.New(n, "", log.LstdFlags)
	}

	var conn Pipeliner = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog}
	err := conn.Init()
	if err != nil {
		log.Fatalln("Failed to set up cloud connection:", err)
	}

	var qid string
	if *wipeonly {
		qid = conn.WipeQueueId()
	} else {
		qid = conn.PreQueueId()
	}

	verboselog.Println("Walking", bookdir)
	walker := make(fileWalk)
	go func() {
		// Keep the walk's error local to this goroutine: the upload loop
		// below assigns to the outer err concurrently, and sharing it would
		// be a data race.
		if walkErr := filepath.Walk(bookdir, walker.Walk); walkErr != nil {
			log.Fatalln("Filesystem walk failed:", walkErr)
		}
		// Closing the channel ends the range loop below.
		close(walker)
	}()

	for path := range walker {
		verboselog.Println("Uploading", path)
		// Only the file's base name is used in the key, so files from
		// subdirectories are uploaded directly under bookname.
		name := filepath.Base(path)
		err = conn.Upload(conn.WIPStorageId(), filepath.Join(bookname, name), path)
		if err != nil {
			log.Fatalln("Failed to upload", path, err)
		}
	}

	// The queue message is the book name, followed by a space and the
	// training name when one was given with -t.
	if *training != "" {
		bookname = bookname + " " + *training
	}
	err = conn.AddToQueue(qid, bookname)
	if err != nil {
		log.Fatalln("Error adding book to queue:", err)
	}
}