1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
// Copyright 2019 Nick White.
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
// booktopipeline uploads a book to cloud storage and adds the name
// to a queue ready to be processed by the bookpipeline tool.
package main
import (
"flag"
"fmt"
"log"
"os"
"path/filepath"
"rescribe.xyz/bookpipeline"
)
// usage is the help text that main's flag.Usage handler prints ahead of
// the flag defaults.
const usage = `Usage: booktopipeline [-c conn] [-t training] [-prebinarised] [-v] bookdir [bookname]
Uploads the book in bookdir to the S3 'inprogress' bucket and adds it
to the 'preprocess' SQS queue, or the 'wipeonly' queue if the
prebinarised flag is set.
If bookname is omitted the last part of the bookdir is used.
`
// Pipeliner is the set of operations this command needs from a pipeline
// connection. main selects a concrete implementation from the bookpipeline
// package according to the -c flag.
type Pipeliner interface {
	// Init prepares the connection; main calls it once before any other
	// method and aborts if it returns an error.
	Init() error

	// Identifiers. main passes the result of PreQueueId or WipeQueueId to
	// AddToQueue as its url argument, and the result of WIPStorageId to
	// Upload as its bucket argument.
	PreQueueId() string
	WipeQueueId() string
	WIPStorageId() string

	// Upload stores the local file at path under key in bucket.
	Upload(bucket, key, path string) error

	// AddToQueue adds msg to the queue identified by url.
	AddToQueue(url, msg string) error
}
// NullWriter is a writer that throws away everything written to it. It
// backs the logger used when verbose output has not been requested.
type NullWriter bool

// Write discards p and reports that all of it was written successfully.
func (NullWriter) Write(p []byte) (int, error) {
	written := len(p)
	return written, nil
}
// verboselog receives progress messages. main points it at os.Stdout when
// the -v flag is set and at a NullWriter otherwise.
var verboselog *log.Logger
// fileWalk is a channel of file paths that is filled in by its Walk
// method, which has the signature of a filepath.WalkFunc.
type fileWalk chan string

// Walk sends path down the channel unless it refers to a directory.
// An error passed in by the walker is returned unchanged, without
// inspecting info and without sending anything.
func (f fileWalk) Walk(path string, info os.FileInfo, err error) error {
	switch {
	case err != nil:
		return err
	case info.IsDir():
		// Directories are traversed but never reported.
		return nil
	}
	f <- path
	return nil
}
// main parses the command line, uploads every regular file found under
// bookdir to the connection's work-in-progress storage, and finally adds
// the book name (followed by the training name, if -t was given) to either
// the wipe-only or the preprocess queue.
func main() {
	verbose := flag.Bool("v", false, "Verbose")
	conntype := flag.String("c", "aws", "connection type ('aws' or 'local')")
	wipeonly := flag.Bool("prebinarised", false, "Prebinarised: only preprocessing will be to wipe")
	training := flag.String("t", "", "Training to use (training filename without the .traineddata part)")

	flag.Usage = func() {
		// usage is plain text, not a format string, so print it verbatim.
		fmt.Fprint(flag.CommandLine.Output(), usage)
		flag.PrintDefaults()
	}
	flag.Parse()

	// The positional arguments are bookdir and an optional bookname.
	if flag.NArg() < 1 || flag.NArg() > 2 {
		flag.Usage()
		return
	}

	bookdir := flag.Arg(0)
	// The book name defaults to the last element of bookdir unless a
	// second positional argument supplies one.
	bookname := filepath.Base(bookdir)
	if flag.NArg() > 1 {
		bookname = flag.Arg(1)
	}

	if *verbose {
		verboselog = log.New(os.Stdout, "", log.LstdFlags)
	} else {
		var n NullWriter
		verboselog = log.New(n, "", log.LstdFlags)
	}

	var conn Pipeliner
	switch *conntype {
	case "aws":
		conn = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog}
	case "local":
		conn = &bookpipeline.LocalConn{Logger: verboselog}
	default:
		log.Fatalln("Unknown connection type")
	}

	err := conn.Init()
	if err != nil {
		log.Fatalln("Failed to set up cloud connection:", err)
	}

	// Choose the queue. -prebinarised forces the wipe-only queue, as the
	// usage text promises. Without it the choice is made from the files
	// directly inside bookdir: more .png than .jpg files selects the
	// wipe-only queue, anything else the preprocess queue.
	usewipe := *wipeonly
	if !usewipe {
		pngs, pngErr := filepath.Glob(filepath.Join(bookdir, "*.png"))
		jpgs, jpgErr := filepath.Glob(filepath.Join(bookdir, "*.jpg"))
		// Glob only fails on a malformed pattern (for example when bookdir
		// itself contains glob metacharacters). Detection is best effort,
		// so warn and carry on with whatever was counted.
		if pngErr != nil {
			log.Println("Warning: could not list png files in", bookdir+":", pngErr)
		}
		if jpgErr != nil {
			log.Println("Warning: could not list jpg files in", bookdir+":", jpgErr)
		}
		usewipe = len(pngs) > len(jpgs)
	}

	var qid string
	if usewipe {
		qid = conn.WipeQueueId()
		fmt.Println("Uploading book to wipe-only queue")
	} else {
		qid = conn.PreQueueId()
		fmt.Println("Uploading book to preprocess queue")
	}

	verboselog.Println("Walking", bookdir)
	walker := make(fileWalk)
	go func() {
		// Keep the walk error local to this goroutine: the upload loop
		// below assigns to err concurrently, so sharing it would be a
		// data race.
		if walkErr := filepath.Walk(bookdir, walker.Walk); walkErr != nil {
			log.Fatalln("Filesystem walk failed:", walkErr)
		}
		// Closing the channel ends the upload loop once every path has
		// been delivered.
		close(walker)
	}()

	for path := range walker {
		verboselog.Println("Uploading", path)
		// Only the file's base name is kept, so every file is stored
		// directly under bookname regardless of its subdirectory.
		name := filepath.Base(path)
		err = conn.Upload(conn.WIPStorageId(), filepath.Join(bookname, name), path)
		if err != nil {
			log.Fatalln("Failed to upload", path, err)
		}
	}

	// The queue message is the book name, optionally followed by a space
	// and the training name.
	if *training != "" {
		bookname = bookname + " " + *training
	}
	err = conn.AddToQueue(qid, bookname)
	if err != nil {
		log.Fatalln("Error adding book to queue:", err)
	}
}
|