summary | refs | log | tree | commit | diff
path: root/cmd
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-12-11 15:01:17 +0000
committer Nick White <git@njw.name> 2019-12-11 15:01:17 +0000
commit 23c1b47eb42ef8de6f5d5dd8e4afb80e8bd17497 (patch)
tree 3e3f99b74d5db373a989265b3be5092f3b292726 /cmd
parent ca1ee4ccb33784103339b4b52f8948aa0dd30263 (diff)
Add ability to set a different training for the ocr job
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/bookpipeline/main.go 36
-rw-r--r-- cmd/booktopipeline/main.go 6
2 files changed, 33 insertions, 9 deletions
diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 321db94..0ffb4b2 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -103,7 +103,7 @@ func up(c chan string, done chan bool, conn Pipeliner, bookname string, errc cha
done <- true
}
-func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, bookname string, errc chan error, logger *log.Logger) {
+func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, bookname string, training string, errc chan error, logger *log.Logger) {
for path := range c {
name := filepath.Base(path)
key := filepath.Join(bookname, name)
@@ -115,8 +115,8 @@ func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, b
errc <- err
return
}
- conn.GetLogger().Println("Adding", key, "to queue", toQueue)
- err = conn.AddToQueue(toQueue, key)
+ conn.GetLogger().Println("Adding", key, training, "to queue", toQueue)
+ err = conn.AddToQueue(toQueue, key + " " + training)
if err != nil {
for range c {
} // consume the rest of the receiving channel so it isn't blocked
@@ -453,7 +453,17 @@ func ocrPage(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string, ch
done := make(chan bool)
errc := make(chan error)
- bookname := filepath.Dir(msg.Body)
+ msgparts := strings.Split(msg.Body, " ")
+ bookparts := strings.Split(msgparts[0], "/")
+ var bookname string
+ if len(bookparts) > 1 {
+ bookname = filepath.Dir(msgparts[0])
+ } else {
+ bookname = msgparts[0]
+ }
+ if len(msgparts) > 1 {
+ process = ocr(msgparts[1])
+ }
d := filepath.Join(os.TempDir(), bookname)
err := os.MkdirAll(d, 0755)
@@ -469,7 +479,7 @@ func ocrPage(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string, ch
go process(processc, upc, errc, conn.GetLogger())
go up(upc, done, conn, bookname, errc, conn.GetLogger())
- dl <- msg.Body
+ dl <- msgparts[0]
close(dl)
// wait for either the done or errc channel to be sent to
@@ -527,7 +537,18 @@ func processBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string
done := make(chan bool)
errc := make(chan error)
- bookname := msg.Body
+ msgparts := strings.Split(msg.Body, " ")
+ bookparts := strings.Split(msgparts[0], "/")
+ var bookname string
+ if len(bookparts) > 1 {
+ bookname = filepath.Dir(msgparts[0])
+ } else {
+ bookname = msgparts[0]
+ }
+ var training string
+ if len(msgparts) > 1 {
+ training = msgparts[1]
+ }
d := filepath.Join(os.TempDir(), bookname)
err := os.MkdirAll(d, 0755)
@@ -542,7 +563,7 @@ func processBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string
go download(dl, processc, conn, d, errc, conn.GetLogger())
go process(processc, upc, errc, conn.GetLogger())
if toQueue == conn.OCRPageQueueId() {
- go upAndQueue(upc, done, toQueue, conn, bookname, errc, conn.GetLogger())
+ go upAndQueue(upc, done, toQueue, conn, bookname, training, errc, conn.GetLogger())
} else {
go up(upc, done, conn, bookname, errc, conn.GetLogger())
}
@@ -577,7 +598,6 @@ func processBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string
}
if toQueue != "" && toQueue != conn.OCRPageQueueId() {
- go upAndQueue(upc, done, toQueue, conn, bookname, errc, conn.GetLogger())
conn.GetLogger().Println("Sending", bookname, "to queue", toQueue)
err = conn.AddToQueue(toQueue, bookname)
if err != nil {
diff --git a/cmd/booktopipeline/main.go b/cmd/booktopipeline/main.go
index 264b6ab..425c41f 100644
--- a/cmd/booktopipeline/main.go
+++ b/cmd/booktopipeline/main.go
@@ -10,7 +10,7 @@ import (
"rescribe.xyz/bookpipeline"
)
-const usage = `Usage: booktopipeline [-prebinarised] [-v] bookdir [bookname]
+const usage = `Usage: booktopipeline [-t training] [-prebinarised] [-v] bookdir [bookname]
Uploads the book in bookdir to the S3 'inprogress' bucket and adds it
to the 'preprocess' SQS queue, or the 'wipeonly' queue if the
@@ -52,6 +52,7 @@ func (f fileWalk) Walk(path string, info os.FileInfo, err error) error {
func main() {
verbose := flag.Bool("v", false, "Verbose")
wipeonly := flag.Bool("prebinarised", false, "Prebinarised: only preprocessing will be to wipe")
+ training := flag.String("t", "", "Training file to use")
flag.Usage = func() {
fmt.Fprintf(flag.CommandLine.Output(), usage)
@@ -111,6 +112,9 @@ func main() {
}
}
+ if *training != "" {
+ bookname = bookname + " " + *training
+ }
err = conn.AddToQueue(qid, bookname)
if err != nil {
log.Fatalln("Error adding book to queue:", err)