From 23c1b47eb42ef8de6f5d5dd8e4afb80e8bd17497 Mon Sep 17 00:00:00 2001 From: Nick White Date: Wed, 11 Dec 2019 15:01:17 +0000 Subject: Add ability to set a different training for the ocr job --- cmd/bookpipeline/main.go | 36 ++++++++++++++++++++++++++++-------- cmd/booktopipeline/main.go | 6 +++++- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go index 321db94..0ffb4b2 100644 --- a/cmd/bookpipeline/main.go +++ b/cmd/bookpipeline/main.go @@ -103,7 +103,7 @@ func up(c chan string, done chan bool, conn Pipeliner, bookname string, errc cha done <- true } -func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, bookname string, errc chan error, logger *log.Logger) { +func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, bookname string, training string, errc chan error, logger *log.Logger) { for path := range c { name := filepath.Base(path) key := filepath.Join(bookname, name) @@ -115,8 +115,8 @@ func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, b errc <- err return } - conn.GetLogger().Println("Adding", key, "to queue", toQueue) - err = conn.AddToQueue(toQueue, key) + conn.GetLogger().Println("Adding", key, training, "to queue", toQueue) + err = conn.AddToQueue(toQueue, key + " " + training) if err != nil { for range c { } // consume the rest of the receiving channel so it isn't blocked @@ -453,7 +453,17 @@ func ocrPage(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string, ch done := make(chan bool) errc := make(chan error) - bookname := filepath.Dir(msg.Body) + msgparts := strings.Split(msg.Body, " ") + bookparts := strings.Split(msgparts[0], "/") + var bookname string + if len(bookparts) > 1 { + bookname = filepath.Dir(msgparts[0]) + } else { + bookname = msgparts[0] + } + if len(msgparts) > 1 { + process = ocr(msgparts[1]) + } d := filepath.Join(os.TempDir(), bookname) err := os.MkdirAll(d, 0755) @@ -469,7 +479,7 @@ func ocrPage(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string, ch go process(processc, upc, errc, conn.GetLogger()) go up(upc, done, conn, bookname, errc, conn.GetLogger()) - dl <- msg.Body + dl <- msgparts[0] close(dl) // wait for either the done or errc channel to be sent to @@ -527,7 +537,18 @@ func processBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string done := make(chan bool) errc := make(chan error) - bookname := msg.Body + msgparts := strings.Split(msg.Body, " ") + bookparts := strings.Split(msgparts[0], "/") + var bookname string + if len(bookparts) > 1 { + bookname = filepath.Dir(msgparts[0]) + } else { + bookname = msgparts[0] + } + var training string + if len(msgparts) > 1 { + training = msgparts[1] + } d := filepath.Join(os.TempDir(), bookname) err := os.MkdirAll(d, 0755) @@ -542,7 +563,7 @@ func processBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string go download(dl, processc, conn, d, errc, conn.GetLogger()) go process(processc, upc, errc, conn.GetLogger()) if toQueue == conn.OCRPageQueueId() { - go upAndQueue(upc, done, toQueue, conn, bookname, errc, conn.GetLogger()) + go upAndQueue(upc, done, toQueue, conn, bookname, training, errc, conn.GetLogger()) } else { go up(upc, done, conn, bookname, errc, conn.GetLogger()) } @@ -577,7 +598,6 @@ func processBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string } if toQueue != "" && toQueue != conn.OCRPageQueueId() { - go upAndQueue(upc, done, toQueue, conn, bookname, errc, conn.GetLogger()) conn.GetLogger().Println("Sending", bookname, "to queue", toQueue) err = conn.AddToQueue(toQueue, bookname) if err != nil { diff --git a/cmd/booktopipeline/main.go b/cmd/booktopipeline/main.go index 264b6ab..425c41f 100644 --- a/cmd/booktopipeline/main.go +++ b/cmd/booktopipeline/main.go @@ -10,7 +10,7 @@ import ( "rescribe.xyz/bookpipeline" ) -const usage = `Usage: booktopipeline [-prebinarised] [-v] bookdir [bookname] +const usage = `Usage: booktopipeline [-t training] [-prebinarised] [-v] bookdir [bookname] Uploads the book in bookdir to the S3 'inprogress' bucket and adds it to the 'preprocess' SQS queue, or the 'wipeonly' queue if the @@ -52,6 +52,7 @@ func (f fileWalk) Walk(path string, info os.FileInfo, err error) error { func main() { verbose := flag.Bool("v", false, "Verbose") wipeonly := flag.Bool("prebinarised", false, "Prebinarised: only preprocessing will be to wipe") + training := flag.String("t", "", "Training file to use") flag.Usage = func() { fmt.Fprintf(flag.CommandLine.Output(), usage) @@ -111,6 +112,9 @@ func main() { } } + if *training != "" { + bookname = bookname + " " + *training + } err = conn.AddToQueue(qid, bookname) if err != nil { log.Fatalln("Error adding book to queue:", err) -- cgit v1.2.1-24-ge1ad