diff options
| author | Nick White <git@njw.name> | 2019-12-11 15:01:17 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-12-11 15:01:17 +0000 | 
| commit | 23c1b47eb42ef8de6f5d5dd8e4afb80e8bd17497 (patch) | |
| tree | 3e3f99b74d5db373a989265b3be5092f3b292726 /cmd | |
| parent | ca1ee4ccb33784103339b4b52f8948aa0dd30263 (diff) | |
Add ability to set a different training file for the OCR job
Diffstat (limited to 'cmd')
| -rw-r--r-- | cmd/bookpipeline/main.go | 36 |
| -rw-r--r-- | cmd/booktopipeline/main.go | 6 |
2 files changed, 33 insertions, 9 deletions
| diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go index 321db94..0ffb4b2 100644 --- a/cmd/bookpipeline/main.go +++ b/cmd/bookpipeline/main.go @@ -103,7 +103,7 @@ func up(c chan string, done chan bool, conn Pipeliner, bookname string, errc cha  	done <- true  } -func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, bookname string, errc chan error, logger *log.Logger) { +func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, bookname string, training string, errc chan error, logger *log.Logger) {  	for path := range c {  		name := filepath.Base(path)  		key := filepath.Join(bookname, name) @@ -115,8 +115,8 @@ func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, b  			errc <- err  			return  		} -		conn.GetLogger().Println("Adding", key, "to queue", toQueue) -		err = conn.AddToQueue(toQueue, key) +		conn.GetLogger().Println("Adding", key, training, "to queue", toQueue) +		err = conn.AddToQueue(toQueue, key + " " + training)  		if err != nil {  			for range c {  			} // consume the rest of the receiving channel so it isn't blocked @@ -453,7 +453,17 @@ func ocrPage(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string, ch  	done := make(chan bool)  	errc := make(chan error) -	bookname := filepath.Dir(msg.Body) +	msgparts := strings.Split(msg.Body, " ") +	bookparts := strings.Split(msgparts[0], "/") +	var bookname string +	if len(bookparts) > 1 { +		bookname = filepath.Dir(msgparts[0]) +	} else { +		bookname = msgparts[0] +	} +	if len(msgparts) > 1 { +		process = ocr(msgparts[1]) +	}  	d := filepath.Join(os.TempDir(), bookname)  	err := os.MkdirAll(d, 0755) @@ -469,7 +479,7 @@ func ocrPage(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string, ch  	go process(processc, upc, errc, conn.GetLogger())  	go up(upc, done, conn, bookname, errc, conn.GetLogger()) -	dl <- msg.Body +	dl <- msgparts[0]  	close(dl)  	// wait for either the done or errc channel to be sent 
to @@ -527,7 +537,18 @@ func processBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string  	done := make(chan bool)  	errc := make(chan error) -	bookname := msg.Body +	msgparts := strings.Split(msg.Body, " ") +	bookparts := strings.Split(msgparts[0], "/") +	var bookname string +	if len(bookparts) > 1 { +		bookname = filepath.Dir(msgparts[0]) +	} else { +		bookname = msgparts[0] +	} +	var training string +	if len(msgparts) > 1 { +		training = msgparts[1] +	}  	d := filepath.Join(os.TempDir(), bookname)  	err := os.MkdirAll(d, 0755) @@ -542,7 +563,7 @@ func processBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string  	go download(dl, processc, conn, d, errc, conn.GetLogger())  	go process(processc, upc, errc, conn.GetLogger())  	if toQueue == conn.OCRPageQueueId() { -		go upAndQueue(upc, done, toQueue, conn, bookname, errc, conn.GetLogger()) +		go upAndQueue(upc, done, toQueue, conn, bookname, training, errc, conn.GetLogger())  	} else {  		go up(upc, done, conn, bookname, errc, conn.GetLogger())  	} @@ -577,7 +598,6 @@ func processBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string  	}  	if toQueue != "" && toQueue != conn.OCRPageQueueId() { -		go upAndQueue(upc, done, toQueue, conn, bookname, errc, conn.GetLogger())  		conn.GetLogger().Println("Sending", bookname, "to queue", toQueue)  		err = conn.AddToQueue(toQueue, bookname)  		if err != nil { diff --git a/cmd/booktopipeline/main.go b/cmd/booktopipeline/main.go index 264b6ab..425c41f 100644 --- a/cmd/booktopipeline/main.go +++ b/cmd/booktopipeline/main.go @@ -10,7 +10,7 @@ import (  	"rescribe.xyz/bookpipeline"  ) -const usage = `Usage: booktopipeline [-prebinarised] [-v] bookdir [bookname] +const usage = `Usage: booktopipeline [-t training] [-prebinarised] [-v] bookdir [bookname]  Uploads the book in bookdir to the S3 'inprogress' bucket and adds it  to the 'preprocess' SQS queue, or the 'wipeonly' queue if the @@ -52,6 +52,7 @@ func (f fileWalk) Walk(path string, 
info os.FileInfo, err error) error {  func main() {  	verbose := flag.Bool("v", false, "Verbose")  	wipeonly := flag.Bool("prebinarised", false, "Prebinarised: only preprocessing will be to wipe") +	training := flag.String("t", "", "Training file to use")  	flag.Usage = func() {  		fmt.Fprintf(flag.CommandLine.Output(), usage) @@ -111,6 +112,9 @@ func main() {  		}  	} +	if *training != "" { +		bookname = bookname + " " + *training +	}  	err = conn.AddToQueue(qid, bookname)  	if err != nil {  		log.Fatalln("Error adding book to queue:", err) | 
