diff options
author | Nick White <git@njw.name> | 2019-12-03 11:22:02 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-12-03 12:00:03 +0000 |
commit | a2f98474f6e4ce35a4b191ab27b3293ebcdff418 (patch) | |
tree | d47ab0e4376bb04edb24abeb720ddfdfc52e3946 | |
parent | 329686ac970b459aefdd29b6c044e662cbe1be80 (diff) |
Don't pause between OCR page jobs; this should save us significant amounts of time when there are large numbers of pages
-rw-r--r-- | cmd/bookpipeline/main.go | 6 |
1 files changed, 4 insertions, 2 deletions
```diff
diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 3a539c1..ebec36f 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -35,7 +35,6 @@ one is found this general process is followed:
 `
 
 const PauseBetweenChecks = 3 * time.Minute
-const PauseBetweenOCRPageChecks = 1 * time.Second
 const HeartbeatTime = 60
 
 // null writer to enable non-verbose logging to be discarded
@@ -611,7 +610,7 @@ func main() {
 			}
 		case <-checkOCRPageQueue:
 			msg, err := conn.CheckQueue(conn.OCRPageQueueId(), HeartbeatTime*2)
-			checkOCRPageQueue = time.After(PauseBetweenOCRPageChecks)
+			checkOCRPageQueue = time.After(PauseBetweenChecks)
 			if err != nil {
 				log.Println("Error checking OCR Page queue", err)
 				continue
@@ -619,6 +618,9 @@ func main() {
 			if msg.Handle == "" {
 				continue
 			}
+			// Have OCRPageQueue checked immediately after completion, as chances are high that
+			// there will be more pages that should be done without delay
+			checkOCRPageQueue = time.After(0)
 			verboselog.Println("Message received on OCR Page queue, processing", msg.Body)
 			err = ocrPage(msg, conn, ocr(*training), conn.OCRPageQueueId(), conn.AnalyseQueueId())
 			if err != nil {
```