From a2f98474f6e4ce35a4b191ab27b3293ebcdff418 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Tue, 3 Dec 2019 11:22:02 +0000
Subject: Don't pause between OCR page jobs; this should save us significant amounts of time when there are large numbers of pages

---
 cmd/bookpipeline/main.go | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 3a539c1..ebec36f 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -35,7 +35,6 @@ one is found this general process is followed:
 `
 
 const PauseBetweenChecks = 3 * time.Minute
-const PauseBetweenOCRPageChecks = 1 * time.Second
 const HeartbeatTime = 60
 
 // null writer to enable non-verbose logging to be discarded
@@ -611,7 +610,7 @@ func main() {
 			}
 		case <-checkOCRPageQueue:
 			msg, err := conn.CheckQueue(conn.OCRPageQueueId(), HeartbeatTime*2)
-			checkOCRPageQueue = time.After(PauseBetweenOCRPageChecks)
+			checkOCRPageQueue = time.After(PauseBetweenChecks)
 			if err != nil {
 				log.Println("Error checking OCR Page queue", err)
 				continue
@@ -619,6 +618,9 @@ func main() {
 			if msg.Handle == "" {
 				continue
 			}
+			// Have OCRPageQueue checked immediately after completion, as chances are high that
+			// there will be more pages that should be done without delay
+			checkOCRPageQueue = time.After(0)
 			verboselog.Println("Message received on OCR Page queue, processing", msg.Body)
 			err = ocrPage(msg, conn, ocr(*training), conn.OCRPageQueueId(), conn.AnalyseQueueId())
 			if err != nil {
-- 
cgit v1.2.1-24-ge1ad