summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-12-03 11:22:02 +0000
committer Nick White <git@njw.name> 2019-12-03 12:00:03 +0000
commit a2f98474f6e4ce35a4b191ab27b3293ebcdff418 (patch)
tree d47ab0e4376bb04edb24abeb720ddfdfc52e3946
parent 329686ac970b459aefdd29b6c044e662cbe1be80 (diff)
Don't pause between OCR page jobs; this should save us significant amounts of time when there are large numbers of pages
-rw-r--r-- cmd/bookpipeline/main.go 6
1 files changed, 4 insertions, 2 deletions
diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 3a539c1..ebec36f 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -35,7 +35,6 @@ one is found this general process is followed:
`
const PauseBetweenChecks = 3 * time.Minute
-const PauseBetweenOCRPageChecks = 1 * time.Second
const HeartbeatTime = 60
// null writer to enable non-verbose logging to be discarded
@@ -611,7 +610,7 @@ func main() {
}
case <-checkOCRPageQueue:
msg, err := conn.CheckQueue(conn.OCRPageQueueId(), HeartbeatTime*2)
- checkOCRPageQueue = time.After(PauseBetweenOCRPageChecks)
+ checkOCRPageQueue = time.After(PauseBetweenChecks)
if err != nil {
log.Println("Error checking OCR Page queue", err)
continue
@@ -619,6 +618,9 @@ func main() {
if msg.Handle == "" {
continue
}
+ // Have OCRPageQueue checked immediately after completion, as chances are high that
+ // there will be more pages that should be done without delay
+ checkOCRPageQueue = time.After(0)
verboselog.Println("Message received on OCR Page queue, processing", msg.Body)
err = ocrPage(msg, conn, ocr(*training), conn.OCRPageQueueId(), conn.AnalyseQueueId())
if err != nil {