summaryrefslogtreecommitdiff
path: root/pipelinepreprocess
diff options
context:
space:
mode:
authorNick White <git@njw.name>2019-08-13 16:04:01 +0100
committerNick White <git@njw.name>2019-08-13 16:04:01 +0100
commitee3302b00d796b40239fbe0e7563190ac60656ce (patch)
tree96189b4746cb36f995507605a4c1c60e2dc94e0d /pipelinepreprocess
parentab773582e72bf01e1cad71431d2ddea5f216cc07 (diff)
Add basic verbose logging capabilities to pipelinepreprocess
Diffstat (limited to 'pipelinepreprocess')
-rw-r--r--pipelinepreprocess/main.go26
1 files changed, 24 insertions, 2 deletions
diff --git a/pipelinepreprocess/main.go b/pipelinepreprocess/main.go
index d4aa6a4..9a77cf6 100644
--- a/pipelinepreprocess/main.go
+++ b/pipelinepreprocess/main.go
@@ -3,6 +3,7 @@ package main
// TODO: handle errors more smartly than just always fatal erroring
// - read the sdk guarantees on retrying and ensure we retry some times before giving up if necessary
// - cancel the current book processing rather than killing the program in the case of a nonrecoverable error
+// TODO: check if images are prebinarised and if so skip multiple binarisation
import (
"log"
@@ -19,6 +20,15 @@ import (
"rescribe.xyz/go.git/preproc"
)
+const usage = "Usage: pipelinepreprocess [-v]\n\nContinuously checks the preprocess queue for books.\nWhen a book is found it's downloaded from the S3 inprogress bucket, preprocessed, and the results are uploaded to the S3 inprogress bucket. The book name is then added to the ocr queue, and removed from the preprocess queue.\n\n -v: verbose\n"
+
+// NullWriter is an io.Writer that discards everything written to it; verboselog uses it when -v is not given
+type NullWriter bool
+func (w NullWriter) Write(p []byte) (n int, err error) {
+ return len(p), nil
+}
+var verboselog *log.Logger
+
const HeartbeatTime = 60
const PauseBetweenChecks = 60 * time.Second
@@ -34,6 +44,7 @@ const PauseBetweenChecks = 60 * time.Second
func download(dl chan string, pre chan string, downloader *s3manager.Downloader, dir string) {
for key := range dl {
+ verboselog.Println("Downloading", key)
fn := filepath.Join(dir, filepath.Base(key))
f, err := os.Create(fn)
if err != nil {
@@ -55,6 +66,7 @@ func download(dl chan string, pre chan string, downloader *s3manager.Downloader,
func preprocess(pre chan string, up chan string) {
for path := range pre {
+ verboselog.Println("Preprocessing", path)
done, err := preproc.PreProcMulti(path, []float64{0.1, 0.2, 0.4, 0.5}, "binary", 0, true, 5, 30)
if err != nil {
log.Fatalln("Error preprocessing", path, err)
@@ -68,6 +80,7 @@ func preprocess(pre chan string, up chan string) {
func up(c chan string, done chan bool, uploader *s3manager.Uploader, bookname string) {
for path := range c {
+ verboselog.Println("Uploading", path)
name := filepath.Base(path)
file, err := os.Open(path)
if err != nil {
@@ -103,8 +116,15 @@ func heartbeat(h *time.Ticker, msgHandle string, qurl string, sqssvc *sqs.SQS) {
}
func main() {
- if len(os.Args) != 1 {
- log.Fatal("Usage: pipelinepreprocess\n\nContinuously checks the preprocess queue for books.\nWhen a book is found it's downloaded from the S3 inprogress bucket, preprocessed, and the results are uploaded to the S3 inprogress bucket. The book name is then added to the ocr queue, and removed from the preprocess queue.\n")
+ if len(os.Args) > 1 {
+ if os.Args[1] == "-v" {
+ verboselog = log.New(os.Stdout, "", log.LstdFlags)
+ } else {
+ log.Fatal(usage)
+ }
+ } else {
+ var n NullWriter
+ verboselog = log.New(n, "", log.LstdFlags)
}
sess, err := session.NewSession(&aws.Config{
@@ -137,6 +157,7 @@ func main() {
ocrqurl := *result.QueueUrl
for {
+ verboselog.Println("Checking preprocessing queue for new messages")
msgResult, err := sqssvc.ReceiveMessage(&sqs.ReceiveMessageInput{
MaxNumberOfMessages: aws.Int64(1),
VisibilityTimeout: aws.Int64(HeartbeatTime * 2),
@@ -150,6 +171,7 @@ func main() {
var bookname string
if len(msgResult.Messages) > 0 {
bookname = *msgResult.Messages[0].Body
+ verboselog.Println("Message received:", bookname)
} else {
time.Sleep(PauseBetweenChecks)
continue