summary refs log tree commit diff
path: root/cmd
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2022-01-31 14:11:21 +0000
committer Nick White <git@njw.name> 2022-01-31 14:11:21 +0000
commit 550752fa2ab493fb6d10aa9d963fc45996c0d100 (patch)
tree 279d2c7c7d062f6232f363d1462539738b7e4cc8 /cmd
parent 57a3dc6da88e08951060e2e6e11605eb807f54ac (diff)
Make pipeline context-aware, so the rescribe tool can cancel jobs
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/bookpipeline/main.go 11
-rw-r--r-- cmd/booktopipeline/main.go 7
-rw-r--r-- cmd/rescribe/gui.go 37
-rw-r--r-- cmd/rescribe/main.go 31
4 files changed, 63 insertions, 23 deletions
diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 65c9b79..4de9ea9 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -9,6 +9,7 @@ package main
import (
"bytes"
+ "context"
"flag"
"fmt"
"log"
@@ -118,6 +119,8 @@ func main() {
wipePattern := regexp.MustCompile(`[0-9]{4,6}(.bin)?.png$`)
ocredPattern := regexp.MustCompile(`.hocr$`)
+ var ctx context.Context
+
var conn Pipeliner
switch *conntype {
case "aws":
@@ -190,7 +193,7 @@ func main() {
}
conn.Log("Message received on preprocess queue, processing", msg.Body)
stopTimer(stopIfQuiet)
- err = pipeline.ProcessBook(msg, conn, pipeline.Preprocess([]float64{0.1, 0.2, 0.4, 0.5}), origPattern, conn.PreQueueId(), conn.OCRPageQueueId())
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Preprocess([]float64{0.1, 0.2, 0.4, 0.5}), origPattern, conn.PreQueueId(), conn.OCRPageQueueId())
resetTimer(stopIfQuiet, quietTime)
if err != nil {
conn.Log("Error during preprocess", err)
@@ -208,7 +211,7 @@ func main() {
}
stopTimer(stopIfQuiet)
conn.Log("Message received on wipeonly queue, processing", msg.Body)
- err = pipeline.ProcessBook(msg, conn, pipeline.Wipe, wipePattern, conn.WipeQueueId(), conn.OCRPageQueueId())
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Wipe, wipePattern, conn.WipeQueueId(), conn.OCRPageQueueId())
resetTimer(stopIfQuiet, quietTime)
if err != nil {
conn.Log("Error during wipe", err)
@@ -228,7 +231,7 @@ func main() {
checkOCRPageQueue = time.After(0)
stopTimer(stopIfQuiet)
conn.Log("Message received on OCR Page queue, processing", msg.Body)
- err = pipeline.OcrPage(msg, conn, pipeline.Ocr(*training, ""), conn.OCRPageQueueId(), conn.AnalyseQueueId())
+ err = pipeline.OcrPage(ctx, msg, conn, pipeline.Ocr(*training, ""), conn.OCRPageQueueId(), conn.AnalyseQueueId())
resetTimer(stopIfQuiet, quietTime)
if err != nil {
conn.Log("Error during OCR Page process", err)
@@ -246,7 +249,7 @@ func main() {
}
stopTimer(stopIfQuiet)
conn.Log("Message received on analyse queue, processing", msg.Body)
- err = pipeline.ProcessBook(msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "")
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "")
resetTimer(stopIfQuiet, quietTime)
if err != nil {
conn.Log("Error during analysis", err)
diff --git a/cmd/booktopipeline/main.go b/cmd/booktopipeline/main.go
index b4f4d99..bf088a0 100644
--- a/cmd/booktopipeline/main.go
+++ b/cmd/booktopipeline/main.go
@@ -7,6 +7,7 @@
package main
import (
+ "context"
"flag"
"fmt"
"log"
@@ -65,6 +66,8 @@ func main() {
bookname = filepath.Base(bookdir)
}
+ var ctx context.Context
+
if *verbose {
verboselog = log.New(os.Stdout, "", log.LstdFlags)
} else {
@@ -97,7 +100,7 @@ func main() {
}
verboselog.Println("Checking that all images are valid in", bookdir)
- err = pipeline.CheckImages(bookdir)
+ err = pipeline.CheckImages(ctx, bookdir)
if err != nil {
log.Fatalln(err)
}
@@ -112,7 +115,7 @@ func main() {
}
verboselog.Println("Uploading all images are valid in", bookdir)
- err = pipeline.UploadImages(bookdir, bookname, conn)
+ err = pipeline.UploadImages(ctx, bookdir, bookname, conn)
if err != nil {
log.Fatalln(err)
}
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index 06e6ddd..bdcc16c 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -6,6 +6,7 @@ package main
import (
"bufio"
+ "context"
"errors"
"fmt"
"io"
@@ -221,7 +222,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
myWindow.Resize(fyne.NewSize(800, 400))
- var gobtn *widget.Button
+ var abortbtn, gobtn *widget.Button
var fullContent *fyne.Container
dir := widget.NewLabel("")
@@ -272,6 +273,23 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
detail := widget.NewAccordion(widget.NewAccordionItem("Log", logarea))
+ var ctx context.Context
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithCancel(context.Background())
+
+ abortbtn = widget.NewButtonWithIcon("Abort", theme.CancelIcon(), func() {
+ fmt.Printf("\nAbort\n")
+ cancel()
+ progressBar.SetValue(0.0)
+ gobtn.SetText("Process OCR")
+ for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} {
+ v.Enable()
+ }
+ abortbtn.Disable()
+ ctx, cancel = context.WithCancel(context.Background())
+ })
+ abortbtn.Disable()
+
gobtn = widget.NewButtonWithIcon("Start OCR", theme.UploadIcon(), func() {
if dir.Text == "" {
return
@@ -347,6 +365,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} {
v.Enable()
}
+ abortbtn.Disable()
return
}
@@ -356,6 +375,8 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
v.Disable()
}
+ abortbtn.Enable()
+
progressBar.SetValue(0.1)
if strings.HasSuffix(dir.Text, ".pdf") && !f.IsDir() {
@@ -370,6 +391,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} {
v.Enable()
}
+ abortbtn.Disable()
return
}
@@ -385,6 +407,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} {
v.Enable()
}
+ abortbtn.Disable()
return
}
@@ -399,7 +422,11 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
training = training[start:end]
}
- err = startProcess(log, cmd, bookdir, bookname, training, savedir, tessdir)
+ err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir)
+ if strings.HasSuffix(err.Error(), "context canceled") {
+ progressBar.SetValue(0.0)
+ return
+ }
if err != nil {
msg := fmt.Sprintf("Error during processing: %v\n", err)
dialog.ShowError(errors.New(msg), myWindow)
@@ -410,6 +437,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} {
v.Enable()
}
+ abortbtn.Disable()
return
}
@@ -419,6 +447,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} {
v.Enable()
}
+ abortbtn.Disable()
}()
})
gobtn.Disable()
@@ -429,8 +458,8 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
trainingBits := container.New(layout.NewBorderLayout(nil, nil, trainingLabel, nil), trainingLabel, trainingOpts)
- fullContent = container.NewVBox(choices, chosen, trainingBits, gobtn, progressBar, detail)
- startContent := container.NewVBox(choices, trainingBits, gobtn, progressBar, detail)
+ fullContent = container.NewVBox(choices, chosen, trainingBits, gobtn, abortbtn, progressBar, detail)
+ startContent := container.NewVBox(choices, trainingBits, gobtn, abortbtn, progressBar, detail)
myWindow.SetContent(startContent)
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 3f7bd71..cd242af 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -12,6 +12,7 @@ package main
import (
"archive/zip"
"bytes"
+ "context"
_ "embed"
"flag"
"fmt"
@@ -284,7 +285,9 @@ These training files are included in rescribe, and are always available:
ispdf = true
}
- err = startProcess(*verboselog, tessCommand, bookdir, bookname, trainingName, savedir, tessdir)
+ var ctx context.Context
+
+ err = startProcess(ctx, *verboselog, tessCommand, bookdir, bookname, trainingName, savedir, tessdir)
if err != nil {
log.Fatalln(err)
}
@@ -413,7 +416,7 @@ func rmIfNotImage(f string) error {
return nil
}
-func startProcess(logger log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, savedir string, tessdir string) error {
+func startProcess(ctx context.Context, logger log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, savedir string, tessdir string) error {
_, err := exec.Command(tessCommand, "--help").Output()
if err != nil {
errmsg := "Error, Can't run Tesseract\n"
@@ -441,14 +444,14 @@ func startProcess(logger log.Logger, tessCommand string, bookdir string, booknam
fmt.Printf("Copying book to pipeline\n")
- err = uploadbook(bookdir, bookname, conn)
+ err = uploadbook(ctx, bookdir, bookname, conn)
if err != nil {
_ = os.RemoveAll(tempdir)
return fmt.Errorf("Error uploading book: %v", err)
}
fmt.Printf("Processing book\n")
- err = processbook(trainingName, tessCommand, conn)
+ err = processbook(ctx, trainingName, tessCommand, conn)
if err != nil {
_ = os.RemoveAll(tempdir)
return fmt.Errorf("Error processing book: %v", err)
@@ -554,16 +557,16 @@ func addTxtVersion(hocrfn string) error {
return nil
}
-func uploadbook(dir string, name string, conn Pipeliner) error {
+func uploadbook(ctx context.Context, dir string, name string, conn Pipeliner) error {
_, err := os.Stat(dir)
if err != nil && !os.IsExist(err) {
return fmt.Errorf("Error: directory %s not found", dir)
}
- err = pipeline.CheckImages(dir)
+ err = pipeline.CheckImages(ctx, dir)
if err != nil {
return fmt.Errorf("Error with images in %s: %v", dir, err)
}
- err = pipeline.UploadImages(dir, name, conn)
+ err = pipeline.UploadImages(ctx, dir, name, conn)
if err != nil {
return fmt.Errorf("Error saving images to process from %s: %v", dir, err)
}
@@ -602,7 +605,7 @@ func downloadbook(dir string, name string, conn Pipeliner) error {
return nil
}
-func processbook(training string, tesscmd string, conn Pipeliner) error {
+func processbook(ctx context.Context, training string, tesscmd string, conn Pipeliner) error {
origPattern := regexp.MustCompile(`[0-9]{4}.(jpg|png)$`)
wipePattern := regexp.MustCompile(`[0-9]{4,6}(.bin)?.(jpg|png)$`)
ocredPattern := regexp.MustCompile(`.hocr$`)
@@ -624,6 +627,8 @@ func processbook(training string, tesscmd string, conn Pipeliner) error {
for {
select {
+ case <-ctx.Done():
+ return ctx.Err()
case <-checkPreQueue:
msg, err := conn.CheckQueue(conn.PreQueueId(), QueueTimeoutSecs)
checkPreQueue = time.After(PauseBetweenChecks)
@@ -637,12 +642,12 @@ func processbook(training string, tesscmd string, conn Pipeliner) error {
stopTimer(stopIfQuiet)
conn.Log("Message received on preprocess queue, processing", msg.Body)
fmt.Printf(" Preprocessing book (binarising and wiping)\n")
- err = pipeline.ProcessBook(msg, conn, pipeline.Preprocess(thresholds), origPattern, conn.PreQueueId(), conn.OCRPageQueueId())
- fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Preprocess(thresholds), origPattern, conn.PreQueueId(), conn.OCRPageQueueId())
resetTimer(stopIfQuiet, quietTime)
if err != nil {
return fmt.Errorf("Error during preprocess: %v", err)
}
+ fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output
case <-checkWipeQueue:
msg, err := conn.CheckQueue(conn.WipeQueueId(), QueueTimeoutSecs)
checkWipeQueue = time.After(PauseBetweenChecks)
@@ -656,7 +661,7 @@ func processbook(training string, tesscmd string, conn Pipeliner) error {
stopTimer(stopIfQuiet)
conn.Log("Message received on wipeonly queue, processing", msg.Body)
fmt.Printf(" Preprocessing book (wiping only)\n")
- err = pipeline.ProcessBook(msg, conn, pipeline.Wipe, wipePattern, conn.WipeQueueId(), conn.OCRPageQueueId())
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Wipe, wipePattern, conn.WipeQueueId(), conn.OCRPageQueueId())
fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output
resetTimer(stopIfQuiet, quietTime)
if err != nil {
@@ -677,7 +682,7 @@ func processbook(training string, tesscmd string, conn Pipeliner) error {
stopTimer(stopIfQuiet)
conn.Log("Message received on OCR Page queue, processing", msg.Body)
fmt.Printf(".")
- err = pipeline.OcrPage(msg, conn, pipeline.Ocr(training, tesscmd), conn.OCRPageQueueId(), conn.AnalyseQueueId())
+ err = pipeline.OcrPage(ctx, msg, conn, pipeline.Ocr(training, tesscmd), conn.OCRPageQueueId(), conn.AnalyseQueueId())
resetTimer(stopIfQuiet, quietTime)
if err != nil {
return fmt.Errorf("\nError during OCR Page process: %v", err)
@@ -695,7 +700,7 @@ func processbook(training string, tesscmd string, conn Pipeliner) error {
stopTimer(stopIfQuiet)
conn.Log("Message received on analyse queue, processing", msg.Body)
fmt.Printf("\n Analysing OCR and compiling PDFs\n")
- err = pipeline.ProcessBook(msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "")
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "")
resetTimer(stopIfQuiet, quietTime)
if err != nil {
return fmt.Errorf("Error during analysis: %v", err)