From 550752fa2ab493fb6d10aa9d963fc45996c0d100 Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 31 Jan 2022 14:11:21 +0000 Subject: Make pipeline context-aware, so the rescribe tool can cancel jobs --- cmd/rescribe/gui.go | 37 +++++++++++++++++++++++++++++++++---- cmd/rescribe/main.go | 31 ++++++++++++++++++------------- 2 files changed, 51 insertions(+), 17 deletions(-) (limited to 'cmd/rescribe') diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go index 06e6ddd..bdcc16c 100644 --- a/cmd/rescribe/gui.go +++ b/cmd/rescribe/gui.go @@ -6,6 +6,7 @@ package main import ( "bufio" + "context" "errors" "fmt" "io" @@ -221,7 +222,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error myWindow.Resize(fyne.NewSize(800, 400)) - var gobtn *widget.Button + var abortbtn, gobtn *widget.Button var fullContent *fyne.Container dir := widget.NewLabel("") @@ -272,6 +273,23 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error detail := widget.NewAccordion(widget.NewAccordionItem("Log", logarea)) + var ctx context.Context + var cancel context.CancelFunc + ctx, cancel = context.WithCancel(context.Background()) + + abortbtn = widget.NewButtonWithIcon("Abort", theme.CancelIcon(), func() { + fmt.Printf("\nAbort\n") + cancel() + progressBar.SetValue(0.0) + gobtn.SetText("Process OCR") + for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} { + v.Enable() + } + abortbtn.Disable() + ctx, cancel = context.WithCancel(context.Background()) + }) + abortbtn.Disable() + gobtn = widget.NewButtonWithIcon("Start OCR", theme.UploadIcon(), func() { if dir.Text == "" { return @@ -347,6 +365,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} { v.Enable() } + abortbtn.Disable() return } @@ -356,6 +375,8 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error v.Disable() } + abortbtn.Enable() + progressBar.SetValue(0.1) if strings.HasSuffix(dir.Text, ".pdf") && !f.IsDir() { @@ -370,6 +391,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} { v.Enable() } + abortbtn.Disable() return } @@ -385,6 +407,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} { v.Enable() } + abortbtn.Disable() return } @@ -399,7 +422,11 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error training = training[start:end] } - err = startProcess(log, cmd, bookdir, bookname, training, savedir, tessdir) + err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir) + if strings.HasSuffix(err.Error(), "context canceled") { + progressBar.SetValue(0.0) + return + } if err != nil { msg := fmt.Sprintf("Error during processing: %v\n", err) dialog.ShowError(errors.New(msg), myWindow) @@ -410,6 +437,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} { v.Enable() } + abortbtn.Disable() return } @@ -419,6 +447,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} { v.Enable() } + abortbtn.Disable() }() }) gobtn.Disable() @@ -429,8 +458,8 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error trainingBits := container.New(layout.NewBorderLayout(nil, nil, trainingLabel, nil), trainingLabel, trainingOpts) - fullContent = container.NewVBox(choices, chosen, trainingBits, gobtn, progressBar, detail) - startContent := container.NewVBox(choices, trainingBits, gobtn, progressBar, detail) + fullContent = container.NewVBox(choices, chosen, trainingBits, gobtn, abortbtn, progressBar, detail) + startContent := container.NewVBox(choices, trainingBits, gobtn, abortbtn, progressBar, detail) myWindow.SetContent(startContent) diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index 3f7bd71..cd242af 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -12,6 +12,7 @@ package main import ( "archive/zip" "bytes" + "context" _ "embed" "flag" "fmt" @@ -284,7 +285,9 @@ These training files are included in rescribe, and are always available: ispdf = true } - err = startProcess(*verboselog, tessCommand, bookdir, bookname, trainingName, savedir, tessdir) + var ctx context.Context + + err = startProcess(ctx, *verboselog, tessCommand, bookdir, bookname, trainingName, savedir, tessdir) if err != nil { log.Fatalln(err) } @@ -413,7 +416,7 @@ func rmIfNotImage(f string) error { return nil } -func startProcess(logger log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, savedir string, tessdir string) error { +func startProcess(ctx context.Context, logger log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, savedir string, tessdir string) error { _, err := exec.Command(tessCommand, "--help").Output() if err != nil { errmsg := "Error, Can't run Tesseract\n" @@ -441,14 +444,14 @@ func startProcess(logger log.Logger, tessCommand string, bookdir string, booknam fmt.Printf("Copying book to pipeline\n") - err = uploadbook(bookdir, bookname, conn) + err = uploadbook(ctx, bookdir, bookname, conn) if err != nil { _ = os.RemoveAll(tempdir) return fmt.Errorf("Error uploading book: %v", err) } fmt.Printf("Processing book\n") - err = processbook(trainingName, tessCommand, conn) + err = processbook(ctx, trainingName, tessCommand, conn) if err != nil { _ = os.RemoveAll(tempdir) return fmt.Errorf("Error processing book: %v", err) @@ -554,16 +557,16 @@ func addTxtVersion(hocrfn string) error { return nil } -func uploadbook(dir string, name string, conn Pipeliner) error { +func uploadbook(ctx context.Context, dir string, name string, conn Pipeliner) error { _, err := os.Stat(dir) if err != nil && !os.IsExist(err) { return fmt.Errorf("Error: directory %s not found", dir) } - err = pipeline.CheckImages(dir) + err = pipeline.CheckImages(ctx, dir) if err != nil { return fmt.Errorf("Error with images in %s: %v", dir, err) } - err = pipeline.UploadImages(dir, name, conn) + err = pipeline.UploadImages(ctx, dir, name, conn) if err != nil { return fmt.Errorf("Error saving images to process from %s: %v", dir, err) } @@ -602,7 +605,7 @@ func downloadbook(dir string, name string, conn Pipeliner) error { return nil } -func processbook(training string, tesscmd string, conn Pipeliner) error { +func processbook(ctx context.Context, training string, tesscmd string, conn Pipeliner) error { origPattern := regexp.MustCompile(`[0-9]{4}.(jpg|png)$`) wipePattern := regexp.MustCompile(`[0-9]{4,6}(.bin)?.(jpg|png)$`) ocredPattern := regexp.MustCompile(`.hocr$`) @@ -624,6 +627,8 @@ func processbook(training string, tesscmd string, conn Pipeliner) error { for { select { + case <-ctx.Done(): + return ctx.Err() case <-checkPreQueue: msg, err := conn.CheckQueue(conn.PreQueueId(), QueueTimeoutSecs) checkPreQueue = time.After(PauseBetweenChecks) @@ -637,12 +642,12 @@ func processbook(training string, tesscmd string, conn Pipeliner) error { stopTimer(stopIfQuiet) conn.Log("Message received on preprocess queue, processing", msg.Body) fmt.Printf(" Preprocessing book (binarising and wiping)\n") - err = pipeline.ProcessBook(msg, conn, pipeline.Preprocess(thresholds), origPattern, conn.PreQueueId(), conn.OCRPageQueueId()) - fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output + err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Preprocess(thresholds), origPattern, conn.PreQueueId(), conn.OCRPageQueueId()) resetTimer(stopIfQuiet, quietTime) if err != nil { return fmt.Errorf("Error during preprocess: %v", err) } + fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output case <-checkWipeQueue: msg, err := conn.CheckQueue(conn.WipeQueueId(), QueueTimeoutSecs) checkWipeQueue = time.After(PauseBetweenChecks) @@ -656,7 +661,7 @@ func processbook(training string, tesscmd string, conn Pipeliner) error { stopTimer(stopIfQuiet) conn.Log("Message received on wipeonly queue, processing", msg.Body) fmt.Printf(" Preprocessing book (wiping only)\n") - err = pipeline.ProcessBook(msg, conn, pipeline.Wipe, wipePattern, conn.WipeQueueId(), conn.OCRPageQueueId()) + err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Wipe, wipePattern, conn.WipeQueueId(), conn.OCRPageQueueId()) fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output resetTimer(stopIfQuiet, quietTime) if err != nil { @@ -677,7 +682,7 @@ func processbook(training string, tesscmd string, conn Pipeliner) error { stopTimer(stopIfQuiet) conn.Log("Message received on OCR Page queue, processing", msg.Body) fmt.Printf(".") - err = pipeline.OcrPage(msg, conn, pipeline.Ocr(training, tesscmd), conn.OCRPageQueueId(), conn.AnalyseQueueId()) + err = pipeline.OcrPage(ctx, msg, conn, pipeline.Ocr(training, tesscmd), conn.OCRPageQueueId(), conn.AnalyseQueueId()) resetTimer(stopIfQuiet, quietTime) if err != nil { return fmt.Errorf("\nError during OCR Page process: %v", err) @@ -695,7 +700,7 @@ func processbook(training string, tesscmd string, conn Pipeliner) error { stopTimer(stopIfQuiet) conn.Log("Message received on analyse queue, processing", msg.Body) fmt.Printf("\n Analysing OCR and compiling PDFs\n") - err = pipeline.ProcessBook(msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "") + err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "") resetTimer(stopIfQuiet, quietTime) if err != nil { return fmt.Errorf("Error during analysis: %v", err) -- cgit v1.2.1-24-ge1ad