From 7fe36a34e661e0ffc4d8cb98733e2f586cac9e8d Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 31 Jan 2022 16:29:10 +0000 Subject: rescribe: Add context cancelling to extractPdfImgs(), so it's no longer possible to get the gui into a bad state by cancelling before startProcess began (hopefully) --- cmd/rescribe/gui.go | 10 ++++++---- cmd/rescribe/main.go | 19 +++++++++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go index 92d8f5b..f4ac9bd 100644 --- a/cmd/rescribe/gui.go +++ b/cmd/rescribe/gui.go @@ -380,11 +380,13 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error progressBar.SetValue(0.1) if strings.HasSuffix(dir.Text, ".pdf") && !f.IsDir() { - bookdir, err = extractPdfImgs(bookdir) + bookdir, err = extractPdfImgs(ctx, bookdir) if err != nil { - msg := fmt.Sprintf("Error opening PDF: %v\n", bookdir, err) - dialog.ShowError(errors.New(msg), myWindow) - fmt.Fprintf(os.Stderr, msg) + if !strings.HasSuffix(err.Error(), "context canceled") { + msg := fmt.Sprintf("Error opening PDF %s: %v\n", bookdir, err) + dialog.ShowError(errors.New(msg), myWindow) + fmt.Fprintf(os.Stderr, msg) + } progressBar.SetValue(0.0) gobtn.SetText("Process OCR") diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index d4acfc1..41dc83c 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -264,13 +264,15 @@ These training files are included in rescribe, and are always available: log.Fatalln("Error opening book file/dir:", err) } + var ctx context.Context + // try opening as a PDF, and extracting if !fi.IsDir() { if flag.NArg() < 2 { savedir = strings.TrimSuffix(bookdir, ".pdf") } - bookdir, err = extractPdfImgs(bookdir) + bookdir, err = extractPdfImgs(ctx, bookdir) if err != nil { log.Fatalln("Error opening file as PDF:", err) } @@ -285,8 +287,6 @@ These training files are included in rescribe, and are always available: ispdf = true } - var ctx context.Context - err = startProcess(ctx, *verboselog, tessCommand, bookdir, bookname, trainingName, savedir, tessdir) if err != nil { log.Fatalln(err) @@ -306,7 +306,7 @@ These training files are included in rescribe, and are always available: // extractPdfImgs extracts all images embedded in a PDF to a // temporary directory, which is returned on success. -func extractPdfImgs(path string) (string, error) { +func extractPdfImgs(ctx context.Context, path string) (string, error) { defer func() { // unfortunately the pdf library will panic if it sees an encoding // it can't decode, so recover from that and give a warning @@ -334,6 +334,11 @@ func extractPdfImgs(path string) (string, error) { } for pgnum := 1; pgnum <= p.NumPage(); pgnum++ { + select { + case <-ctx.Done(): + return "", ctx.Err() + default: + } if p.Page(pgnum).V.IsNull() { continue } @@ -376,6 +381,12 @@ func extractPdfImgs(path string) (string, error) { } // TODO: check for places where there are multiple images per page, and only keep largest ones where that's the case + select { + case <-ctx.Done(): + return "", ctx.Err() + default: + } + return tempdir, nil } -- cgit v1.2.1-24-ge1ad