diff options
author | Nick White <git@njw.name> | 2022-01-10 15:10:23 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2022-01-10 15:10:23 +0000 |
commit | dc7b313c504d68165f1d1b085a6ce94eb6e8b55f (patch) | |
tree | 897a8a892e6ea8130b052866b19a37484eef0784 /cmd/rescribe | |
parent | b149abd2e6ffe7072dfef5bae380cab36554330b (diff) |
rescribe: handle PDF errors much more gracefully
Diffstat (limited to 'cmd/rescribe')
-rw-r--r-- | cmd/rescribe/gui.go | 33 | ||||
-rw-r--r-- | cmd/rescribe/main.go | 14 |
2 files changed, 46 insertions, 1 deletion
```diff
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index fb0450a..7ae5465 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -298,7 +298,38 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
 			}
 		}()
 
-		err = startProcess(log, cmd, dir.Text, filepath.Base(dir.Text), trainingOpts.Selected, dir.Text, tessdir)
+		bookdir := dir.Text
+		savedir := dir.Text
+		bookname := filepath.Base(dir.Text)
+
+		f, err := os.Stat(bookdir)
+		if err != nil {
+			// TODO: surface error and cancel process better
+			fmt.Fprintf(os.Stderr, "Error opening file as PDF: %v\n", err)
+			return
+		}
+
+		if strings.HasSuffix(dir.Text, ".pdf") && !f.IsDir() {
+			bookdir, err = extractPdfImgs(bookdir)
+			if err != nil {
+				// TODO: surface error and cancel process better
+				fmt.Fprintf(os.Stderr, "Error opening file as PDF: %v\n", err)
+				return
+			}
+
+			// happens if extractPdfImgs recovers from a PDF panic,
+			// which will occur if we encounter an image we can't decode
+			if bookdir == "" {
+				// TODO: surface error and cancel process better
+				fmt.Fprintf(os.Stderr, "Error opening PDF\nThe format of this PDF is not supported, extract the images manually into a folder first.\n")
+				return
+			}
+
+			savedir = strings.TrimSuffix(savedir, ".pdf")
+			bookname = strings.TrimSuffix(bookname, ".pdf")
+		}
+
+		err = startProcess(log, cmd, bookdir, bookname, trainingOpts.Selected, savedir, tessdir)
 		if err != nil {
 			// add a newline before this printing as another message from stdout
 			// or stderr may well be half way through printing
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 72a03d3..019d038 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -273,6 +273,11 @@ These training files are included in rescribe, and are always available:
 		if err != nil {
 			log.Fatalln("Error opening file as PDF:", err)
 		}
+		// if this occurs then extractPdfImgs() will have recovered from
+		// a panic in the pdf package
+		if bookdir == "" {
+			log.Fatalln("Error opening file as PDF: image type not supported, you will need to extract images manually.")
+		}
 
 		bookname = strings.TrimSuffix(bookname, ".pdf")
 
@@ -299,6 +304,15 @@ These training files are included in rescribe, and are always available:
 // extractPdfImgs extracts all images embedded in a PDF to a
 // temporary directory, which is returned on success.
 func extractPdfImgs(path string) (string, error) {
+	defer func() {
+		// unfortunately the pdf library will panic if it sees an encoding
+		// it can't decode, so recover from that and give a warning
+		r := recover()
+		if r != nil {
+			fmt.Fprintf(os.Stderr, "Warning: Error extracting from PDF: %v\n", r)
+		}
+	}()
+
 	p, err := pdf.Open(path)
 	if err != nil {
 		return "", err
```