summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2022-01-10 15:10:23 +0000
committer Nick White <git@njw.name> 2022-01-10 15:10:23 +0000
commit dc7b313c504d68165f1d1b085a6ce94eb6e8b55f (patch)
tree 897a8a892e6ea8130b052866b19a37484eef0784
parent b149abd2e6ffe7072dfef5bae380cab36554330b (diff)
rescribe: handle PDF errors much more gracefully
-rw-r--r-- cmd/rescribe/gui.go 33
-rw-r--r-- cmd/rescribe/main.go 14
2 files changed, 46 insertions, 1 deletions
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index fb0450a..7ae5465 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -298,7 +298,38 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
}
}()
- err = startProcess(log, cmd, dir.Text, filepath.Base(dir.Text), trainingOpts.Selected, dir.Text, tessdir)
+ bookdir := dir.Text
+ savedir := dir.Text
+ bookname := filepath.Base(dir.Text)
+
+ f, err := os.Stat(bookdir)
+ if err != nil {
+ // TODO: surface error and cancel process better
+ fmt.Fprintf(os.Stderr, "Error opening file as PDF: %v\n", err)
+ return
+ }
+
+ if strings.HasSuffix(dir.Text, ".pdf") && !f.IsDir() {
+ bookdir, err = extractPdfImgs(bookdir)
+ if err != nil {
+ // TODO: surface error and cancel process better
+ fmt.Fprintf(os.Stderr, "Error opening file as PDF: %v\n", err)
+ return
+ }
+
+ // happens if extractPdfImgs recovers from a PDF panic,
+ // which will occur if we encounter an image we can't decode
+ if bookdir == "" {
+ // TODO: surface error and cancel process better
+ fmt.Fprintf(os.Stderr, "Error opening PDF\nThe format of this PDF is not supported, extract the images manually into a folder first.\n")
+ return
+ }
+
+ savedir = strings.TrimSuffix(savedir, ".pdf")
+ bookname = strings.TrimSuffix(bookname, ".pdf")
+ }
+
+ err = startProcess(log, cmd, bookdir, bookname, trainingOpts.Selected, savedir, tessdir)
if err != nil {
// add a newline before this printing as another message from stdout
// or stderr may well be half way through printing
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 72a03d3..019d038 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -273,6 +273,11 @@ These training files are included in rescribe, and are always available:
if err != nil {
log.Fatalln("Error opening file as PDF:", err)
}
+ // if this occurs then extractPdfImgs() will have recovered from
+ // a panic in the pdf package
+ if bookdir == "" {
+ log.Fatalln("Error opening file as PDF: image type not supported, you will need to extract images manually.")
+ }
bookname = strings.TrimSuffix(bookname, ".pdf")
@@ -299,6 +304,15 @@ These training files are included in rescribe, and are always available:
// extractPdfImgs extracts all images embedded in a PDF to a
// temporary directory, which is returned on success.
func extractPdfImgs(path string) (string, error) {
+ defer func() {
+ // unfortunately the pdf library will panic if it sees an encoding
+ // it can't decode, so recover from that and give a warning
+ r := recover()
+ if r != nil {
+ fmt.Fprintf(os.Stderr, "Warning: Error extracting from PDF: %v\n", r)
+ }
+ }()
+
p, err := pdf.Open(path)
if err != nil {
return "", err