diff options
| author | Nick White <git@njw.name> | 2022-01-10 15:10:23 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2022-01-10 15:10:23 +0000 | 
| commit | dc7b313c504d68165f1d1b085a6ce94eb6e8b55f (patch) | |
| tree | 897a8a892e6ea8130b052866b19a37484eef0784 /cmd/rescribe | |
| parent | b149abd2e6ffe7072dfef5bae380cab36554330b (diff) | |
rescribe: handle PDF errors much more gracefully
Diffstat (limited to 'cmd/rescribe')
| -rw-r--r-- | cmd/rescribe/gui.go | 33 | ||||
| -rw-r--r-- | cmd/rescribe/main.go | 14 | 
2 files changed, 46 insertions, 1 deletion
```diff
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index fb0450a..7ae5465 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -298,7 +298,38 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
 			}
 		}()
 
-		err = startProcess(log, cmd, dir.Text, filepath.Base(dir.Text), trainingOpts.Selected, dir.Text, tessdir)
+		bookdir := dir.Text
+		savedir := dir.Text
+		bookname := filepath.Base(dir.Text)
+
+		f, err := os.Stat(bookdir)
+		if err != nil {
+			// TODO: surface error and cancel process better
+			fmt.Fprintf(os.Stderr, "Error opening file as PDF: %v\n", err)
+			return
+		}
+
+		if strings.HasSuffix(dir.Text, ".pdf") && !f.IsDir() {
+			bookdir, err = extractPdfImgs(bookdir)
+			if err != nil {
+				// TODO: surface error and cancel process better
+				fmt.Fprintf(os.Stderr, "Error opening file as PDF: %v\n", err)
+				return
+			}
+
+			// happens if extractPdfImgs recovers from a PDF panic,
+			// which will occur if we encounter an image we can't decode
+			if bookdir == "" {
+				// TODO: surface error and cancel process better
+				fmt.Fprintf(os.Stderr, "Error opening PDF\nThe format of this PDF is not supported, extract the images manually into a folder first.\n")
+				return
+			}
+
+			savedir = strings.TrimSuffix(savedir, ".pdf")
+			bookname = strings.TrimSuffix(bookname, ".pdf")
+		}
+
+		err = startProcess(log, cmd, bookdir, bookname, trainingOpts.Selected, savedir, tessdir)
 		if err != nil {
 			// add a newline before this printing as another message from stdout
 			// or stderr may well be half way through printing
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 72a03d3..019d038 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -273,6 +273,11 @@ These training files are included in rescribe, and are always available:
 		if err != nil {
 			log.Fatalln("Error opening file as PDF:", err)
 		}
+		// if this occurs then extractPdfImgs() will have recovered from
+		// a panic in the pdf package
+		if bookdir == "" {
+			log.Fatalln("Error opening file as PDF: image type not supported, you will need to extract images manually.")
+		}
 
 		bookname = strings.TrimSuffix(bookname, ".pdf")
 
@@ -299,6 +304,15 @@ These training files are included in rescribe, and are always available:
 // extractPdfImgs extracts all images embedded in a PDF to a
 // temporary directory, which is returned on success.
 func extractPdfImgs(path string) (string, error) {
+	defer func() {
+		// unfortunately the pdf library will panic if it sees an encoding
+		// it can't decode, so recover from that and give a warning
+		r := recover()
+		if r != nil {
+			fmt.Fprintf(os.Stderr, "Warning: Error extracting from PDF: %v\n", r)
+		}
+	}()
+
 	p, err := pdf.Open(path)
 	if err != nil {
 		return "", err
```
