From dc7b313c504d68165f1d1b085a6ce94eb6e8b55f Mon Sep 17 00:00:00 2001
From: Nick White
Date: Mon, 10 Jan 2022 15:10:23 +0000
Subject: rescribe: handle PDF errors much more gracefully

---
 cmd/rescribe/main.go | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'cmd/rescribe/main.go')

diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 72a03d3..019d038 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -273,6 +273,11 @@ These training files are included in rescribe, and are always available:
 		if err != nil {
 			log.Fatalln("Error opening file as PDF:", err)
 		}
+		// if this occurs then extractPdfImgs() will have recovered from
+		// a panic in the pdf package
+		if bookdir == "" {
+			log.Fatalln("Error opening file as PDF: image type not supported, you will need to extract images manually.")
+		}
 
 		bookname = strings.TrimSuffix(bookname, ".pdf")
 
@@ -299,6 +304,15 @@ These training files are included in rescribe, and are always available:
 // extractPdfImgs extracts all images embedded in a PDF to a
 // temporary directory, which is returned on success.
 func extractPdfImgs(path string) (string, error) {
+	defer func() {
+		// unfortunately the pdf library will panic if it sees an encoding
+		// it can't decode, so recover from that and give a warning
+		r := recover()
+		if r != nil {
+			fmt.Fprintf(os.Stderr, "Warning: Error extracting from PDF: %v\n", r)
+		}
+	}()
+
 	p, err := pdf.Open(path)
 	if err != nil {
 		return "", err
-- 
cgit v1.2.1-24-ge1ad