From a9bcda93f3a19bee07aa04b79dc4fc515c74de17 Mon Sep 17 00:00:00 2001 From: Nick White Date: Thu, 17 Nov 2022 12:49:51 +0000 Subject: rescribe: support CCITTFaxDecode (tiff) encoded images in PDF reading --- cmd/rescribe/main.go | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'cmd') diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index 74f6440..2c37dc5 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -28,6 +28,7 @@ import ( "strings" "time" + "golang.org/x/image/tiff" "rescribe.xyz/bookpipeline" "rescribe.xyz/bookpipeline/internal/pipeline" "rescribe.xyz/pdf" @@ -424,7 +425,9 @@ func extractPdfImgs(ctx context.Context, path string) (string, error) { // rmIfNotImage attempts to decode a given file as an image. If it is // decode-able as PNG, then rename file extension from .jpg to .png, -// if it fails to be read as PNG or JPEG it will be deleted. +// if it is decode-able as TIFF then convert to PNG and rename file +// extension appropriately, if it fails to be read as PNG, TIFF or +// JPEG it will just be deleted. func rmIfNotImage(f string) error { r, err := os.Open(f) defer r.Close() @@ -448,12 +451,39 @@ func rmIfNotImage(f string) error { return fmt.Errorf("Failed to open image %s: %v\n", f, err) } _, err = jpeg.Decode(r) + if err == nil { + return nil + } + + r, err = os.Open(f) + defer r.Close() if err != nil { + return fmt.Errorf("Failed to open image %s: %v\n", f, err) + } + t, err := tiff.Decode(r) + if err == nil { + b := strings.TrimSuffix(f, ".jpg") + n, err := os.Create(b+".png") + defer n.Close() + if err != nil { + return fmt.Errorf("Failed to create file to store new png %s from tiff %s: %v\n", b+".png", f, err) + } + err = png.Encode(n, t) + if err != nil { + return fmt.Errorf("Failed to encode tiff as png for %s: %v\n", f, err) + } r.Close() err = os.Remove(f) if err != nil { - return fmt.Errorf("Failed to remove invalid image %s: %v", f, err) + return fmt.Errorf("Failed to remove original tiff %s: %v\n", f, err) } + return nil + } + + r.Close() + err = os.Remove(f) + if err != nil { + return fmt.Errorf("Failed to remove invalid image %s: %v", f, err) } return nil -- cgit v1.2.1-24-ge1ad