From a9bcda93f3a19bee07aa04b79dc4fc515c74de17 Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Thu, 17 Nov 2022 12:49:51 +0000
Subject: rescribe: support CCITTFaxDecode (tiff) encoded images in PDF reading

---
 cmd/rescribe/main.go | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

(limited to 'cmd/rescribe')

diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 74f6440..2c37dc5 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -28,6 +28,7 @@ import (
 	"strings"
 	"time"
 
+	"golang.org/x/image/tiff"
 	"rescribe.xyz/bookpipeline"
 	"rescribe.xyz/bookpipeline/internal/pipeline"
 	"rescribe.xyz/pdf"
@@ -424,7 +425,9 @@ func extractPdfImgs(ctx context.Context, path string) (string, error) {
 
 // rmIfNotImage attempts to decode a given file as an image. If it is
 // decode-able as PNG, then rename file extension from .jpg to .png,
-// if it fails to be read as PNG or JPEG it will be deleted.
+// if it is decode-able as TIFF, then convert it to PNG and rename the
+// file extension accordingly; if it cannot be read as PNG, TIFF or
+// JPEG, it will just be deleted.
 func rmIfNotImage(f string) error {
 	r, err := os.Open(f)
 	defer r.Close()
@@ -448,12 +451,39 @@ func rmIfNotImage(f string) error {
 		return fmt.Errorf("Failed to open image %s: %v\n", f, err)
 	}
 	_, err = jpeg.Decode(r)
+	if err == nil {
+		return nil
+	}
+
+	r, err = os.Open(f)
+	defer r.Close()
 	if err != nil {
+		return fmt.Errorf("Failed to open image %s: %v\n", f, err)
+	}
+	t, err := tiff.Decode(r)
+	if err == nil {
+		b := strings.TrimSuffix(f, ".jpg")
+		n, err := os.Create(b+".png")
+		defer n.Close()
+		if err != nil {
+			return fmt.Errorf("Failed to create file to store new png %s from tiff %s: %v\n", b+".png", f, err)
+		}
+		err = png.Encode(n, t)
+		if err != nil {
+			return fmt.Errorf("Failed to encode tiff as png for %s: %v\n", f, err)
+		}
 		r.Close()
 		err = os.Remove(f)
 		if err != nil {
-			return fmt.Errorf("Failed to remove invalid image %s: %v", f, err)
+			return fmt.Errorf("Failed to remove original tiff %s: %v\n", f, err)
 		}
+		return nil
+	}
+
+	r.Close()
+	err = os.Remove(f)
+	if err != nil {
+		return fmt.Errorf("Failed to remove invalid image %s: %v", f, err)
 	}
 
 	return nil
-- 
cgit v1.2.1-24-ge1ad