From d8ca7b584b693a2a61dd88767a81d99bc48aca32 Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Fri, 15 Feb 2019 17:47:57 +0000
Subject: Separate out binarize into a package, and start adding tests for it

---
 binarize/integralimg.go                            |   2 +-
 binarize/main.go                                   |  70 ----------------
 binarize/sauvola.go                                |   2 +-
 binarize/sauvola_test.go                           |  88 +++++++++++++++++++++
 binarize/testdata/pg1.png                          | Bin 0 -> 651071 bytes
 binarize/testdata/pg1_integralsauvola_k0.3_w19.png | Bin 0 -> 19456 bytes
 binarize/testdata/pg1_integralsauvola_k0.5_w19.png | Bin 0 -> 18241 bytes
 binarize/testdata/pg1_integralsauvola_k0.5_w41.png | Bin 0 -> 18260 bytes
 binarize/util.go                                   |   2 +-
 sauvola/main.go                                    |  69 ++++++++++++++++
 10 files changed, 160 insertions(+), 73 deletions(-)
 delete mode 100644 binarize/main.go
 create mode 100644 binarize/sauvola_test.go
 create mode 100644 binarize/testdata/pg1.png
 create mode 100644 binarize/testdata/pg1_integralsauvola_k0.3_w19.png
 create mode 100644 binarize/testdata/pg1_integralsauvola_k0.5_w19.png
 create mode 100644 binarize/testdata/pg1_integralsauvola_k0.5_w41.png
 create mode 100644 sauvola/main.go

diff --git a/binarize/integralimg.go b/binarize/integralimg.go
index c585d60..75e9cce 100644
--- a/binarize/integralimg.go
+++ b/binarize/integralimg.go
@@ -1,4 +1,4 @@
-package main
+package binarize
 
 import (
 	"image"
diff --git a/binarize/main.go b/binarize/main.go
deleted file mode 100644
index ec99c09..0000000
--- a/binarize/main.go
+++ /dev/null
@@ -1,70 +0,0 @@
-package main
-
-// TODO: could look into other algorithms, see for examples see
-//       the README at https://github.com/brandonmpetty/Doxa
-
-import (
-	"flag"
-	"fmt"
-	"image"
-	"image/draw"
-	_ "image/jpeg"
-	"image/png"
-	"log"
-	"os"
-)
-
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
-	return bounds.Dx() / 60
-}
-
-func main() {
-	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "Usage: binarize [-w num] [-k num] inimg outimg\n")
-		flag.PrintDefaults()
-	}
-	wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.")
-	ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
-	flag.Parse()
-	if flag.NArg() < 2 {
-		flag.Usage()
-		os.Exit(1)
-	}
-
-	f, err := os.Open(flag.Arg(0))
-	defer f.Close()
-	if err != nil {
-		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
-	}
-	img, _, err := image.Decode(f)
-	if err != nil {
-		log.Fatalf("Could not decode image: %v\n", err)
-	}
-	b := img.Bounds()
-	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
-	draw.Draw(gray, b, img, b.Min, draw.Src)
-
-	if *wsize == 0 {
-		*wsize = autowsize(b)
-		log.Printf("Set window size to %d\n", *wsize)
-	}
-
-	if *wsize % 2 == 0 {
-		*wsize++
-	}
-
-	// TODO: come up with a way to set a good ksize automatically
-
-	thresh := IntegralSauvola(gray, *ksize, *wsize)
-
-	f, err = os.Create(flag.Arg(1))
-	if err != nil {
-		log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
-	}
-	defer f.Close()
-	err = png.Encode(f, thresh)
-	if err != nil {
-		log.Fatalf("Could not encode image: %v\n", err)
-	}
-}
diff --git a/binarize/sauvola.go b/binarize/sauvola.go
index bc311ad..ca0acf0 100644
--- a/binarize/sauvola.go
+++ b/binarize/sauvola.go
@@ -1,4 +1,4 @@
-package main
+package binarize
 
 import (
 	"image"
diff --git a/binarize/sauvola_test.go b/binarize/sauvola_test.go
new file mode 100644
index 0000000..12a040a
--- /dev/null
+++ b/binarize/sauvola_test.go
@@ -0,0 +1,92 @@
+package binarize
+
+import (
+	"fmt"
+	"image"
+	"image/draw"
+	"image/png"
+	"os"
+	"testing"
+)
+
+// decode opens the PNG file at path s and returns its contents as a
+// grayscale image whose bounds start at the origin.
+func decode(s string) (*image.Gray, error) {
+	f, err := os.Open(s)
+	if err != nil {
+		return nil, err
+	}
+	// Deferred only after the open succeeded, so f is a valid file here.
+	defer f.Close()
+	img, err := png.Decode(f)
+	if err != nil {
+		return nil, err
+	}
+	b := img.Bounds()
+	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
+	// Draw into gray's own bounds so that a source whose bounds do not
+	// start at the origin is still copied in full.
+	draw.Draw(gray, gray.Bounds(), img, b.Min, draw.Src)
+	return gray, nil
+}
+
+// imgsequal reports whether img1 and img2 have the same bounds and the
+// same gray value at every pixel.
+func imgsequal(img1 *image.Gray, img2 *image.Gray) bool {
+	b := img1.Bounds()
+	if !b.Eq(img2.Bounds()) {
+		return false
+	}
+	for y := b.Min.Y; y < b.Max.Y; y++ {
+		for x := b.Min.X; x < b.Max.X; x++ {
+			// Both images are *image.Gray, so comparing the single Y
+			// channel is equivalent to comparing all RGBA components.
+			if img1.GrayAt(x, y).Y != img2.GrayAt(x, y).Y {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// TestBinarization runs IntegralSauvola on each source image with the
+// listed k and window size, and checks that the output is pixel-for-pixel
+// identical to the stored reference image.
+func TestBinarization(t *testing.T) {
+	// TODO: test non-integral sauvola method too
+	cases := []struct {
+		name  string
+		orig  string
+		done  string
+		ksize float64
+		wsize int
+	}{
+		{"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41},
+		{"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19},
+		{"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19},
+	}
+
+	for _, c := range cases {
+		t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) {
+			orig, err := decode(c.orig)
+			if err != nil {
+				// Fatalf rather than Errorf: decode returned a nil image,
+				// which must not be passed on to IntegralSauvola.
+				t.Fatalf("Could not open file %s: %v\n", c.orig, err)
+			}
+			done, err := decode(c.done)
+			if err != nil {
+				t.Fatalf("Could not open file %s: %v\n", c.done, err)
+			}
+			got := IntegralSauvola(orig, c.ksize, c.wsize)
+			if !imgsequal(done, got) {
+				t.Errorf("Binarized %s differs to %s\n", c.orig, c.done)
+			}
+		})
+	}
+}
+
+// TestIntegralImg is a placeholder; it currently makes no assertions.
+func TestIntegralImg(t *testing.T) {
+	// TODO: compare mean and stddev between integral and basic methods
+}
diff --git a/binarize/testdata/pg1.png b/binarize/testdata/pg1.png
new file mode 100644
index 0000000..2bcc4b1
Binary files /dev/null and b/binarize/testdata/pg1.png differ
diff --git a/binarize/testdata/pg1_integralsauvola_k0.3_w19.png b/binarize/testdata/pg1_integralsauvola_k0.3_w19.png
new file mode 100644
index 0000000..bdf5712
Binary files /dev/null and b/binarize/testdata/pg1_integralsauvola_k0.3_w19.png differ
diff --git a/binarize/testdata/pg1_integralsauvola_k0.5_w19.png b/binarize/testdata/pg1_integralsauvola_k0.5_w19.png
new file mode 100644
index 0000000..5db2d9a
Binary files /dev/null and b/binarize/testdata/pg1_integralsauvola_k0.5_w19.png differ
diff --git a/binarize/testdata/pg1_integralsauvola_k0.5_w41.png b/binarize/testdata/pg1_integralsauvola_k0.5_w41.png
new file mode 100644
index 0000000..050d037
Binary files /dev/null and b/binarize/testdata/pg1_integralsauvola_k0.5_w41.png differ
diff --git a/binarize/util.go b/binarize/util.go
index e7cf0f8..e7fcfe4 100644
--- a/binarize/util.go
+++ b/binarize/util.go
@@ -1,4 +1,4 @@
-package main
+package binarize
 
 import (
 	"image"
diff --git a/sauvola/main.go b/sauvola/main.go
new file mode 100644
index 0000000..c4b9562
--- /dev/null
+++ b/sauvola/main.go
@@ -0,0 +1,81 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"image"
+	"image/draw"
+	_ "image/jpeg"
+	"image/png"
+	"log"
+	"os"
+
+	"rescribe.xyz/go.git/binarize"
+)
+
+// autowsize returns the default window size for an image with the given
+// bounds: one sixtieth of its width.
+// TODO: do more testing to see how good this assumption is
+func autowsize(bounds image.Rectangle) int {
+	return bounds.Dx() / 60
+}
+
+// main decodes inimg, binarizes it with binarize.IntegralSauvola and
+// writes the result to outimg as a PNG.
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: sauvola [-w num] [-k num] inimg outimg\n")
+		flag.PrintDefaults()
+	}
+	wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.")
+	ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
+	flag.Parse()
+	if flag.NArg() < 2 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	f, err := os.Open(flag.Arg(0))
+	if err != nil {
+		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
+	}
+	// Deferred only after the open succeeded, so f is a valid file here.
+	defer f.Close()
+	img, _, err := image.Decode(f)
+	if err != nil {
+		log.Fatalf("Could not decode image: %v\n", err)
+	}
+	b := img.Bounds()
+	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
+	// Draw into gray's own bounds so that a source whose bounds do not
+	// start at the origin is still copied in full.
+	draw.Draw(gray, gray.Bounds(), img, b.Min, draw.Src)
+
+	if *wsize == 0 {
+		*wsize = autowsize(b)
+		log.Printf("Set window size to %d\n", *wsize)
+	}
+
+	// Bump an even window size up to the next odd number.
+	if *wsize%2 == 0 {
+		*wsize++
+	}
+
+	// TODO: come up with a way to set a good ksize automatically
+
+	thresh := binarize.IntegralSauvola(gray, *ksize, *wsize)
+
+	out, err := os.Create(flag.Arg(1))
+	if err != nil {
+		log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
+	}
+	err = png.Encode(out, thresh)
+	if err != nil {
+		log.Fatalf("Could not encode image: %v\n", err)
+	}
+	// Close explicitly and check the error, so a write error that only
+	// surfaces at close is reported rather than silently ignored.
+	if err := out.Close(); err != nil {
+		log.Fatalf("Could not close file %s: %v\n", flag.Arg(1), err)
+	}
+}
-- 
cgit v1.2.1-24-ge1ad