From 3b12c59f37f0dc58ad1886052c03f4c81a2a5b78 Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Tue, 14 May 2019 10:29:29 +0100
Subject: Add preprocmulti tool, that outputs multiple binarisation options
 quickly

---
 preproc/cmd/preproc/main.go      |  4 +-
 preproc/cmd/preprocmulti/main.go | 96 ++++++++++++++++++++++++++++++++++++++++
 preproc/sauvola.go               | 21 +++++++++
 3 files changed, 119 insertions(+), 2 deletions(-)
 create mode 100644 preproc/cmd/preprocmulti/main.go

(limited to 'preproc')

diff --git a/preproc/cmd/preproc/main.go b/preproc/cmd/preproc/main.go
index 123895f..92e8509 100644
--- a/preproc/cmd/preproc/main.go
+++ b/preproc/cmd/preproc/main.go
@@ -23,7 +23,7 @@ func autowsize(bounds image.Rectangle) int {
 
 func main() {
 	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-t thresh] [-ws wipesize] inimg outimg\n")
+		fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-wt wipethresh] [-ws wipesize] inimg outimg\n")
 		fmt.Fprintf(os.Stderr, "Binarize and preprocess an image\n")
 		flag.PrintDefaults()
 	}
@@ -31,7 +31,7 @@ func main() {
 	ksize := flag.Float64("k", 0.5, "K for sauvola binarization algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
 	btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
 	wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
-	thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge.")
+	thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.")
 	flag.Parse()
 	if flag.NArg() < 2 {
 		flag.Usage()
diff --git a/preproc/cmd/preprocmulti/main.go b/preproc/cmd/preprocmulti/main.go
new file mode 100644
index 0000000..a159938
--- /dev/null
+++ b/preproc/cmd/preprocmulti/main.go
@@ -0,0 +1,96 @@
+package main
+
+// TODO: come up with a way to set a good ksize automatically
+// TODO: add minimum size variable (default ~30%?) for wipe
+
+import (
+	"flag"
+	"fmt"
+	"image"
+	"image/draw"
+	_ "image/jpeg"
+	"image/png"
+	"log"
+	"os"
+
+	"rescribe.xyz/go.git/integralimg"
+	"rescribe.xyz/go.git/preproc"
+)
+
+// autowsize guesses a window size of 1/60 of the image width (TODO: test how good this assumption is).
+func autowsize(bounds image.Rectangle) int {
+	return (bounds.Max.X - bounds.Min.X) / 60
+}
+
+// main binarises inimg once per k in ksizes, reusing one set of integral
+// images, then runs preproc.Wipe on each result and saves it as a PNG.
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: preprocmulti [-bt bintype] [-bw winsize] [-t thresh] [-ws wipesize] inimg outbase\n")
+		fmt.Fprintf(os.Stderr, "Binarize and preprocess an image, with multiple binarisation levels,\n")
+		fmt.Fprintf(os.Stderr, "saving images to outbase_knum.png.\n")
+		flag.PrintDefaults()
+	}
+	binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
+	btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
+	wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
+	thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge.")
+	flag.Parse()
+	if flag.NArg() < 2 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	log.Printf("Opening %s\n", flag.Arg(0))
+	f, err := os.Open(flag.Arg(0))
+	if err != nil {
+		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
+	}
+	defer f.Close() // registered only once the open is known to have succeeded
+	img, _, err := image.Decode(f)
+	if err != nil {
+		log.Fatalf("Could not decode image: %v\n", err)
+	}
+	b := img.Bounds()
+	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
+	draw.Draw(gray, gray.Bounds(), img, b.Min, draw.Src) // gray is zero-origin; b need not be
+
+	if *binwsize == 0 {
+		*binwsize = autowsize(b)
+	}
+	if *binwsize%2 == 0 {
+		*binwsize++ // force an odd window size
+	}
+
+	ksizes := []float64{0.2, 0.3, 0.4, 0.5, 0.6}
+	log.Print("Precalculating integral images")
+	integrals := integralimg.ToAllIntegralImg(gray)
+
+	for _, k := range ksizes {
+		log.Print("Binarising")
+		var threshimg image.Image = preproc.PreCalcedSauvola(integrals, gray, k, *binwsize)
+		if *btype == "zeroinv" {
+			// NOTE(review): img.(*image.RGBA) panics for any other decoded type (e.g. JPEG input) - confirm.
+			threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA))
+			if err != nil {
+				log.Fatal(err)
+			}
+		}
+
+		log.Print("Wiping sides")
+		clean := preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh)
+
+		savefn := fmt.Sprintf("%s_%0.1f.png", flag.Arg(1), k)
+		log.Printf("Saving %s\n", savefn)
+		out, err := os.Create(savefn)
+		if err != nil {
+			log.Fatalf("Could not create file %s: %v\n", savefn, err)
+		}
+		if err = png.Encode(out, clean); err != nil {
+			log.Fatalf("Could not encode image: %v\n", err)
+		}
+		if err = out.Close(); err != nil { // close per iteration and surface write errors
+			log.Fatalf("Could not close file %s: %v\n", savefn, err)
+		}
+	}
+}
diff --git a/preproc/sauvola.go b/preproc/sauvola.go
index e93ea81..2ed9a92 100644
--- a/preproc/sauvola.go
+++ b/preproc/sauvola.go
@@ -53,3 +53,24 @@ func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray
 
 	return new
 }
+
+// PreCalcedSauvola implements Sauvola's algorithm using precalculated integral images.
+// It returns a new image with img's bounds: black (0) where a pixel is below its local threshold, else white (255).
+// TODO: have this be the root function that the other two reference
+func PreCalcedSauvola(integrals integralimg.WithSq, img *image.Gray, ksize float64, windowsize int) *image.Gray {
+	b := img.Bounds()
+	out := image.NewGray(b)
+	for y := b.Min.Y; y < b.Max.Y; y++ {
+		for x := b.Min.X; x < b.Max.X; x++ {
+			m, dev := integrals.MeanStdDevWindow(x, y, windowsize)
+			threshold := m * (1 + ksize*((dev/128)-1))
+			// Compare as int: a float outside 0..255 converted to uint8 is implementation-dependent.
+			if int(img.GrayAt(x, y).Y) < int(threshold) {
+				out.SetGray(x, y, color.Gray{0})
+			} else {
+				out.SetGray(x, y, color.Gray{255})
+			}
+		}
+	}
+	return out
+}
-- 
cgit v1.2.1-24-ge1ad