From 787d63fc5d13c6250bd33da5a8e1eadbe86188cd Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Tue, 8 Oct 2019 15:37:07 +0100
Subject: Continue separating the repository; remove all but preproc, and move
 integralimg package under it

---
 cmd/binarize/main.go     |  78 ++++++++++++++++++++++++++++++++++++
 cmd/preproc/main.go      |  90 +++++++++++++++++++++++++++++++++++++++++
 cmd/preprocmulti/main.go | 101 +++++++++++++++++++++++++++++++++++++++++++++++
 cmd/wipe/main.go         |  55 ++++++++++++++++++++++++++
 4 files changed, 324 insertions(+)
 create mode 100644 cmd/binarize/main.go
 create mode 100644 cmd/preproc/main.go
 create mode 100644 cmd/preprocmulti/main.go
 create mode 100644 cmd/wipe/main.go

(limited to 'cmd')

diff --git a/cmd/binarize/main.go b/cmd/binarize/main.go
new file mode 100644
index 0000000..301e42b
--- /dev/null
+++ b/cmd/binarize/main.go
@@ -0,0 +1,78 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"image"
+	"image/draw"
+	_ "image/jpeg"
+	"image/png"
+	"log"
+	"os"
+
+	"rescribe.xyz/preproc"
+)
+
+// TODO: do more testing to see how good this assumption is
+func autowsize(bounds image.Rectangle) int {
+	return bounds.Dx() / 60
+}
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n")
+		flag.PrintDefaults()
+	}
+	wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.")
+	ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
+	btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.")
+	flag.Parse()
+	if flag.NArg() < 2 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	f, err := os.Open(flag.Arg(0))
+	defer f.Close()
+	if err != nil {
+		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
+	}
+	img, _, err := image.Decode(f)
+	if err != nil {
+		log.Fatalf("Could not decode image: %v\n", err)
+	}
+	b := img.Bounds()
+	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
+	draw.Draw(gray, b, img, b.Min, draw.Src)
+
+	if *wsize == 0 {
+		*wsize = autowsize(b)
+		log.Printf("Set window size to %d\n", *wsize)
+	}
+
+	if *wsize%2 == 0 {
+		*wsize++
+	}
+
+	// TODO: come up with a way to set a good ksize automatically
+
+	var thresh image.Image
+	thresh = preproc.IntegralSauvola(gray, *ksize, *wsize)
+
+	if *btype == "zeroinv" {
+		thresh, err = preproc.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA))
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	f, err = os.Create(flag.Arg(1))
+	if err != nil {
+		log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
+	}
+	defer f.Close()
+	err = png.Encode(f, thresh)
+	if err != nil {
+		log.Fatalf("Could not encode image: %v\n", err)
+	}
+}
diff --git a/cmd/preproc/main.go b/cmd/preproc/main.go
new file mode 100644
index 0000000..5d71a62
--- /dev/null
+++ b/cmd/preproc/main.go
@@ -0,0 +1,90 @@
+package main
+
+// TODO: come up with a way to set a good ksize automatically
+
+import (
+	"flag"
+	"fmt"
+	"image"
+	"image/draw"
+	_ "image/jpeg"
+	"image/png"
+	"log"
+	"os"
+
+	"rescribe.xyz/preproc"
+)
+
+// TODO: do more testing to see how good this assumption is
+func autowsize(bounds image.Rectangle) int {
+	return bounds.Dx() / 60
+}
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-m minperc] [-nowipe] [-wt wipethresh] [-ws wipesize] inimg outimg\n")
+		fmt.Fprintf(os.Stderr, "Binarize and preprocess an image\n")
+		flag.PrintDefaults()
+	}
+	binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
+	ksize := flag.Float64("k", 0.5, "K for sauvola binarization algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
+	btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
+	min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
+	nowipe := flag.Bool("nowipe", false, "Disable wiping completely.")
+	wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
+	thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.")
+	flag.Parse()
+	if flag.NArg() < 2 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	f, err := os.Open(flag.Arg(0))
+	defer f.Close()
+	if err != nil {
+		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
+	}
+	img, _, err := image.Decode(f)
+	if err != nil {
+		log.Fatalf("Could not decode image: %v\n", err)
+	}
+	b := img.Bounds()
+	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
+	draw.Draw(gray, b, img, b.Min, draw.Src)
+
+	if *binwsize == 0 {
+		*binwsize = autowsize(b)
+	}
+
+	if *binwsize%2 == 0 {
+		*binwsize++
+	}
+
+	log.Print("Binarising")
+	var clean, threshimg image.Image
+	threshimg = preproc.IntegralSauvola(gray, *ksize, *binwsize)
+
+	if *btype == "zeroinv" {
+		threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA))
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	if !*nowipe {
+		log.Print("Wiping sides")
+		clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh, *min)
+	} else {
+		clean = threshimg
+	}
+
+	f, err = os.Create(flag.Arg(1))
+	if err != nil {
+		log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
+	}
+	defer f.Close()
+	err = png.Encode(f, clean)
+	if err != nil {
+		log.Fatalf("Could not encode image: %v\n", err)
+	}
+}
diff --git a/cmd/preprocmulti/main.go b/cmd/preprocmulti/main.go
new file mode 100644
index 0000000..eb9c018
--- /dev/null
+++ b/cmd/preprocmulti/main.go
@@ -0,0 +1,101 @@
+package main
+
+// TODO: come up with a way to set a good ksize automatically
+
+import (
+	"flag"
+	"fmt"
+	"image"
+	"image/draw"
+	_ "image/jpeg"
+	"image/png"
+	"log"
+	"os"
+
+	"rescribe.xyz/preproc"
+	"rescribe.xyz/preproc/integralimg"
+)
+
+// TODO: do more testing to see how good this assumption is
+func autowsize(bounds image.Rectangle) int {
+	return bounds.Dx() / 60
+}
+
+func main() {
+	ksizes := []float64{0.1, 0.2, 0.4, 0.5}
+
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: preprocmulti [-bt bintype] [-bw winsize] [-m minperc] [-nowipe] [-ws wipesize] inimg outbase\n")
+		fmt.Fprintf(os.Stderr, "Binarize and preprocess an image, with multiple binarisation levels,\n")
+		fmt.Fprintf(os.Stderr, "saving images to outbase_bin{k}.png.\n")
+		fmt.Fprintf(os.Stderr, "Binarises with these levels for k: %v.\n", ksizes)
+		flag.PrintDefaults()
+	}
+	binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
+	btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
+	min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
+	nowipe := flag.Bool("nowipe", false, "Disable wiping completely.")
+	wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
+	flag.Parse()
+	if flag.NArg() < 2 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	log.Printf("Opening %s\n", flag.Arg(0))
+	f, err := os.Open(flag.Arg(0))
+	defer f.Close()
+	if err != nil {
+		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
+	}
+	img, _, err := image.Decode(f)
+	if err != nil {
+		log.Fatalf("Could not decode image: %v\n", err)
+	}
+	b := img.Bounds()
+	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
+	draw.Draw(gray, b, img, b.Min, draw.Src)
+
+	if *binwsize == 0 {
+		*binwsize = autowsize(b)
+	}
+
+	if *binwsize%2 == 0 {
+		*binwsize++
+	}
+
+	var clean, threshimg image.Image
+	log.Print("Precalculating integral images")
+	integrals := integralimg.ToAllIntegralImg(gray)
+
+	for _, k := range ksizes {
+		log.Print("Binarising")
+		threshimg = preproc.PreCalcedSauvola(integrals, gray, k, *binwsize)
+
+		if *btype == "zeroinv" {
+			threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA))
+			if err != nil {
+				log.Fatal(err)
+			}
+		}
+
+		if !*nowipe {
+			log.Print("Wiping sides")
+			clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, k*0.02, *min)
+		} else {
+			clean = threshimg
+		}
+
+		savefn := fmt.Sprintf("%s_bin%0.1f.png", flag.Arg(1), k)
+		log.Printf("Saving %s\n", savefn)
+		f, err = os.Create(savefn)
+		if err != nil {
+			log.Fatalf("Could not create file %s: %v\n", savefn, err)
+		}
+		defer f.Close()
+		err = png.Encode(f, clean)
+		if err != nil {
+			log.Fatalf("Could not encode image: %v\n", err)
+		}
+	}
+}
diff --git a/cmd/wipe/main.go b/cmd/wipe/main.go
new file mode 100644
index 0000000..6254946
--- /dev/null
+++ b/cmd/wipe/main.go
@@ -0,0 +1,55 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"image"
+	"image/draw"
+	_ "image/jpeg"
+	"image/png"
+	"log"
+	"os"
+
+	"rescribe.xyz/preproc"
+)
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: wipe [-m minperc] [-t thresh] [-w winsize] inimg outimg\n")
+		fmt.Fprintf(os.Stderr, "Wipes the sections of an image which are outside the content area.\n")
+		flag.PrintDefaults()
+	}
+	min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
+	thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge. Higher means more aggressive wiping.")
+	wsize := flag.Int("w", 5, "Window size for mask finding algorithm.")
+	flag.Parse()
+	if flag.NArg() < 2 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	f, err := os.Open(flag.Arg(0))
+	defer f.Close()
+	if err != nil {
+		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
+	}
+	img, _, err := image.Decode(f)
+	if err != nil {
+		log.Fatalf("Could not decode image: %v\n", err)
+	}
+	b := img.Bounds()
+	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
+	draw.Draw(gray, b, img, b.Min, draw.Src)
+
+	clean := preproc.Wipe(gray, *wsize, *thresh, *min)
+
+	f, err = os.Create(flag.Arg(1))
+	if err != nil {
+		log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
+	}
+	defer f.Close()
+	err = png.Encode(f, clean)
+	if err != nil {
+		log.Fatalf("Could not encode image: %v\n", err)
+	}
+}
-- 
cgit v1.2.1-24-ge1ad