From a931288d3e813d069a2b6b010e5af4d73d308cf2 Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Mon, 13 May 2019 17:31:26 +0100
Subject: Rename cleanup package to preproc, and add basic cmd version

---
 cleanup/testdata/pg1.png                           | Bin 30803 -> 0 bytes
 .../testdata/pg1_integralwipesides_t0.02_w5.png    | Bin 33595 -> 0 bytes
 .../testdata/pg1_integralwipesides_t0.05_w25.png   | Bin 33432 -> 0 bytes
 .../testdata/pg1_integralwipesides_t0.05_w5.png    | Bin 14546 -> 0 bytes
 cleanup/wipesides.go                               | 122 ---------------------
 cleanup/wipesides_test.go                          | 105 ------------------
 6 files changed, 227 deletions(-)
 delete mode 100644 cleanup/testdata/pg1.png
 delete mode 100644 cleanup/testdata/pg1_integralwipesides_t0.02_w5.png
 delete mode 100644 cleanup/testdata/pg1_integralwipesides_t0.05_w25.png
 delete mode 100644 cleanup/testdata/pg1_integralwipesides_t0.05_w5.png
 delete mode 100644 cleanup/wipesides.go
 delete mode 100644 cleanup/wipesides_test.go

(limited to 'cleanup')

diff --git a/cleanup/testdata/pg1.png b/cleanup/testdata/pg1.png
deleted file mode 100644
index c7c4249..0000000
Binary files a/cleanup/testdata/pg1.png and /dev/null differ
diff --git a/cleanup/testdata/pg1_integralwipesides_t0.02_w5.png b/cleanup/testdata/pg1_integralwipesides_t0.02_w5.png
deleted file mode 100644
index 6b4ccb2..0000000
Binary files a/cleanup/testdata/pg1_integralwipesides_t0.02_w5.png and /dev/null differ
diff --git a/cleanup/testdata/pg1_integralwipesides_t0.05_w25.png b/cleanup/testdata/pg1_integralwipesides_t0.05_w25.png
deleted file mode 100644
index 39dc88d..0000000
Binary files a/cleanup/testdata/pg1_integralwipesides_t0.05_w25.png and /dev/null differ
diff --git a/cleanup/testdata/pg1_integralwipesides_t0.05_w5.png b/cleanup/testdata/pg1_integralwipesides_t0.05_w5.png
deleted file mode 100644
index 50df855..0000000
Binary files a/cleanup/testdata/pg1_integralwipesides_t0.05_w5.png and /dev/null differ
diff --git a/cleanup/wipesides.go b/cleanup/wipesides.go
deleted file mode 100644
index ce3b374..0000000
--- a/cleanup/wipesides.go
+++ /dev/null
@@ -1,122 +0,0 @@
-package cleanup
-
-// TODO: add minimum size variable (default ~30%?)
-// TODO: have the integral image specific stuff done by interface functions
-
-import (
-	"image"
-	"image/color"
-)
-
-type windowslice struct {
-	topleft     uint64
-	topright    uint64
-	bottomleft  uint64
-	bottomright uint64
-}
-
-func getwindowslice(i [][]uint64, x int, size int) windowslice {
-	maxy := len(i) - 1
-	maxx := x + size
-	if maxx > len(i[0])-1 {
-		maxx = len(i[0]) - 1
-	}
-
-	return windowslice{i[0][x], i[0][maxx], i[maxy][x], i[maxy][maxx]}
-}
-
-// checkwindow checks the window from x to see whether more than
-// thresh proportion of the pixels are white, if so it returns true.
-func checkwindow(integral [][]uint64, x int, size int, thresh float64) bool {
-	height := len(integral)
-	window := getwindowslice(integral, x, size)
-	// divide by 255 as each on pixel has the value of 255
-	sum := (window.bottomright + window.topleft - window.topright - window.bottomleft) / 255
-	area := size * height
-	proportion := float64(area)/float64(sum) - 1
-	return proportion <= thresh
-}
-
-// returns the proportion of the given window that is black pixels
-func proportion(integral [][]uint64, x int, size int) float64 {
-	height := len(integral)
-	window := getwindowslice(integral, x, size)
-	// divide by 255 as each on pixel has the value of 255
-	sum := (window.bottomright + window.topleft - window.topright - window.bottomleft) / 255
-	area := size * height
-	return float64(area)/float64(sum) - 1
-}
-
-// findbestedge goes through every vertical line from x to x+w to
-// find the one with the lowest proportion of black pixels.
-func findbestedge(integral [][]uint64, x int, w int) int {
-	var bestx int
-	var best float64
-
-	if w == 1 {
-		return x
-	}
-
-	right := x + w
-	for ; x < right; x++ {
-		prop := proportion(integral, x, 1)
-		if prop > best {
-			best = prop
-			bestx = x
-		}
-	}
-
-	return bestx
-}
-
-// Findedges finds the edges of the main content, by moving a window of wsize
-// from the middle of the image to the left and right, stopping when it reaches
-// a point at which there is a lower proportion of black pixels than thresh.
-func Findedges(integral [][]uint64, wsize int, thresh float64) (int, int) {
-	maxx := len(integral[0]) - 1
-	var lowedge, highedge int = 0, maxx
-
-	for x := maxx / 2; x < maxx-wsize; x++ {
-		if checkwindow(integral, x, wsize, thresh) {
-			highedge = findbestedge(integral, x, wsize)
-			break
-		}
-	}
-
-	for x := maxx / 2; x > 0; x-- {
-		if checkwindow(integral, x, wsize, thresh) {
-			lowedge = findbestedge(integral, x, wsize)
-			break
-		}
-	}
-
-	return lowedge, highedge
-}
-
-// Wipesides fills the sections of image not within the boundaries
-// of lowedge and highedge with white
-func Wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray {
-	b := img.Bounds()
-	new := image.NewGray(b)
-
-	// set left edge white
-	for x := b.Min.X; x < lowedge; x++ {
-		for y := b.Min.Y; y < b.Max.Y; y++ {
-			new.SetGray(x, y, color.Gray{255})
-		}
-	}
-	// copy middle
-	for x := lowedge; x < highedge; x++ {
-		for y := b.Min.Y; y < b.Max.Y; y++ {
-			new.SetGray(x, y, img.GrayAt(x, y))
-		}
-	}
-	// set right edge white
-	for x := highedge; x < b.Max.X; x++ {
-		for y := b.Min.Y; y < b.Max.Y; y++ {
-			new.SetGray(x, y, color.Gray{255})
-		}
-	}
-
-	return new
-}
diff --git a/cleanup/wipesides_test.go b/cleanup/wipesides_test.go
deleted file mode 100644
index aa3e590..0000000
--- a/cleanup/wipesides_test.go
+++ /dev/null
@@ -1,105 +0,0 @@
-package cleanup
-
-// TODO: add different pages as test cases
-// TODO: test non integral img version
-
-import (
-	"flag"
-	"fmt"
-	"image"
-	"image/draw"
-	"image/png"
-	"os"
-	"testing"
-
-	"rescribe.xyz/go.git/binarize"
-)
-
-var update = flag.Bool("update", false, "update golden files")
-
-func decode(s string) (*image.Gray, error) {
-	f, err := os.Open(s)
-	defer f.Close()
-	if err != nil {
-		return nil, err
-	}
-	img, err := png.Decode(f)
-	if err != nil {
-		return nil, err
-	}
-	b := img.Bounds()
-	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
-	draw.Draw(gray, b, img, b.Min, draw.Src)
-	return gray, nil
-}
-
-func imgsequal(img1 *image.Gray, img2 *image.Gray) bool {
-	b := img1.Bounds()
-	if !b.Eq(img2.Bounds()) {
-		return false
-	}
-	for y := b.Min.Y; y < b.Max.Y; y++ {
-		for x := b.Min.X; x < b.Max.X; x++ {
-			r0, g0, b0, a0 := img1.At(x, y).RGBA()
-			r1, g1, b1, a1 := img2.At(x, y).RGBA()
-			if r0 != r1 {
-				return false
-			}
-			if g0 != g1 {
-				return false
-			}
-			if b0 != b1 {
-				return false
-			}
-			if a0 != a1 {
-				return false
-			}
-		}
-	}
-	return true
-}
-
-func TestWipeSides(t *testing.T) {
-	cases := []struct {
-		name   string
-		orig   string
-		golden string
-		thresh float64
-		wsize  int
-	}{
-		{"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.02_w5.png", 0.02, 5},
-		{"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w5.png", 0.05, 5},
-		{"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w25.png", 0.05, 25},
-	}
-
-	for _, c := range cases {
-		t.Run(fmt.Sprintf("%s_%0.2f_%d", c.name, c.thresh, c.wsize), func(t *testing.T) {
-			var actual *image.Gray
-			orig, err := decode(c.orig)
-			if err != nil {
-				t.Fatalf("Could not open file %s: %v\n", c.orig, err)
-			}
-			integral := binarize.Integralimg(orig)
-			lowedge, highedge := Findedges(integral, c.wsize, c.thresh)
-			actual = Wipesides(orig, lowedge, highedge)
-			if *update {
-				f, err := os.Create(c.golden)
-				defer f.Close()
-				if err != nil {
-					t.Fatalf("Could not open file %s to update: %v\n", c.golden, err)
-				}
-				err = png.Encode(f, actual)
-				if err != nil {
-					t.Fatalf("Could not encode update of %s: %v\n", c.golden, err)
-				}
-			}
-			golden, err := decode(c.golden)
-			if err != nil {
-				t.Fatalf("Could not open file %s: %v\n", c.golden, err)
-			}
-			if !imgsequal(golden, actual) {
-				t.Errorf("Processed %s differs to %s\n", c.orig, c.golden)
-			}
-		})
-	}
-}
-- 
cgit v1.2.1-24-ge1ad