summaryrefslogtreecommitdiff
path: root/cleanup
diff options
context:
space:
mode:
authorNick White <git@njw.name>2019-05-13 17:31:26 +0100
committerNick White <git@njw.name>2019-05-13 17:31:26 +0100
commita931288d3e813d069a2b6b010e5af4d73d308cf2 (patch)
tree390efabf90d484568e5ed12bd4ed0603e40dfd48 /cleanup
parent1ac5035ef55d2f97d1b98a309d83a09a84395e07 (diff)
Rename cleanup package to preproc, and add basic cmd version
Diffstat (limited to 'cleanup')
-rw-r--r--cleanup/testdata/pg1.pngbin30803 -> 0 bytes
-rw-r--r--cleanup/testdata/pg1_integralwipesides_t0.02_w5.pngbin33595 -> 0 bytes
-rw-r--r--cleanup/testdata/pg1_integralwipesides_t0.05_w25.pngbin33432 -> 0 bytes
-rw-r--r--cleanup/testdata/pg1_integralwipesides_t0.05_w5.pngbin14546 -> 0 bytes
-rw-r--r--cleanup/wipesides.go122
-rw-r--r--cleanup/wipesides_test.go105
6 files changed, 0 insertions, 227 deletions
diff --git a/cleanup/testdata/pg1.png b/cleanup/testdata/pg1.png
deleted file mode 100644
index c7c4249..0000000
--- a/cleanup/testdata/pg1.png
+++ /dev/null
Binary files differ
diff --git a/cleanup/testdata/pg1_integralwipesides_t0.02_w5.png b/cleanup/testdata/pg1_integralwipesides_t0.02_w5.png
deleted file mode 100644
index 6b4ccb2..0000000
--- a/cleanup/testdata/pg1_integralwipesides_t0.02_w5.png
+++ /dev/null
Binary files differ
diff --git a/cleanup/testdata/pg1_integralwipesides_t0.05_w25.png b/cleanup/testdata/pg1_integralwipesides_t0.05_w25.png
deleted file mode 100644
index 39dc88d..0000000
--- a/cleanup/testdata/pg1_integralwipesides_t0.05_w25.png
+++ /dev/null
Binary files differ
diff --git a/cleanup/testdata/pg1_integralwipesides_t0.05_w5.png b/cleanup/testdata/pg1_integralwipesides_t0.05_w5.png
deleted file mode 100644
index 50df855..0000000
--- a/cleanup/testdata/pg1_integralwipesides_t0.05_w5.png
+++ /dev/null
Binary files differ
diff --git a/cleanup/wipesides.go b/cleanup/wipesides.go
deleted file mode 100644
index ce3b374..0000000
--- a/cleanup/wipesides.go
+++ /dev/null
@@ -1,122 +0,0 @@
-package cleanup
-
-// TODO: add minimum size variable (default ~30%?)
-// TODO: have the integral image specific stuff done by interface functions
-
-import (
- "image"
- "image/color"
-)
-
-type windowslice struct {
- topleft uint64
- topright uint64
- bottomleft uint64
- bottomright uint64
-}
-
-func getwindowslice(i [][]uint64, x int, size int) windowslice {
- maxy := len(i) - 1
- maxx := x + size
- if maxx > len(i[0])-1 {
- maxx = len(i[0]) - 1
- }
-
- return windowslice{i[0][x], i[0][maxx], i[maxy][x], i[maxy][maxx]}
-}
-
-// checkwindow checks the window from x to see whether more than
-// thresh proportion of the pixels are white, if so it returns true.
-func checkwindow(integral [][]uint64, x int, size int, thresh float64) bool {
- height := len(integral)
- window := getwindowslice(integral, x, size)
- // divide by 255 as each on pixel has the value of 255
- sum := (window.bottomright + window.topleft - window.topright - window.bottomleft) / 255
- area := size * height
- proportion := float64(area)/float64(sum) - 1
- return proportion <= thresh
-}
-
-// returns the proportion of the given window that is black pixels
-func proportion(integral [][]uint64, x int, size int) float64 {
- height := len(integral)
- window := getwindowslice(integral, x, size)
- // divide by 255 as each on pixel has the value of 255
- sum := (window.bottomright + window.topleft - window.topright - window.bottomleft) / 255
- area := size * height
- return float64(area)/float64(sum) - 1
-}
-
-// findbestedge goes through every vertical line from x to x+w to
-// find the one with the lowest proportion of black pixels.
-func findbestedge(integral [][]uint64, x int, w int) int {
- var bestx int
- var best float64
-
- if w == 1 {
- return x
- }
-
- right := x + w
- for ; x < right; x++ {
- prop := proportion(integral, x, 1)
- if prop > best {
- best = prop
- bestx = x
- }
- }
-
- return bestx
-}
-
-// Findedges finds the edges of the main content, by moving a window of wsize
-// from the middle of the image to the left and right, stopping when it reaches
-// a point at which there is a lower proportion of black pixels than thresh.
-func Findedges(integral [][]uint64, wsize int, thresh float64) (int, int) {
- maxx := len(integral[0]) - 1
- var lowedge, highedge int = 0, maxx
-
- for x := maxx / 2; x < maxx-wsize; x++ {
- if checkwindow(integral, x, wsize, thresh) {
- highedge = findbestedge(integral, x, wsize)
- break
- }
- }
-
- for x := maxx / 2; x > 0; x-- {
- if checkwindow(integral, x, wsize, thresh) {
- lowedge = findbestedge(integral, x, wsize)
- break
- }
- }
-
- return lowedge, highedge
-}
-
-// Wipesides fills the sections of image not within the boundaries
-// of lowedge and highedge with white
-func Wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- // set left edge white
- for x := b.Min.X; x < lowedge; x++ {
- for y := b.Min.Y; y < b.Max.Y; y++ {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- // copy middle
- for x := lowedge; x < highedge; x++ {
- for y := b.Min.Y; y < b.Max.Y; y++ {
- new.SetGray(x, y, img.GrayAt(x, y))
- }
- }
- // set right edge white
- for x := highedge; x < b.Max.X; x++ {
- for y := b.Min.Y; y < b.Max.Y; y++ {
- new.SetGray(x, y, color.Gray{255})
- }
- }
-
- return new
-}
diff --git a/cleanup/wipesides_test.go b/cleanup/wipesides_test.go
deleted file mode 100644
index aa3e590..0000000
--- a/cleanup/wipesides_test.go
+++ /dev/null
@@ -1,105 +0,0 @@
-package cleanup
-
-// TODO: add different pages as test cases
-// TODO: test non integral img version
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- "image/png"
- "os"
- "testing"
-
- "rescribe.xyz/go.git/binarize"
-)
-
-var update = flag.Bool("update", false, "update golden files")
-
-func decode(s string) (*image.Gray, error) {
- f, err := os.Open(s)
- defer f.Close()
- if err != nil {
- return nil, err
- }
- img, err := png.Decode(f)
- if err != nil {
- return nil, err
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
- return gray, nil
-}
-
-func imgsequal(img1 *image.Gray, img2 *image.Gray) bool {
- b := img1.Bounds()
- if !b.Eq(img2.Bounds()) {
- return false
- }
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- r0, g0, b0, a0 := img1.At(x, y).RGBA()
- r1, g1, b1, a1 := img2.At(x, y).RGBA()
- if r0 != r1 {
- return false
- }
- if g0 != g1 {
- return false
- }
- if b0 != b1 {
- return false
- }
- if a0 != a1 {
- return false
- }
- }
- }
- return true
-}
-
-func TestWipeSides(t *testing.T) {
- cases := []struct {
- name string
- orig string
- golden string
- thresh float64
- wsize int
- }{
- {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.02_w5.png", 0.02, 5},
- {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w5.png", 0.05, 5},
- {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w25.png", 0.05, 25},
- }
-
- for _, c := range cases {
- t.Run(fmt.Sprintf("%s_%0.2f_%d", c.name, c.thresh, c.wsize), func(t *testing.T) {
- var actual *image.Gray
- orig, err := decode(c.orig)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.orig, err)
- }
- integral := binarize.Integralimg(orig)
- lowedge, highedge := Findedges(integral, c.wsize, c.thresh)
- actual = Wipesides(orig, lowedge, highedge)
- if *update {
- f, err := os.Create(c.golden)
- defer f.Close()
- if err != nil {
- t.Fatalf("Could not open file %s to update: %v\n", c.golden, err)
- }
- err = png.Encode(f, actual)
- if err != nil {
- t.Fatalf("Could not encode update of %s: %v\n", c.golden, err)
- }
- }
- golden, err := decode(c.golden)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.golden, err)
- }
- if !imgsequal(golden, actual) {
- t.Errorf("Processed %s differs to %s\n", c.orig, c.golden)
- }
- })
- }
-}