From a931288d3e813d069a2b6b010e5af4d73d308cf2 Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 13 May 2019 17:31:26 +0100 Subject: Rename cleanup package to preproc, and add basic cmd version --- cleanup/testdata/pg1.png | Bin 30803 -> 0 bytes .../testdata/pg1_integralwipesides_t0.02_w5.png | Bin 33595 -> 0 bytes .../testdata/pg1_integralwipesides_t0.05_w25.png | Bin 33432 -> 0 bytes .../testdata/pg1_integralwipesides_t0.05_w5.png | Bin 14546 -> 0 bytes cleanup/wipesides.go | 122 --------------------- cleanup/wipesides_test.go | 105 ------------------ preproc/cmd/cleanup/main.go | 62 +++++++++++ preproc/testdata/pg1.png | Bin 0 -> 30803 bytes .../testdata/pg1_integralwipesides_t0.02_w5.png | Bin 0 -> 33595 bytes .../testdata/pg1_integralwipesides_t0.05_w25.png | Bin 0 -> 33432 bytes .../testdata/pg1_integralwipesides_t0.05_w5.png | Bin 0 -> 14546 bytes preproc/wipesides.go | 122 +++++++++++++++++++++ preproc/wipesides_test.go | 105 ++++++++++++++++++ 13 files changed, 289 insertions(+), 227 deletions(-) delete mode 100644 cleanup/testdata/pg1.png delete mode 100644 cleanup/testdata/pg1_integralwipesides_t0.02_w5.png delete mode 100644 cleanup/testdata/pg1_integralwipesides_t0.05_w25.png delete mode 100644 cleanup/testdata/pg1_integralwipesides_t0.05_w5.png delete mode 100644 cleanup/wipesides.go delete mode 100644 cleanup/wipesides_test.go create mode 100644 preproc/cmd/cleanup/main.go create mode 100644 preproc/testdata/pg1.png create mode 100644 preproc/testdata/pg1_integralwipesides_t0.02_w5.png create mode 100644 preproc/testdata/pg1_integralwipesides_t0.05_w25.png create mode 100644 preproc/testdata/pg1_integralwipesides_t0.05_w5.png create mode 100644 preproc/wipesides.go create mode 100644 preproc/wipesides_test.go diff --git a/cleanup/testdata/pg1.png b/cleanup/testdata/pg1.png deleted file mode 100644 index c7c4249..0000000 Binary files a/cleanup/testdata/pg1.png and /dev/null differ diff --git a/cleanup/testdata/pg1_integralwipesides_t0.02_w5.png b/cleanup/testdata/pg1_integralwipesides_t0.02_w5.png deleted file mode 100644 index 6b4ccb2..0000000 Binary files a/cleanup/testdata/pg1_integralwipesides_t0.02_w5.png and /dev/null differ diff --git a/cleanup/testdata/pg1_integralwipesides_t0.05_w25.png b/cleanup/testdata/pg1_integralwipesides_t0.05_w25.png deleted file mode 100644 index 39dc88d..0000000 Binary files a/cleanup/testdata/pg1_integralwipesides_t0.05_w25.png and /dev/null differ diff --git a/cleanup/testdata/pg1_integralwipesides_t0.05_w5.png b/cleanup/testdata/pg1_integralwipesides_t0.05_w5.png deleted file mode 100644 index 50df855..0000000 Binary files a/cleanup/testdata/pg1_integralwipesides_t0.05_w5.png and /dev/null differ diff --git a/cleanup/wipesides.go b/cleanup/wipesides.go deleted file mode 100644 index ce3b374..0000000 --- a/cleanup/wipesides.go +++ /dev/null @@ -1,122 +0,0 @@ -package cleanup - -// TODO: add minimum size variable (default ~30%?) -// TODO: have the integral image specific stuff done by interface functions - -import ( - "image" - "image/color" -) - -type windowslice struct { - topleft uint64 - topright uint64 - bottomleft uint64 - bottomright uint64 -} - -func getwindowslice(i [][]uint64, x int, size int) windowslice { - maxy := len(i) - 1 - maxx := x + size - if maxx > len(i[0])-1 { - maxx = len(i[0]) - 1 - } - - return windowslice{i[0][x], i[0][maxx], i[maxy][x], i[maxy][maxx]} -} - -// checkwindow checks the window from x to see whether more than -// thresh proportion of the pixels are white, if so it returns true. -func checkwindow(integral [][]uint64, x int, size int, thresh float64) bool { - height := len(integral) - window := getwindowslice(integral, x, size) - // divide by 255 as each on pixel has the value of 255 - sum := (window.bottomright + window.topleft - window.topright - window.bottomleft) / 255 - area := size * height - proportion := float64(area)/float64(sum) - 1 - return proportion <= thresh -} - -// returns the proportion of the given window that is black pixels -func proportion(integral [][]uint64, x int, size int) float64 { - height := len(integral) - window := getwindowslice(integral, x, size) - // divide by 255 as each on pixel has the value of 255 - sum := (window.bottomright + window.topleft - window.topright - window.bottomleft) / 255 - area := size * height - return float64(area)/float64(sum) - 1 -} - -// findbestedge goes through every vertical line from x to x+w to -// find the one with the lowest proportion of black pixels. -func findbestedge(integral [][]uint64, x int, w int) int { - var bestx int - var best float64 - - if w == 1 { - return x - } - - right := x + w - for ; x < right; x++ { - prop := proportion(integral, x, 1) - if prop > best { - best = prop - bestx = x - } - } - - return bestx -} - -// Findedges finds the edges of the main content, by moving a window of wsize -// from the middle of the image to the left and right, stopping when it reaches -// a point at which there is a lower proportion of black pixels than thresh. -func Findedges(integral [][]uint64, wsize int, thresh float64) (int, int) { - maxx := len(integral[0]) - 1 - var lowedge, highedge int = 0, maxx - - for x := maxx / 2; x < maxx-wsize; x++ { - if checkwindow(integral, x, wsize, thresh) { - highedge = findbestedge(integral, x, wsize) - break - } - } - - for x := maxx / 2; x > 0; x-- { - if checkwindow(integral, x, wsize, thresh) { - lowedge = findbestedge(integral, x, wsize) - break - } - } - - return lowedge, highedge -} - -// Wipesides fills the sections of image not within the boundaries -// of lowedge and highedge with white -func Wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - // set left edge white - for x := b.Min.X; x < lowedge; x++ { - for y := b.Min.Y; y < b.Max.Y; y++ { - new.SetGray(x, y, color.Gray{255}) - } - } - // copy middle - for x := lowedge; x < highedge; x++ { - for y := b.Min.Y; y < b.Max.Y; y++ { - new.SetGray(x, y, img.GrayAt(x, y)) - } - } - // set right edge white - for x := highedge; x < b.Max.X; x++ { - for y := b.Min.Y; y < b.Max.Y; y++ { - new.SetGray(x, y, color.Gray{255}) - } - } - - return new -} diff --git a/cleanup/wipesides_test.go b/cleanup/wipesides_test.go deleted file mode 100644 index aa3e590..0000000 --- a/cleanup/wipesides_test.go +++ /dev/null @@ -1,105 +0,0 @@ -package cleanup - -// TODO: add different pages as test cases -// TODO: test non integral img version - -import ( - "flag" - "fmt" - "image" - "image/draw" - "image/png" - "os" - "testing" - - "rescribe.xyz/go.git/binarize" -) - -var update = flag.Bool("update", false, "update golden files") - -func decode(s string) (*image.Gray, error) { - f, err := os.Open(s) - defer f.Close() - if err != nil { - return nil, err - } - img, err := png.Decode(f) - if err != nil { - return nil, err - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - return gray, nil -} - -func imgsequal(img1 *image.Gray, img2 *image.Gray) bool { - b := img1.Bounds() - if !b.Eq(img2.Bounds()) { - return false - } - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - r0, g0, b0, a0 := img1.At(x, y).RGBA() - r1, g1, b1, a1 := img2.At(x, y).RGBA() - if r0 != r1 { - return false - } - if g0 != g1 { - return false - } - if b0 != b1 { - return false - } - if a0 != a1 { - return false - } - } - } - return true -} - -func TestWipeSides(t *testing.T) { - cases := []struct { - name string - orig string - golden string - thresh float64 - wsize int - }{ - {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.02_w5.png", 0.02, 5}, - {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w5.png", 0.05, 5}, - {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w25.png", 0.05, 25}, - } - - for _, c := range cases { - t.Run(fmt.Sprintf("%s_%0.2f_%d", c.name, c.thresh, c.wsize), func(t *testing.T) { - var actual *image.Gray - orig, err := decode(c.orig) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.orig, err) - } - integral := binarize.Integralimg(orig) - lowedge, highedge := Findedges(integral, c.wsize, c.thresh) - actual = Wipesides(orig, lowedge, highedge) - if *update { - f, err := os.Create(c.golden) - defer f.Close() - if err != nil { - t.Fatalf("Could not open file %s to update: %v\n", c.golden, err) - } - err = png.Encode(f, actual) - if err != nil { - t.Fatalf("Could not encode update of %s: %v\n", c.golden, err) - } - } - golden, err := decode(c.golden) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.golden, err) - } - if !imgsequal(golden, actual) { - t.Errorf("Processed %s differs to %s\n", c.orig, c.golden) - } - }) - } -} diff --git a/preproc/cmd/cleanup/main.go b/preproc/cmd/cleanup/main.go new file mode 100644 index 0000000..7ea0c84 --- /dev/null +++ b/preproc/cmd/cleanup/main.go @@ -0,0 +1,62 @@ +package main + +// TODO: add minimum size variable (default ~30%?) +// TODO: make into a small library +// TODO: have the integral image specific stuff done by interface functions + +import ( + "flag" + "fmt" + "image" + "image/draw" + _ "image/jpeg" + "image/png" + "log" + "os" + + "rescribe.xyz/go.git/binarize" + "rescribe.xyz/go.git/preproc" +) + +func main() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: cleanup [-t thresh] [-w winsize] inimg outimg\n") + flag.PrintDefaults() + } + wsize := flag.Int("w", 5, "Window size for mask finding algorithm.") + thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge.") + flag.Parse() + if flag.NArg() < 2 { + flag.Usage() + os.Exit(1) + } + + f, err := os.Open(flag.Arg(0)) + defer f.Close() + if err != nil { + log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) + } + img, _, err := image.Decode(f) + if err != nil { + log.Fatalf("Could not decode image: %v\n", err) + } + b := img.Bounds() + gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) + draw.Draw(gray, b, img, b.Min, draw.Src) + + integral := binarize.Integralimg(gray) + + lowedge, highedge := preproc.Findedges(integral, *wsize, *thresh) + + clean := preproc.Wipesides(gray, lowedge, highedge) + + f, err = os.Create(flag.Arg(1)) + if err != nil { + log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) + } + defer f.Close() + err = png.Encode(f, clean) + if err != nil { + log.Fatalf("Could not encode image: %v\n", err) + } +} diff --git a/preproc/testdata/pg1.png b/preproc/testdata/pg1.png new file mode 100644 index 0000000..c7c4249 Binary files /dev/null and b/preproc/testdata/pg1.png differ diff --git a/preproc/testdata/pg1_integralwipesides_t0.02_w5.png b/preproc/testdata/pg1_integralwipesides_t0.02_w5.png new file mode 100644 index 0000000..6b4ccb2 Binary files /dev/null and b/preproc/testdata/pg1_integralwipesides_t0.02_w5.png differ diff --git a/preproc/testdata/pg1_integralwipesides_t0.05_w25.png b/preproc/testdata/pg1_integralwipesides_t0.05_w25.png new file mode 100644 index 0000000..39dc88d Binary files /dev/null and b/preproc/testdata/pg1_integralwipesides_t0.05_w25.png differ diff --git a/preproc/testdata/pg1_integralwipesides_t0.05_w5.png b/preproc/testdata/pg1_integralwipesides_t0.05_w5.png new file mode 100644 index 0000000..50df855 Binary files /dev/null and b/preproc/testdata/pg1_integralwipesides_t0.05_w5.png differ diff --git a/preproc/wipesides.go b/preproc/wipesides.go new file mode 100644 index 0000000..2afe1d2 --- /dev/null +++ b/preproc/wipesides.go @@ -0,0 +1,122 @@ +package preproc + +// TODO: add minimum size variable (default ~30%?) +// TODO: have the integral image specific stuff done by interface functions + +import ( + "image" + "image/color" +) + +type windowslice struct { + topleft uint64 + topright uint64 + bottomleft uint64 + bottomright uint64 +} + +func getwindowslice(i [][]uint64, x int, size int) windowslice { + maxy := len(i) - 1 + maxx := x + size + if maxx > len(i[0])-1 { + maxx = len(i[0]) - 1 + } + + return windowslice{i[0][x], i[0][maxx], i[maxy][x], i[maxy][maxx]} +} + +// checkwindow checks the window from x to see whether more than +// thresh proportion of the pixels are white, if so it returns true. +func checkwindow(integral [][]uint64, x int, size int, thresh float64) bool { + height := len(integral) + window := getwindowslice(integral, x, size) + // divide by 255 as each on pixel has the value of 255 + sum := (window.bottomright + window.topleft - window.topright - window.bottomleft) / 255 + area := size * height + proportion := float64(area)/float64(sum) - 1 + return proportion <= thresh +} + +// returns the proportion of the given window that is black pixels +func proportion(integral [][]uint64, x int, size int) float64 { + height := len(integral) + window := getwindowslice(integral, x, size) + // divide by 255 as each on pixel has the value of 255 + sum := (window.bottomright + window.topleft - window.topright - window.bottomleft) / 255 + area := size * height + return float64(area)/float64(sum) - 1 +} + +// findbestedge goes through every vertical line from x to x+w to +// find the one with the lowest proportion of black pixels. +func findbestedge(integral [][]uint64, x int, w int) int { + var bestx int + var best float64 + + if w == 1 { + return x + } + + right := x + w + for ; x < right; x++ { + prop := proportion(integral, x, 1) + if prop > best { + best = prop + bestx = x + } + } + + return bestx +} + +// Findedges finds the edges of the main content, by moving a window of wsize +// from the middle of the image to the left and right, stopping when it reaches +// a point at which there is a lower proportion of black pixels than thresh. +func Findedges(integral [][]uint64, wsize int, thresh float64) (int, int) { + maxx := len(integral[0]) - 1 + var lowedge, highedge int = 0, maxx + + for x := maxx / 2; x < maxx-wsize; x++ { + if checkwindow(integral, x, wsize, thresh) { + highedge = findbestedge(integral, x, wsize) + break + } + } + + for x := maxx / 2; x > 0; x-- { + if checkwindow(integral, x, wsize, thresh) { + lowedge = findbestedge(integral, x, wsize) + break + } + } + + return lowedge, highedge +} + +// Wipesides fills the sections of image not within the boundaries +// of lowedge and highedge with white +func Wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray { + b := img.Bounds() + new := image.NewGray(b) + + // set left edge white + for x := b.Min.X; x < lowedge; x++ { + for y := b.Min.Y; y < b.Max.Y; y++ { + new.SetGray(x, y, color.Gray{255}) + } + } + // copy middle + for x := lowedge; x < highedge; x++ { + for y := b.Min.Y; y < b.Max.Y; y++ { + new.SetGray(x, y, img.GrayAt(x, y)) + } + } + // set right edge white + for x := highedge; x < b.Max.X; x++ { + for y := b.Min.Y; y < b.Max.Y; y++ { + new.SetGray(x, y, color.Gray{255}) + } + } + + return new +} diff --git a/preproc/wipesides_test.go b/preproc/wipesides_test.go new file mode 100644 index 0000000..76151fb --- /dev/null +++ b/preproc/wipesides_test.go @@ -0,0 +1,105 @@ +package preproc + +// TODO: add different pages as test cases +// TODO: test non integral img version + +import ( + "flag" + "fmt" + "image" + "image/draw" + "image/png" + "os" + "testing" + + "rescribe.xyz/go.git/binarize" +) + +var update = flag.Bool("update", false, "update golden files") + +func decode(s string) (*image.Gray, error) { + f, err := os.Open(s) + defer f.Close() + if err != nil { + return nil, err + } + img, err := png.Decode(f) + if err != nil { + return nil, err + } + b := img.Bounds() + gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) + draw.Draw(gray, b, img, b.Min, draw.Src) + return gray, nil +} + +func imgsequal(img1 *image.Gray, img2 *image.Gray) bool { + b := img1.Bounds() + if !b.Eq(img2.Bounds()) { + return false + } + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + r0, g0, b0, a0 := img1.At(x, y).RGBA() + r1, g1, b1, a1 := img2.At(x, y).RGBA() + if r0 != r1 { + return false + } + if g0 != g1 { + return false + } + if b0 != b1 { + return false + } + if a0 != a1 { + return false + } + } + } + return true +} + +func TestWipeSides(t *testing.T) { + cases := []struct { + name string + orig string + golden string + thresh float64 + wsize int + }{ + {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.02_w5.png", 0.02, 5}, + {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w5.png", 0.05, 5}, + {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w25.png", 0.05, 25}, + } + + for _, c := range cases { + t.Run(fmt.Sprintf("%s_%0.2f_%d", c.name, c.thresh, c.wsize), func(t *testing.T) { + var actual *image.Gray + orig, err := decode(c.orig) + if err != nil { + t.Fatalf("Could not open file %s: %v\n", c.orig, err) + } + integral := binarize.Integralimg(orig) + lowedge, highedge := Findedges(integral, c.wsize, c.thresh) + actual = Wipesides(orig, lowedge, highedge) + if *update { + f, err := os.Create(c.golden) + defer f.Close() + if err != nil { + t.Fatalf("Could not open file %s to update: %v\n", c.golden, err) + } + err = png.Encode(f, actual) + if err != nil { + t.Fatalf("Could not encode update of %s: %v\n", c.golden, err) + } + } + golden, err := decode(c.golden) + if err != nil { + t.Fatalf("Could not open file %s: %v\n", c.golden, err) + } + if !imgsequal(golden, actual) { + t.Errorf("Processed %s differs to %s\n", c.orig, c.golden) + } + }) + } +} -- cgit v1.2.1-24-ge1ad