From 02a6c66eb77a5b455bcf2d0547d2383074eb7e41 Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 13 May 2019 19:23:03 +0100 Subject: Reorganise image manipulation to separate integral image parts Also unify everything else under preproc/ Note that the UsefulImg interface should be used by the main functions, to simplify things, but this hasn't been done yet. --- binarize/cmd/binarize/main.go | 78 ----------- binarize/integralimg.go | 116 ---------------- binarize/sauvola.go | 54 -------- binarize/sauvola_test.go | 108 --------------- binarize/testdata/pg1.png | Bin 651071 -> 0 bytes binarize/testdata/pg1_integralsauvola_k0.3_w19.png | Bin 19456 -> 0 bytes binarize/testdata/pg1_integralsauvola_k0.5_w19.png | Bin 18241 -> 0 bytes binarize/testdata/pg1_integralsauvola_k0.5_w41.png | Bin 18260 -> 0 bytes binarize/testdata/pg1_sauvola_k0.3_w19.png | Bin 19447 -> 0 bytes binarize/testdata/pg1_sauvola_k0.5_w19.png | Bin 18231 -> 0 bytes binarize/testdata/pg1_sauvola_k0.5_w41.png | Bin 18275 -> 0 bytes binarize/util.go | 87 ------------ integralimg/integralimg.go | 149 +++++++++++++++++++++ preproc/cmd/binarize/main.go | 78 +++++++++++ preproc/helpers_test.go | 56 ++++++++ preproc/sauvola.go | 55 ++++++++ preproc/sauvola_test.go | 62 +++++++++ preproc/testdata/pg1.png | Bin 30803 -> 651071 bytes preproc/testdata/pg1_integralsauvola_k0.3_w19.png | Bin 0 -> 19456 bytes preproc/testdata/pg1_integralsauvola_k0.5_w19.png | Bin 0 -> 18241 bytes preproc/testdata/pg1_integralsauvola_k0.5_w41.png | Bin 0 -> 18260 bytes .../testdata/pg1_integralwipesides_t0.02_w5.png | Bin 33595 -> 0 bytes .../testdata/pg1_integralwipesides_t0.05_w25.png | Bin 33432 -> 0 bytes .../testdata/pg1_integralwipesides_t0.05_w5.png | Bin 14546 -> 0 bytes preproc/testdata/pg1_sauvola_k0.3_w19.png | Bin 0 -> 19447 bytes preproc/testdata/pg1_sauvola_k0.5_w19.png | Bin 0 -> 18231 bytes preproc/testdata/pg1_sauvola_k0.5_w41.png | Bin 0 -> 18275 bytes preproc/testdata/pg2.png | Bin 0 -> 30803 bytes .../testdata/pg2_integralwipesides_t0.02_w5.png | Bin 0 -> 33595 bytes .../testdata/pg2_integralwipesides_t0.05_w25.png | Bin 0 -> 33432 bytes .../testdata/pg2_integralwipesides_t0.05_w5.png | Bin 0 -> 14546 bytes preproc/util.go | 95 +++++++++++++ preproc/wipesides.go | 4 +- preproc/wipesides_test.go | 52 +------ 34 files changed, 500 insertions(+), 494 deletions(-) delete mode 100644 binarize/cmd/binarize/main.go delete mode 100644 binarize/integralimg.go delete mode 100644 binarize/sauvola.go delete mode 100644 binarize/sauvola_test.go delete mode 100644 binarize/testdata/pg1.png delete mode 100644 binarize/testdata/pg1_integralsauvola_k0.3_w19.png delete mode 100644 binarize/testdata/pg1_integralsauvola_k0.5_w19.png delete mode 100644 binarize/testdata/pg1_integralsauvola_k0.5_w41.png delete mode 100644 binarize/testdata/pg1_sauvola_k0.3_w19.png delete mode 100644 binarize/testdata/pg1_sauvola_k0.5_w19.png delete mode 100644 binarize/testdata/pg1_sauvola_k0.5_w41.png delete mode 100644 binarize/util.go create mode 100644 integralimg/integralimg.go create mode 100644 preproc/cmd/binarize/main.go create mode 100644 preproc/helpers_test.go create mode 100644 preproc/sauvola.go create mode 100644 preproc/sauvola_test.go create mode 100644 preproc/testdata/pg1_integralsauvola_k0.3_w19.png create mode 100644 preproc/testdata/pg1_integralsauvola_k0.5_w19.png create mode 100644 preproc/testdata/pg1_integralsauvola_k0.5_w41.png delete mode 100644 preproc/testdata/pg1_integralwipesides_t0.02_w5.png delete mode 100644 preproc/testdata/pg1_integralwipesides_t0.05_w25.png delete mode 100644 preproc/testdata/pg1_integralwipesides_t0.05_w5.png create mode 100644 preproc/testdata/pg1_sauvola_k0.3_w19.png create mode 100644 preproc/testdata/pg1_sauvola_k0.5_w19.png create mode 100644 preproc/testdata/pg1_sauvola_k0.5_w41.png create mode 100644 preproc/testdata/pg2.png create mode 100644 preproc/testdata/pg2_integralwipesides_t0.02_w5.png create mode 100644 preproc/testdata/pg2_integralwipesides_t0.05_w25.png create mode 100644 preproc/testdata/pg2_integralwipesides_t0.05_w5.png create mode 100644 preproc/util.go diff --git a/binarize/cmd/binarize/main.go b/binarize/cmd/binarize/main.go deleted file mode 100644 index bda3d93..0000000 --- a/binarize/cmd/binarize/main.go +++ /dev/null @@ -1,78 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/go.git/binarize" -) - -// TODO: do more testing to see how good this assumption is -func autowsize(bounds image.Rectangle) int { - return bounds.Dx() / 60 -} - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n") - flag.PrintDefaults() - } - wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.") - ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).") - btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - if *wsize == 0 { - *wsize = autowsize(b) - log.Printf("Set window size to %d\n", *wsize) - } - - if *wsize % 2 == 0 { - *wsize++ - } - - // TODO: come up with a way to set a good ksize automatically - - var thresh image.Image - thresh = binarize.IntegralSauvola(gray, *ksize, *wsize) - - if *btype == "zeroinv" { - thresh, err = binarize.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA)) - if err != nil { - log.Fatal(err) - } - } - - f, err = os.Create(flag.Arg(1)) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) - } - defer f.Close() - err = png.Encode(f, thresh) - if err != nil { - log.Fatalf("Could not encode image: %v\n", err) - } -} diff --git a/binarize/integralimg.go b/binarize/integralimg.go deleted file mode 100644 index 382b495..0000000 --- a/binarize/integralimg.go +++ /dev/null @@ -1,116 +0,0 @@ -package binarize - -import ( - "image" - "math" -) - -type integralwindow struct { - topleft uint64 - topright uint64 - bottomleft uint64 - bottomright uint64 - width int - height int -} - -func Integralimg(img *image.Gray) [][]uint64 { - b := img.Bounds() - var oldy, oldx, oldxy uint64 - var integral [][]uint64 - for y := b.Min.Y; y < b.Max.Y; y++ { - newrow := []uint64{} - for x := b.Min.X; x < b.Max.X; x++ { - oldx, oldy, oldxy = 0, 0, 0 - if x > 0 { - oldx = newrow[x-1] - } - if y > 0 { - oldy = integral[y-1][x] - } - if x > 0 && y > 0 { - oldxy = integral[y-1][x-1] - } - pixel := uint64(img.GrayAt(x, y).Y) - i := pixel + oldx + oldy - oldxy - newrow = append(newrow, i) - } - integral = append(integral, newrow) - } - return integral -} - -func integralimgsq(img *image.Gray) [][]uint64 { - b := img.Bounds() - var oldy, oldx, oldxy uint64 - var integral [][]uint64 - for y := b.Min.Y; y < b.Max.Y; y++ { - newrow := []uint64{} - for x := b.Min.X; x < b.Max.X; x++ { - oldx, oldy, oldxy = 0, 0, 0 - if x > 0 { - oldx = newrow[x-1] - } - if y > 0 { - oldy = integral[y-1][x] - } - if x > 0 && y > 0 { - oldxy = integral[y-1][x-1] - } - pixel := uint64(img.GrayAt(x, y).Y) - i := pixel * pixel + oldx + oldy - oldxy - newrow = append(newrow, i) - } - integral = append(integral, newrow) - } - return integral -} - -// this gets the values of the four corners of a window, which can -// be used to quickly calculate the mean of the area -func getintegralwindow(integral [][]uint64, x int, y int, size int) integralwindow { - step := size / 2 - - minx, miny := 0, 0 - maxy := len(integral)-1 - maxx := len(integral[0])-1 - - if y > (step+1) { - miny = y - step - 1 - } - if x > (step+1) { - minx = x - step - 1 - } - - if maxy > (y + step) { - maxy = y + step - } - if maxx > (x + step) { - maxx = x + step - } - - return integralwindow { integral[miny][minx], integral[miny][maxx], integral[maxy][minx], integral[maxy][maxx], maxx-minx, maxy-miny} -} - -func integralmean(integral [][]uint64, x int, y int, size int) float64 { - i := getintegralwindow(integral, x, y, size) - total := float64(i.bottomright + i.topleft - i.topright - i.bottomleft) - sqsize := float64(i.width) * float64(i.height) - return total / sqsize -} - -func integralmeanstddev(integral [][]uint64, integralsq [][]uint64, x int, y int, size int) (float64, float64) { - i := getintegralwindow(integral, x, y, size) - isq := getintegralwindow(integralsq, x, y, size) - - var total, sqtotal, sqsize float64 - - sqsize = float64(i.width) * float64(i.height) - - total = float64(i.bottomright + i.topleft - i.topright - i.bottomleft) - sqtotal = float64(isq.bottomright + isq.topleft - isq.topright - isq.bottomleft) - - mean := total / sqsize - variance := (sqtotal / sqsize) - (mean * mean) - return mean, math.Sqrt(variance) -} diff --git a/binarize/sauvola.go b/binarize/sauvola.go deleted file mode 100644 index 6d9c1af..0000000 --- a/binarize/sauvola.go +++ /dev/null @@ -1,54 +0,0 @@ -package binarize - -import ( - "image" - "image/color" -) - -// Implements Sauvola's algorithm for text binarization, see paper -// "Adaptive document image binarization" (2000) -func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - window := surrounding(img, x, y, windowsize) - m, dev := meanstddev(window) - threshold := m * (1 + ksize * ((dev / 128) - 1)) - if img.GrayAt(x, y).Y < uint8(threshold) { - new.SetGray(x, y, color.Gray{0}) - } else { - new.SetGray(x, y, color.Gray{255}) - } - } - } - - return new -} - -// Implements Sauvola's algorithm using Integral Images, see paper -// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" -// and -// https://stackoverflow.com/questions/13110733/computing-image-integral -func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - integral := Integralimg(img) - integralsq := integralimgsq(img) - - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - m, dev := integralmeanstddev(integral, integralsq, x, y, windowsize) - threshold := m * (1 + ksize * ((dev / 128) - 1)) - if img.GrayAt(x, y).Y < uint8(threshold) { - new.SetGray(x, y, color.Gray{0}) - } else { - new.SetGray(x, y, color.Gray{255}) - } - } - } - - return new -} diff --git a/binarize/sauvola_test.go b/binarize/sauvola_test.go deleted file mode 100644 index 5faeb61..0000000 --- a/binarize/sauvola_test.go +++ /dev/null @@ -1,108 +0,0 @@ -package binarize - -import ( - "flag" - "fmt" - "image" - "image/draw" - "image/png" - "os" - "testing" -) - -var update = flag.Bool("update", false, "update golden files") - -func decode(s string) (*image.Gray, error) { - f, err := os.Open(s) - defer f.Close() - if err != nil { - return nil, err - } - img, err := png.Decode(f) - if err != nil { - return nil, err - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - return gray, nil -} - -func imgsequal(img1 *image.Gray, img2 *image.Gray) bool { - b := img1.Bounds() - if ! b.Eq(img2.Bounds()) { - return false - } - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - r0, g0, b0, a0 := img1.At(x, y).RGBA() - r1, g1, b1, a1 := img2.At(x, y).RGBA() - if r0 != r1 { - return false - } - if g0 != g1 { - return false - } - if b0 != b1 { - return false - } - if a0 != a1 { - return false - } - } - } - return true -} - -func TestBinarization(t *testing.T) { - cases := []struct { - name string - orig string - golden string - ksize float64 - wsize int - }{ - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41}, - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19}, - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w41.png", 0.5, 41}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w19.png", 0.5, 19}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.3_w19.png", 0.3, 19}, - } - - for _, c := range cases { - t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) { - var actual *image.Gray - orig, err := decode(c.orig) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.orig, err) - } - switch c.name { - case "integralsauvola": - actual = IntegralSauvola(orig, c.ksize, c.wsize) - case "sauvola": - actual = Sauvola(orig, c.ksize, c.wsize) - default: - t.Fatalf("No method %s\n", c.name) - } - if *update { - f, err := os.Create(c.golden) - defer f.Close() - if err != nil { - t.Fatalf("Could not open file %s to update: %v\n", c.golden, err) - } - err = png.Encode(f, actual) - if err != nil { - t.Fatalf("Could not encode update of %s: %v\n", c.golden, err) - } - } - golden, err := decode(c.golden) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.golden, err) - } - if ! imgsequal(golden, actual) { - t.Errorf("Binarized %s differs to %s\n", c.orig, c.golden) - } - }) - } -} diff --git a/binarize/testdata/pg1.png b/binarize/testdata/pg1.png deleted file mode 100644 index 2bcc4b1..0000000 Binary files a/binarize/testdata/pg1.png and /dev/null differ diff --git a/binarize/testdata/pg1_integralsauvola_k0.3_w19.png b/binarize/testdata/pg1_integralsauvola_k0.3_w19.png deleted file mode 100644 index bdf5712..0000000 Binary files a/binarize/testdata/pg1_integralsauvola_k0.3_w19.png and /dev/null differ diff --git a/binarize/testdata/pg1_integralsauvola_k0.5_w19.png b/binarize/testdata/pg1_integralsauvola_k0.5_w19.png deleted file mode 100644 index 5db2d9a..0000000 Binary files a/binarize/testdata/pg1_integralsauvola_k0.5_w19.png and /dev/null differ diff --git a/binarize/testdata/pg1_integralsauvola_k0.5_w41.png b/binarize/testdata/pg1_integralsauvola_k0.5_w41.png deleted file mode 100644 index 050d037..0000000 Binary files a/binarize/testdata/pg1_integralsauvola_k0.5_w41.png and /dev/null differ diff --git a/binarize/testdata/pg1_sauvola_k0.3_w19.png b/binarize/testdata/pg1_sauvola_k0.3_w19.png deleted file mode 100644 index bcd595f..0000000 Binary files a/binarize/testdata/pg1_sauvola_k0.3_w19.png and /dev/null differ diff --git a/binarize/testdata/pg1_sauvola_k0.5_w19.png b/binarize/testdata/pg1_sauvola_k0.5_w19.png deleted file mode 100644 index 8de596c..0000000 Binary files a/binarize/testdata/pg1_sauvola_k0.5_w19.png and /dev/null differ diff --git a/binarize/testdata/pg1_sauvola_k0.5_w41.png b/binarize/testdata/pg1_sauvola_k0.5_w41.png deleted file mode 100644 index b8f50e0..0000000 Binary files a/binarize/testdata/pg1_sauvola_k0.5_w41.png and /dev/null differ diff --git a/binarize/util.go b/binarize/util.go deleted file mode 100644 index ad641c9..0000000 --- a/binarize/util.go +++ /dev/null @@ -1,87 +0,0 @@ -package binarize - -import ( - "errors" - "image" - "math" -) - -func mean(i []int) float64 { - sum := 0 - for _, n := range i { - sum += n - } - return float64(sum) / float64(len(i)) -} - -func stddev(i []int) float64 { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := sum / float64(len(i) - 1) - return math.Sqrt(variance) -} - -func meanstddev(i []int) (float64, float64) { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := float64(sum) / float64(len(i) - 1) - return m, math.Sqrt(variance) -} - -// gets the pixel values surrounding a point in the image -func surrounding(img *image.Gray, x int, y int, size int) []int { - b := img.Bounds() - step := size / 2 - - miny := y - step - if miny < b.Min.Y { - miny = b.Min.Y - } - minx := x - step - if minx < b.Min.X { - minx = b.Min.X - } - maxy := y + step - if maxy > b.Max.Y { - maxy = b.Max.Y - } - maxx := x + step - if maxx > b.Max.X { - maxx = b.Max.X - } - - var s []int - for yi := miny; yi <= maxy; yi++ { - for xi := minx; xi <= maxx; xi++ { - s = append(s, int(img.GrayAt(xi, yi).Y)) - } - } - return s -} - -func BinToZeroInv(bin *image.Gray, orig *image.RGBA) (*image.RGBA, error) { - b := bin.Bounds() - if ! b.Eq(orig.Bounds()) { - return orig, errors.New("bin and orig images need to be the same dimensions") - } - newimg := image.NewRGBA(image.Rect(0, 0, b.Dx(), b.Dy())) - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - if bin.GrayAt(x, y).Y == 255 { - newimg.Set(x, y, bin.GrayAt(x, y)) - } else { - newimg.Set(x, y, orig.At(x, y)) - } - } - } - - return newimg, nil -} diff --git a/integralimg/integralimg.go b/integralimg/integralimg.go new file mode 100644 index 0000000..31f3e53 --- /dev/null +++ b/integralimg/integralimg.go @@ -0,0 +1,149 @@ +package integralimg + +import ( + "image" + "math" +) + +// I is the Integral Image +type I [][]uint64 + +// Sq contains an Integral Image and its Square +type WithSq struct { + Img I + Sq I +} + +// Window is a part of an Integral Image +type Window struct { + topleft uint64 + topright uint64 + bottomleft uint64 + bottomright uint64 + width int + height int +} + +// ToIntegralImg creates an integral image +func ToIntegralImg(img *image.Gray) I { + var integral I + var oldy, oldx, oldxy uint64 + b := img.Bounds() + for y := b.Min.Y; y < b.Max.Y; y++ { + newrow := []uint64{} + for x := b.Min.X; x < b.Max.X; x++ { + oldx, oldy, oldxy = 0, 0, 0 + if x > 0 { + oldx = newrow[x-1] + } + if y > 0 { + oldy = integral[y-1][x] + } + if x > 0 && y > 0 { + oldxy = integral[y-1][x-1] + } + pixel := uint64(img.GrayAt(x, y).Y) + i := pixel + oldx + oldy - oldxy + newrow = append(newrow, i) + } + integral = append(integral, newrow) + } + return integral +} + +// ToSqIntegralImg creates an integral image of the square of all +// pixel values +func ToSqIntegralImg(img *image.Gray) I { + var integral I + var oldy, oldx, oldxy uint64 + b := img.Bounds() + for y := b.Min.Y; y < b.Max.Y; y++ { + newrow := []uint64{} + for x := b.Min.X; x < b.Max.X; x++ { + oldx, oldy, oldxy = 0, 0, 0 + if x > 0 { + oldx = newrow[x-1] + } + if y > 0 { + oldy = integral[y-1][x] + } + if x > 0 && y > 0 { + oldxy = integral[y-1][x-1] + } + pixel := uint64(img.GrayAt(x, y).Y) + i := pixel * pixel + oldx + oldy - oldxy + newrow = append(newrow, i) + } + integral = append(integral, newrow) + } + return integral +} + +// ToAllIntegralImg creates a WithSq containing a regular and +// squared Integral Image +func ToAllIntegralImg(img *image.Gray) WithSq { + var s WithSq + s.Img = ToIntegralImg(img) + s.Sq = ToSqIntegralImg(img) + return s +} + + +// GetWindow gets the values of the corners of a part of an +// Integral Image, plus the dimensions of the part, which can +// be used to quickly calculate the mean of the area +func (i I) GetWindow(x, y, size int) Window { + step := size / 2 + + minx, miny := 0, 0 + maxy := len(i)-1 + maxx := len(i[0])-1 + + if y > (step+1) { + miny = y - step - 1 + } + if x > (step+1) { + minx = x - step - 1 + } + + if maxy > (y + step) { + maxy = y + step + } + if maxx > (x + step) { + maxx = x + step + } + + return Window { i[miny][minx], i[miny][maxx], i[maxy][minx], i[maxy][maxx], maxx-minx, maxy-miny} +} + +// Sum returns the sum of all pixels in a Window +func (w Window) Sum() uint64 { + return w.bottomright + w.topleft - w.topright - w.bottomleft +} + +// Size returns the total size of a Window +func (w Window) Size() int { + return w.width * w.height +} + +// Mean returns the average value of pixels in a Window +func (w Window) Mean() float64 { + return float64(w.Sum()) / float64(w.Size()) +} + +// MeanWindow calculates the mean value of a section of an Integral +// Image +func (i I) MeanWindow(x, y, size int) float64 { + return i.GetWindow(x, y, size).Mean() +} + +// MeanStdDevWindow calculates the mean and standard deviation of +// a section on an Integral Image +func (i WithSq) MeanStdDevWindow(x, y, size int) (float64, float64) { + imean := i.Img.GetWindow(x, y, size).Mean() + smean := i.Sq.GetWindow(x, y, size).Mean() + + variance := smean - (imean * imean) + + return imean, math.Sqrt(variance) +} diff --git a/preproc/cmd/binarize/main.go b/preproc/cmd/binarize/main.go new file mode 100644 index 0000000..c274f9c --- /dev/null +++ b/preproc/cmd/binarize/main.go @@ -0,0 +1,78 @@ +package main + +import ( + "flag" + "fmt" + "image" + "image/draw" + _ "image/jpeg" + "image/png" + "log" + "os" + + "rescribe.xyz/go.git/preproc" +) + +// TODO: do more testing to see how good this assumption is +func autowsize(bounds image.Rectangle) int { + return bounds.Dx() / 60 +} + +func main() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n") + flag.PrintDefaults() + } + wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.") + ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).") + btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.") + flag.Parse() + if flag.NArg() < 2 { + flag.Usage() + os.Exit(1) + } + + f, err := os.Open(flag.Arg(0)) + defer f.Close() + if err != nil { + log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) + } + img, _, err := image.Decode(f) + if err != nil { + log.Fatalf("Could not decode image: %v\n", err) + } + b := img.Bounds() + gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) + draw.Draw(gray, b, img, b.Min, draw.Src) + + if *wsize == 0 { + *wsize = autowsize(b) + log.Printf("Set window size to %d\n", *wsize) + } + + if *wsize % 2 == 0 { + *wsize++ + } + + // TODO: come up with a way to set a good ksize automatically + + var thresh image.Image + thresh = preproc.IntegralSauvola(gray, *ksize, *wsize) + + if *btype == "zeroinv" { + thresh, err = preproc.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA)) + if err != nil { + log.Fatal(err) + } + } + + f, err = os.Create(flag.Arg(1)) + if err != nil { + log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) + } + defer f.Close() + err = png.Encode(f, thresh) + if err != nil { + log.Fatalf("Could not encode image: %v\n", err) + } +} diff --git a/preproc/helpers_test.go b/preproc/helpers_test.go new file mode 100644 index 0000000..326b59d --- /dev/null +++ b/preproc/helpers_test.go @@ -0,0 +1,56 @@ +package preproc + +// TODO: add different pages as test cases +// TODO: test non integral img version + +import ( + "flag" + "image" + "image/draw" + "image/png" + "os" +) + +var update = flag.Bool("update", false, "update golden files") + +func decode(s string) (*image.Gray, error) { + f, err := os.Open(s) + defer f.Close() + if err != nil { + return nil, err + } + img, err := png.Decode(f) + if err != nil { + return nil, err + } + b := img.Bounds() + gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) + draw.Draw(gray, b, img, b.Min, draw.Src) + return gray, nil +} + +func imgsequal(img1 *image.Gray, img2 *image.Gray) bool { + b := img1.Bounds() + if !b.Eq(img2.Bounds()) { + return false + } + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + r0, g0, b0, a0 := img1.At(x, y).RGBA() + r1, g1, b1, a1 := img2.At(x, y).RGBA() + if r0 != r1 { + return false + } + if g0 != g1 { + return false + } + if b0 != b1 { + return false + } + if a0 != a1 { + return false + } + } + } + return true +} diff --git a/preproc/sauvola.go b/preproc/sauvola.go new file mode 100644 index 0000000..e93ea81 --- /dev/null +++ b/preproc/sauvola.go @@ -0,0 +1,55 @@ +package preproc + +import ( + "image" + "image/color" + + "rescribe.xyz/go.git/integralimg" +) + +// Implements Sauvola's algorithm for text binarization, see paper +// "Adaptive document image binarization" (2000) +func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { + b := img.Bounds() + new := image.NewGray(b) + + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + window := surrounding(img, x, y, windowsize) + m, dev := meanstddev(window) + threshold := m * (1 + ksize * ((dev / 128) - 1)) + if img.GrayAt(x, y).Y < uint8(threshold) { + new.SetGray(x, y, color.Gray{0}) + } else { + new.SetGray(x, y, color.Gray{255}) + } + } + } + + return new +} + +// Implements Sauvola's algorithm using Integral Images, see paper +// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" +// and +// https://stackoverflow.com/questions/13110733/computing-image-integral +func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { + b := img.Bounds() + new := image.NewGray(b) + + integrals := integralimg.ToAllIntegralImg(img) + + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + m, dev := integrals.MeanStdDevWindow(x, y, windowsize) + threshold := m * (1 + ksize * ((dev / 128) - 1)) + if img.GrayAt(x, y).Y < uint8(threshold) { + new.SetGray(x, y, color.Gray{0}) + } else { + new.SetGray(x, y, color.Gray{255}) + } + } + } + + return new +} diff --git a/preproc/sauvola_test.go b/preproc/sauvola_test.go new file mode 100644 index 0000000..1397a4f --- /dev/null +++ b/preproc/sauvola_test.go @@ -0,0 +1,62 @@ +package preproc + +import ( + "fmt" + "image" + "image/png" + "os" + "testing" +) + +func TestBinarization(t *testing.T) { + cases := []struct { + name string + orig string + golden string + ksize float64 + wsize int + }{ + {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41}, + {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19}, + {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19}, + {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w41.png", 0.5, 41}, + {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w19.png", 0.5, 19}, + {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.3_w19.png", 0.3, 19}, + } + + for _, c := range cases { + t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) { + var actual *image.Gray + orig, err := decode(c.orig) + if err != nil { + t.Fatalf("Could not open file %s: %v\n", c.orig, err) + } + switch c.name { + case "integralsauvola": + actual = IntegralSauvola(orig, c.ksize, c.wsize) + case "sauvola": + actual = Sauvola(orig, c.ksize, c.wsize) + default: + t.Fatalf("No method %s\n", c.name) + } + if *update { + f, err := os.Create(c.golden) + defer f.Close() + if err != nil { + t.Fatalf("Could not open file %s to update: %v\n", c.golden, err) + } + err = png.Encode(f, actual) + if err != nil { + t.Fatalf("Could not encode update of %s: %v\n", c.golden, err) + } + } + golden, err := decode(c.golden) + if err != nil { + t.Fatalf("Could not open file %s: %v\n", c.golden, err) + } + if ! imgsequal(golden, actual) { + t.Errorf("Binarized %s differs to %s\n", c.orig, c.golden) + } + }) + } +} diff --git a/preproc/testdata/pg1.png b/preproc/testdata/pg1.png index c7c4249..2bcc4b1 100644 Binary files a/preproc/testdata/pg1.png and b/preproc/testdata/pg1.png differ diff --git a/preproc/testdata/pg1_integralsauvola_k0.3_w19.png b/preproc/testdata/pg1_integralsauvola_k0.3_w19.png new file mode 100644 index 0000000..bdf5712 Binary files /dev/null and b/preproc/testdata/pg1_integralsauvola_k0.3_w19.png differ diff --git a/preproc/testdata/pg1_integralsauvola_k0.5_w19.png b/preproc/testdata/pg1_integralsauvola_k0.5_w19.png new file mode 100644 index 0000000..5db2d9a Binary files /dev/null and b/preproc/testdata/pg1_integralsauvola_k0.5_w19.png differ diff --git a/preproc/testdata/pg1_integralsauvola_k0.5_w41.png b/preproc/testdata/pg1_integralsauvola_k0.5_w41.png new file mode 100644 index 0000000..050d037 Binary files /dev/null and b/preproc/testdata/pg1_integralsauvola_k0.5_w41.png differ diff --git a/preproc/testdata/pg1_integralwipesides_t0.02_w5.png b/preproc/testdata/pg1_integralwipesides_t0.02_w5.png deleted file mode 100644 index 6b4ccb2..0000000 Binary files a/preproc/testdata/pg1_integralwipesides_t0.02_w5.png and /dev/null differ diff --git a/preproc/testdata/pg1_integralwipesides_t0.05_w25.png b/preproc/testdata/pg1_integralwipesides_t0.05_w25.png deleted file mode 100644 index 39dc88d..0000000 Binary files a/preproc/testdata/pg1_integralwipesides_t0.05_w25.png and /dev/null differ diff --git a/preproc/testdata/pg1_integralwipesides_t0.05_w5.png b/preproc/testdata/pg1_integralwipesides_t0.05_w5.png deleted file mode 100644 index 50df855..0000000 Binary files a/preproc/testdata/pg1_integralwipesides_t0.05_w5.png and /dev/null differ diff --git a/preproc/testdata/pg1_sauvola_k0.3_w19.png b/preproc/testdata/pg1_sauvola_k0.3_w19.png new file mode 100644 index 0000000..bcd595f Binary files /dev/null and b/preproc/testdata/pg1_sauvola_k0.3_w19.png differ diff --git a/preproc/testdata/pg1_sauvola_k0.5_w19.png b/preproc/testdata/pg1_sauvola_k0.5_w19.png new file mode 100644 index 0000000..8de596c Binary files /dev/null and b/preproc/testdata/pg1_sauvola_k0.5_w19.png differ diff --git a/preproc/testdata/pg1_sauvola_k0.5_w41.png b/preproc/testdata/pg1_sauvola_k0.5_w41.png new file mode 100644 index 0000000..b8f50e0 Binary files /dev/null and b/preproc/testdata/pg1_sauvola_k0.5_w41.png differ diff --git a/preproc/testdata/pg2.png b/preproc/testdata/pg2.png new file mode 100644 index 0000000..c7c4249 Binary files /dev/null and b/preproc/testdata/pg2.png differ diff --git a/preproc/testdata/pg2_integralwipesides_t0.02_w5.png b/preproc/testdata/pg2_integralwipesides_t0.02_w5.png new file mode 100644 index 0000000..6b4ccb2 Binary files /dev/null and b/preproc/testdata/pg2_integralwipesides_t0.02_w5.png differ diff --git a/preproc/testdata/pg2_integralwipesides_t0.05_w25.png b/preproc/testdata/pg2_integralwipesides_t0.05_w25.png new file mode 100644 index 0000000..39dc88d Binary files /dev/null and b/preproc/testdata/pg2_integralwipesides_t0.05_w25.png differ diff --git a/preproc/testdata/pg2_integralwipesides_t0.05_w5.png b/preproc/testdata/pg2_integralwipesides_t0.05_w5.png new file mode 100644 index 0000000..50df855 Binary files /dev/null and b/preproc/testdata/pg2_integralwipesides_t0.05_w5.png differ diff --git a/preproc/util.go b/preproc/util.go new file mode 100644 index 0000000..5f8a9f1 --- /dev/null +++ b/preproc/util.go @@ -0,0 +1,95 @@ +package preproc + +import ( + "errors" + "image" + "math" +) + +// TODO: name better; maybe verb, x-er +// TODO: implement these for regular image, and use them to make +// image functions generic for integral and non- images +type UsefulImg interface { + MeanWindow() + MeanStdDevWindow() +} + +func mean(i []int) float64 { + sum := 0 + for _, n := range i { + sum += n + } + return float64(sum) / float64(len(i)) +} + +func stddev(i []int) float64 { + m := mean(i) + + var sum float64 + for _, n := range i { + sum += (float64(n) - m) * (float64(n) - m) + } + variance := sum / float64(len(i) - 1) + return math.Sqrt(variance) +} + +func meanstddev(i []int) (float64, float64) { + m := mean(i) + + var sum float64 + for _, n := range i { + sum += (float64(n) - m) * (float64(n) - m) + } + variance := float64(sum) / float64(len(i) - 1) + return m, math.Sqrt(variance) +} + +// gets the pixel values surrounding a point in the image +func surrounding(img *image.Gray, x int, y int, size int) []int { + b := img.Bounds() + step := size / 2 + + miny := y - step + if miny < b.Min.Y { + miny = b.Min.Y + } + minx := x - step + if minx < b.Min.X { + minx = b.Min.X + } + maxy := y + step + if maxy > b.Max.Y { + maxy = b.Max.Y + } + maxx := x + step + if maxx > b.Max.X { + maxx = b.Max.X + } + + var s []int + for yi := miny; yi <= maxy; yi++ { + for xi := minx; xi <= maxx; xi++ { + s = append(s, int(img.GrayAt(xi, yi).Y)) + } + } + return s +} + +func BinToZeroInv(bin *image.Gray, orig *image.RGBA) (*image.RGBA, error) { + b := bin.Bounds() + if ! b.Eq(orig.Bounds()) { + return orig, errors.New("bin and orig images need to be the same dimensions") + } + newimg := image.NewRGBA(image.Rect(0, 0, b.Dx(), b.Dy())) + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + if bin.GrayAt(x, y).Y == 255 { + newimg.Set(x, y, bin.GrayAt(x, y)) + } else { + newimg.Set(x, y, orig.At(x, y)) + } + } + } + + return newimg, nil +} diff --git a/preproc/wipesides.go b/preproc/wipesides.go index c773054..4806e93 100644 --- a/preproc/wipesides.go +++ b/preproc/wipesides.go @@ -7,7 +7,7 @@ import ( "image" "image/color" - "rescribe.xyz/go.git/binarize" + "rescribe.xyz/go.git/integralimg" ) type IntWindow struct { // TODO: put this in its own package @@ -126,7 +126,7 @@ func wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray { // wipe fills the sections of image which fall outside the content // area with white func Wipe(img *image.Gray, wsize int, thresh float64) *image.Gray { - integral := binarize.Integralimg(img) + integral := integralimg.ToIntegralImg(img) lowedge, highedge := findedges(integral, wsize, thresh) return wipesides(img, lowedge, highedge) } diff --git a/preproc/wipesides_test.go b/preproc/wipesides_test.go index b0ada4e..f66f39b 100644 --- a/preproc/wipesides_test.go +++ b/preproc/wipesides_test.go @@ -4,59 +4,13 @@ package preproc // TODO: test non integral img version import ( - "flag" "fmt" "image" - "image/draw" "image/png" "os" "testing" ) -var update = flag.Bool("update", false, "update golden files") - -func decode(s string) (*image.Gray, error) { - f, err := os.Open(s) - defer f.Close() - if err != nil { - return nil, err - } - img, err := png.Decode(f) - if err != nil { - return nil, err - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - return gray, nil -} - -func imgsequal(img1 *image.Gray, img2 *image.Gray) bool { - b := img1.Bounds() - if !b.Eq(img2.Bounds()) { - return false - } - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - r0, g0, b0, a0 := img1.At(x, y).RGBA() - r1, g1, b1, a1 := img2.At(x, y).RGBA() - if r0 != r1 { - return false - } - if g0 != g1 { - return false - } - if b0 != b1 { - return false - } - if a0 != a1 { - return false - } - } - } - return true -} - func TestWipeSides(t *testing.T) { cases := []struct { name string @@ -65,9 +19,9 @@ func TestWipeSides(t *testing.T) { thresh float64 wsize int }{ - {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.02_w5.png", 0.02, 5}, - {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w5.png", 0.05, 5}, - {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w25.png", 0.05, 25}, + {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.02_w5.png", 0.02, 5}, + {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w5.png", 0.05, 5}, + {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w25.png", 0.05, 25}, } for _, c := range cases { -- cgit v1.2.1-24-ge1ad