From 6bb8ffba746fbcea8a8a359a32afdd591dab0f25 Mon Sep 17 00:00:00 2001 From: Nick White Date: Wed, 30 Jan 2019 16:42:02 +0000 Subject: Add integral image functionality to enable massive speedup of Sauvola Note that there are some very small differences to the output compared to the basic algorithm, but this doesn't make much difference. This is due to minor differences with the standard deviation calculation throughout, and with mean calculation at edges, for reasons I'm unclear about. WIP integral image speedup. mean is working Very WIP, but mean is perfect once full window is used Integral version all working! Remove debugging info Organise code better --- binarize/binarize.go | 40 ----------------- binarize/integralimg.go | 116 ++++++++++++++++++++++++++++++++++++++++++++++++ binarize/main.go | 44 ++++++++++++++++++ binarize/sauvola.go | 87 +++++++++++------------------------- binarize/util.go | 67 ++++++++++++++++++++++++++++ 5 files changed, 252 insertions(+), 102 deletions(-) delete mode 100644 binarize/binarize.go create mode 100644 binarize/integralimg.go create mode 100644 binarize/main.go create mode 100644 binarize/util.go (limited to 'binarize') diff --git a/binarize/binarize.go b/binarize/binarize.go deleted file mode 100644 index fa8a30f..0000000 --- a/binarize/binarize.go +++ /dev/null @@ -1,40 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "log" - "os" - - "github.com/Ernyoke/Imger/imgio" -) - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: binarize [-w num] [-k num] inimg outimg\n") - flag.PrintDefaults() - } - wsize := flag.Int("w", 31, "Window size for sauvola algorithm") - ksize := flag.Float64("k", 0.5, "K for sauvola algorithm") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - img, err := imgio.ImreadGray(flag.Arg(0)) - if err != nil { - log.Fatalf("Could not read image %s\n", flag.Arg(0)) - } - - // TODO: should be able to estimate an appropriate window size based on resolution - thresh := Sauvola(img, *ksize, *wsize) - if err != nil { - log.Fatal("Error binarising image\n") - } - - err = imgio.Imwrite(thresh, flag.Arg(1)) - if err != nil { - log.Fatalf("Could not write image %s\n", flag.Arg(1)) - } -} diff --git a/binarize/integralimg.go b/binarize/integralimg.go new file mode 100644 index 0000000..c585d60 --- /dev/null +++ b/binarize/integralimg.go @@ -0,0 +1,116 @@ +package main + +import ( + "image" + "math" +) + +type integralwindow struct { + topleft uint64 + topright uint64 + bottomleft uint64 + bottomright uint64 + width int + height int +} + +func integralimg(img *image.Gray) [][]uint64 { + b := img.Bounds() + var oldy, oldx, oldxy uint64 + var integral [][]uint64 + for y := b.Min.Y; y < b.Max.Y; y++ { + newrow := []uint64{} + for x := b.Min.X; x < b.Max.X; x++ { + oldx, oldy, oldxy = 0, 0, 0 + if x > 0 { + oldx = newrow[x-1] + } + if y > 0 { + oldy = integral[y-1][x] + } + if x > 0 && y > 0 { + oldxy = integral[y-1][x-1] + } + pixel := uint64(img.GrayAt(x, y).Y) + i := pixel + oldx + oldy - oldxy + newrow = append(newrow, i) + } + integral = append(integral, newrow) + } + return integral +} + +func integralimgsq(img *image.Gray) [][]uint64 { + b := img.Bounds() + var oldy, oldx, oldxy uint64 + var integral [][]uint64 + for y := b.Min.Y; y < b.Max.Y; y++ { + newrow := []uint64{} + for x := b.Min.X; x < b.Max.X; x++ { + oldx, oldy, oldxy = 0, 0, 0 + if x > 0 { + oldx = newrow[x-1] + } + if y > 0 { + oldy = integral[y-1][x] + } + if x > 0 && y > 0 { + oldxy = integral[y-1][x-1] + } + pixel := uint64(img.GrayAt(x, y).Y) + i := pixel * pixel + oldx + oldy - oldxy + newrow = append(newrow, i) + } + integral = append(integral, newrow) + } + return integral +} + +// this gets the values of the four corners of a window, which can +// be used to quickly calculate the mean of the area +func getintegralwindow(integral [][]uint64, x int, y int, size int) integralwindow { + step := size / 2 + + minx, miny := 0, 0 + maxy := len(integral)-1 + maxx := len(integral[0])-1 + + if y > (step+1) { + miny = y - step - 1 + } + if x > (step+1) { + minx = x - step - 1 + } + + if maxy > (y + step) { + maxy = y + step + } + if maxx > (x + step) { + maxx = x + step + } + + return integralwindow { integral[miny][minx], integral[miny][maxx], integral[maxy][minx], integral[maxy][maxx], maxx-minx, maxy-miny} +} + +func integralmean(integral [][]uint64, x int, y int, size int) float64 { + i := getintegralwindow(integral, x, y, size) + total := float64(i.bottomright + i.topleft - i.topright - i.bottomleft) + sqsize := float64(i.width) * float64(i.height) + return total / sqsize +} + +func integralmeanstddev(integral [][]uint64, integralsq [][]uint64, x int, y int, size int) (float64, float64) { + i := getintegralwindow(integral, x, y, size) + isq := getintegralwindow(integralsq, x, y, size) + + var total, sqtotal, sqsize float64 + + sqsize = float64(i.width) * float64(i.height) + + total = float64(i.bottomright + i.topleft - i.topright - i.bottomleft) + sqtotal = float64(isq.bottomright + isq.topleft - isq.topright - isq.bottomleft) + + mean := total / sqsize + variance := (sqtotal / sqsize) - (mean * mean) + return mean, math.Sqrt(variance) +} diff --git a/binarize/main.go b/binarize/main.go new file mode 100644 index 0000000..610effc --- /dev/null +++ b/binarize/main.go @@ -0,0 +1,44 @@ +package main + +// TODO: could look into other algorithms, see for examples see +// the README at https://github.com/brandonmpetty/Doxa + +import ( + "flag" + "fmt" + "log" + "os" + + "github.com/Ernyoke/Imger/imgio" // TODO: get rid of this and do things myself +) + +func main() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: binarize [-w num] [-k num] inimg outimg\n") + flag.PrintDefaults() + } + wsize := flag.Int("w", 31, "Window size for sauvola algorithm (needs to be odd)") + ksize := flag.Float64("k", 0.5, "K for sauvola algorithm") + flag.Parse() + if flag.NArg() < 2 { + flag.Usage() + os.Exit(1) + } + + if *wsize % 2 == 0 { + *wsize++ + } + + img, err := imgio.ImreadGray(flag.Arg(0)) + if err != nil { + log.Fatalf("Could not read image %s\n", flag.Arg(0)) + } + + // TODO: estimate an appropriate window size based on resolution + thresh := IntegralSauvola(img, *ksize, *wsize) + + err = imgio.Imwrite(thresh, flag.Arg(1)) + if err != nil { + log.Fatalf("Could not write image %s\n", flag.Arg(1)) + } +} diff --git a/binarize/sauvola.go b/binarize/sauvola.go index f1d0512..bc311ad 100644 --- a/binarize/sauvola.go +++ b/binarize/sauvola.go @@ -3,81 +3,44 @@ package main import ( "image" "image/color" - "math" ) -func mean(i []int) float64 { - sum := 0 - for _, n := range i { - sum += n - } - return float64(sum) / float64(len(i)) -} - -// TODO: is there a prettier way of doing this than float64() all over the place? -func stddev(i []int) float64 { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := float64(sum) / float64(len(i) - 1) - return math.Sqrt(variance) -} - -func meanstddev(i []int) (float64, float64) { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := float64(sum) / float64(len(i) - 1) - return m, math.Sqrt(variance) -} - -// gets the pixel values surrounding a point in the image -func surrounding(img *image.Gray, x int, y int, size int) []int { +// Implements Sauvola's algorithm for text binarization, see paper +// "Adaptive document image binarization" (2000) +func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { b := img.Bounds() + new := image.NewGray(b) - miny := y - size/2 - if miny < b.Min.Y { - miny = b.Min.Y - } - minx := x - size/2 - if minx < b.Min.X { - minx = b.Min.X - } - maxy := y + size/2 - if maxy > b.Max.Y { - maxy = b.Max.Y - } - maxx := x + size/2 - if maxx > b.Max.X { - maxx = b.Max.X - } - - var s []int - for yi := miny; yi < maxy; yi++ { - for xi := minx; xi < maxx; xi++ { - s = append(s, int(img.GrayAt(xi, yi).Y)) + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + window := surrounding(img, x, y, windowsize) + m, dev := meanstddev(window) + threshold := m * (1 + ksize * ((dev / 128) - 1)) + if img.GrayAt(x, y).Y < uint8(threshold) { + new.SetGray(x, y, color.Gray{0}) + } else { + new.SetGray(x, y, color.Gray{255}) + } } } - return s + + return new } -// TODO: parallelize -// TODO: switch to using integral images to make faster; see paper -// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" -func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { +// Implements Sauvola's algorithm using Integral Images, see paper +// "Effcient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" +// and +// https://stackoverflow.com/questions/13110733/computing-image-integral +func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { b := img.Bounds() new := image.NewGray(b) + integral := integralimg(img) + integralsq := integralimgsq(img) + for y := b.Min.Y; y < b.Max.Y; y++ { for x := b.Min.X; x < b.Max.X; x++ { - window := surrounding(img, x, y, windowsize) - m, dev := meanstddev(window) + m, dev := integralmeanstddev(integral, integralsq, x, y, windowsize) threshold := m * (1 + ksize * ((dev / 128) - 1)) if img.GrayAt(x, y).Y < uint8(threshold) { new.SetGray(x, y, color.Gray{0}) diff --git a/binarize/util.go b/binarize/util.go new file mode 100644 index 0000000..e7cf0f8 --- /dev/null +++ b/binarize/util.go @@ -0,0 +1,67 @@ +package main + +import ( + "image" + "math" +) + +func mean(i []int) float64 { + sum := 0 + for _, n := range i { + sum += n + } + return float64(sum) / float64(len(i)) +} + +func stddev(i []int) float64 { + m := mean(i) + + var sum float64 + for _, n := range i { + sum += (float64(n) - m) * (float64(n) - m) + } + variance := sum / float64(len(i) - 1) + return math.Sqrt(variance) +} + +func meanstddev(i []int) (float64, float64) { + m := mean(i) + + var sum float64 + for _, n := range i { + sum += (float64(n) - m) * (float64(n) - m) + } + variance := float64(sum) / float64(len(i) - 1) + return m, math.Sqrt(variance) +} + +// gets the pixel values surrounding a point in the image +func surrounding(img *image.Gray, x int, y int, size int) []int { + b := img.Bounds() + step := size / 2 + + miny := y - step + if miny < b.Min.Y { + miny = b.Min.Y + } + minx := x - step + if minx < b.Min.X { + minx = b.Min.X + } + maxy := y + step + if maxy > b.Max.Y { + maxy = b.Max.Y + } + maxx := x + step + if maxx > b.Max.X { + maxx = b.Max.X + } + + var s []int + for yi := miny; yi <= maxy; yi++ { + for xi := minx; xi <= maxx; xi++ { + s = append(s, int(img.GrayAt(xi, yi).Y)) + } + } + return s +} -- cgit v1.2.1-24-ge1ad