diff options
Diffstat (limited to 'binarize')
-rw-r--r-- | binarize/integralimg.go | 116 | ||||
-rw-r--r-- | binarize/main.go (renamed from binarize/binarize.go) | 18 | ||||
-rw-r--r-- | binarize/sauvola.go | 87 | ||||
-rw-r--r-- | binarize/util.go | 67 |
4 files changed, 219 insertions, 69 deletions
diff --git a/binarize/integralimg.go b/binarize/integralimg.go new file mode 100644 index 0000000..c585d60 --- /dev/null +++ b/binarize/integralimg.go @@ -0,0 +1,116 @@ +package main + +import ( + "image" + "math" +) + +type integralwindow struct { + topleft uint64 + topright uint64 + bottomleft uint64 + bottomright uint64 + width int + height int +} + +func integralimg(img *image.Gray) [][]uint64 { + b := img.Bounds() + var oldy, oldx, oldxy uint64 + var integral [][]uint64 + for y := b.Min.Y; y < b.Max.Y; y++ { + newrow := []uint64{} + for x := b.Min.X; x < b.Max.X; x++ { + oldx, oldy, oldxy = 0, 0, 0 + if x > 0 { + oldx = newrow[x-1] + } + if y > 0 { + oldy = integral[y-1][x] + } + if x > 0 && y > 0 { + oldxy = integral[y-1][x-1] + } + pixel := uint64(img.GrayAt(x, y).Y) + i := pixel + oldx + oldy - oldxy + newrow = append(newrow, i) + } + integral = append(integral, newrow) + } + return integral +} + +func integralimgsq(img *image.Gray) [][]uint64 { + b := img.Bounds() + var oldy, oldx, oldxy uint64 + var integral [][]uint64 + for y := b.Min.Y; y < b.Max.Y; y++ { + newrow := []uint64{} + for x := b.Min.X; x < b.Max.X; x++ { + oldx, oldy, oldxy = 0, 0, 0 + if x > 0 { + oldx = newrow[x-1] + } + if y > 0 { + oldy = integral[y-1][x] + } + if x > 0 && y > 0 { + oldxy = integral[y-1][x-1] + } + pixel := uint64(img.GrayAt(x, y).Y) + i := pixel * pixel + oldx + oldy - oldxy + newrow = append(newrow, i) + } + integral = append(integral, newrow) + } + return integral +} + +// this gets the values of the four corners of a window, which can +// be used to quickly calculate the mean of the area +func getintegralwindow(integral [][]uint64, x int, y int, size int) integralwindow { + step := size / 2 + + minx, miny := 0, 0 + maxy := len(integral)-1 + maxx := len(integral[0])-1 + + if y > (step+1) { + miny = y - step - 1 + } + if x > (step+1) { + minx = x - step - 1 + } + + if maxy > (y + step) { + maxy = y + step + } + if maxx > (x + step) { + maxx = x + step + } + + return integralwindow { integral[miny][minx], integral[miny][maxx], integral[maxy][minx], integral[maxy][maxx], maxx-minx, maxy-miny} +} + +func integralmean(integral [][]uint64, x int, y int, size int) float64 { + i := getintegralwindow(integral, x, y, size) + total := float64(i.bottomright + i.topleft - i.topright - i.bottomleft) + sqsize := float64(i.width) * float64(i.height) + return total / sqsize +} + +func integralmeanstddev(integral [][]uint64, integralsq [][]uint64, x int, y int, size int) (float64, float64) { + i := getintegralwindow(integral, x, y, size) + isq := getintegralwindow(integralsq, x, y, size) + + var total, sqtotal, sqsize float64 + + sqsize = float64(i.width) * float64(i.height) + + total = float64(i.bottomright + i.topleft - i.topright - i.bottomleft) + sqtotal = float64(isq.bottomright + isq.topleft - isq.topright - isq.bottomleft) + + mean := total / sqsize + variance := (sqtotal / sqsize) - (mean * mean) + return mean, math.Sqrt(variance) +} diff --git a/binarize/binarize.go b/binarize/main.go index fa8a30f..610effc 100644 --- a/binarize/binarize.go +++ b/binarize/main.go @@ -1,12 +1,15 @@ package main +// TODO: could look into other algorithms, see for examples see +// the README at https://github.com/brandonmpetty/Doxa + import ( "flag" "fmt" "log" "os" - "github.com/Ernyoke/Imger/imgio" + "github.com/Ernyoke/Imger/imgio" // TODO: get rid of this and do things myself ) func main() { @@ -14,7 +17,7 @@ func main() { fmt.Fprintf(os.Stderr, "Usage: binarize [-w num] [-k num] inimg outimg\n") flag.PrintDefaults() } - wsize := flag.Int("w", 31, "Window size for sauvola algorithm") + wsize := flag.Int("w", 31, "Window size for sauvola algorithm (needs to be odd)") ksize := flag.Float64("k", 0.5, "K for sauvola algorithm") flag.Parse() if flag.NArg() < 2 { @@ -22,16 +25,17 @@ func main() { os.Exit(1) } + if *wsize % 2 == 0 { + *wsize++ + } + img, err := imgio.ImreadGray(flag.Arg(0)) if err != nil { log.Fatalf("Could not read image %s\n", flag.Arg(0)) } - // TODO: should be able to estimate an appropriate window size based on resolution - thresh := Sauvola(img, *ksize, *wsize) - if err != nil { - log.Fatal("Error binarising image\n") - } + // TODO: estimate an appropriate window size based on resolution + thresh := IntegralSauvola(img, *ksize, *wsize) err = imgio.Imwrite(thresh, flag.Arg(1)) if err != nil { diff --git a/binarize/sauvola.go b/binarize/sauvola.go index f1d0512..bc311ad 100644 --- a/binarize/sauvola.go +++ b/binarize/sauvola.go @@ -3,81 +3,44 @@ package main import ( "image" "image/color" - "math" ) -func mean(i []int) float64 { - sum := 0 - for _, n := range i { - sum += n - } - return float64(sum) / float64(len(i)) -} - -// TODO: is there a prettier way of doing this than float64() all over the place? -func stddev(i []int) float64 { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := float64(sum) / float64(len(i) - 1) - return math.Sqrt(variance) -} - -func meanstddev(i []int) (float64, float64) { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := float64(sum) / float64(len(i) - 1) - return m, math.Sqrt(variance) -} - -// gets the pixel values surrounding a point in the image -func surrounding(img *image.Gray, x int, y int, size int) []int { +// Implements Sauvola's algorithm for text binarization, see paper +// "Adaptive document image binarization" (2000) +func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { b := img.Bounds() + new := image.NewGray(b) - miny := y - size/2 - if miny < b.Min.Y { - miny = b.Min.Y - } - minx := x - size/2 - if minx < b.Min.X { - minx = b.Min.X - } - maxy := y + size/2 - if maxy > b.Max.Y { - maxy = b.Max.Y - } - maxx := x + size/2 - if maxx > b.Max.X { - maxx = b.Max.X - } - - var s []int - for yi := miny; yi < maxy; yi++ { - for xi := minx; xi < maxx; xi++ { - s = append(s, int(img.GrayAt(xi, yi).Y)) + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + window := surrounding(img, x, y, windowsize) + m, dev := meanstddev(window) + threshold := m * (1 + ksize * ((dev / 128) - 1)) + if img.GrayAt(x, y).Y < uint8(threshold) { + new.SetGray(x, y, color.Gray{0}) + } else { + new.SetGray(x, y, color.Gray{255}) + } } } - return s + + return new } -// TODO: parallelize -// TODO: switch to using integral images to make faster; see paper -// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" -func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { +// Implements Sauvola's algorithm using Integral Images, see paper +// "Effcient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" +// and +// https://stackoverflow.com/questions/13110733/computing-image-integral +func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { b := img.Bounds() new := image.NewGray(b) + integral := integralimg(img) + integralsq := integralimgsq(img) + for y := b.Min.Y; y < b.Max.Y; y++ { for x := b.Min.X; x < b.Max.X; x++ { - window := surrounding(img, x, y, windowsize) - m, dev := meanstddev(window) + m, dev := integralmeanstddev(integral, integralsq, x, y, windowsize) threshold := m * (1 + ksize * ((dev / 128) - 1)) if img.GrayAt(x, y).Y < uint8(threshold) { new.SetGray(x, y, color.Gray{0}) diff --git a/binarize/util.go b/binarize/util.go new file mode 100644 index 0000000..e7cf0f8 --- /dev/null +++ b/binarize/util.go @@ -0,0 +1,67 @@ +package main + +import ( + "image" + "math" +) + +func mean(i []int) float64 { + sum := 0 + for _, n := range i { + sum += n + } + return float64(sum) / float64(len(i)) +} + +func stddev(i []int) float64 { + m := mean(i) + + var sum float64 + for _, n := range i { + sum += (float64(n) - m) * (float64(n) - m) + } + variance := sum / float64(len(i) - 1) + return math.Sqrt(variance) +} + +func meanstddev(i []int) (float64, float64) { + m := mean(i) + + var sum float64 + for _, n := range i { + sum += (float64(n) - m) * (float64(n) - m) + } + variance := float64(sum) / float64(len(i) - 1) + return m, math.Sqrt(variance) +} + +// gets the pixel values surrounding a point in the image +func surrounding(img *image.Gray, x int, y int, size int) []int { + b := img.Bounds() + step := size / 2 + + miny := y - step + if miny < b.Min.Y { + miny = b.Min.Y + } + minx := x - step + if minx < b.Min.X { + minx = b.Min.X + } + maxy := y + step + if maxy > b.Max.Y { + maxy = b.Max.Y + } + maxx := x + step + if maxx > b.Max.X { + maxx = b.Max.X + } + + var s []int + for yi := miny; yi <= maxy; yi++ { + for xi := minx; xi <= maxx; xi++ { + s = append(s, int(img.GrayAt(xi, yi).Y)) + } + } + return s +} |