summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick White <git@njw.name>2019-01-30 16:42:02 +0000
committerNick White <git@njw.name>2019-01-30 19:19:44 +0000
commit6bb8ffba746fbcea8a8a359a32afdd591dab0f25 (patch)
treee3c43d69b64d9e6fdc3e09e2a8e80122271568ea
parent26a61941cf0216202aee3378f17b05255170da17 (diff)
Add integral image functionality to enable massive speedup of Sauvola
Note that there are some very small differences to the output compared to the basic algorithm, but this doesn't make much difference. This is due to minor differences with the standard deviation calculation throughout, and with mean calculation at edges, for reasons I'm unclear about. WIP integral image speedup. mean is working Very WIP, but mean is perfect once full window is used Integral version all working! Remove debugging info Organise code better
-rw-r--r--binarize/integralimg.go116
-rw-r--r--binarize/main.go (renamed from binarize/binarize.go)18
-rw-r--r--binarize/sauvola.go87
-rw-r--r--binarize/util.go67
4 files changed, 219 insertions, 69 deletions
diff --git a/binarize/integralimg.go b/binarize/integralimg.go
new file mode 100644
index 0000000..c585d60
--- /dev/null
+++ b/binarize/integralimg.go
@@ -0,0 +1,116 @@
+package main
+
+import (
+ "image"
+ "math"
+)
+
+type integralwindow struct {
+ topleft uint64
+ topright uint64
+ bottomleft uint64
+ bottomright uint64
+ width int
+ height int
+}
+
+func integralimg(img *image.Gray) [][]uint64 {
+ b := img.Bounds()
+ var oldy, oldx, oldxy uint64
+ var integral [][]uint64
+ for y := b.Min.Y; y < b.Max.Y; y++ {
+ newrow := []uint64{}
+ for x := b.Min.X; x < b.Max.X; x++ {
+ oldx, oldy, oldxy = 0, 0, 0
+ if x > 0 {
+ oldx = newrow[x-1]
+ }
+ if y > 0 {
+ oldy = integral[y-1][x]
+ }
+ if x > 0 && y > 0 {
+ oldxy = integral[y-1][x-1]
+ }
+ pixel := uint64(img.GrayAt(x, y).Y)
+ i := pixel + oldx + oldy - oldxy
+ newrow = append(newrow, i)
+ }
+ integral = append(integral, newrow)
+ }
+ return integral
+}
+
+func integralimgsq(img *image.Gray) [][]uint64 {
+ b := img.Bounds()
+ var oldy, oldx, oldxy uint64
+ var integral [][]uint64
+ for y := b.Min.Y; y < b.Max.Y; y++ {
+ newrow := []uint64{}
+ for x := b.Min.X; x < b.Max.X; x++ {
+ oldx, oldy, oldxy = 0, 0, 0
+ if x > 0 {
+ oldx = newrow[x-1]
+ }
+ if y > 0 {
+ oldy = integral[y-1][x]
+ }
+ if x > 0 && y > 0 {
+ oldxy = integral[y-1][x-1]
+ }
+ pixel := uint64(img.GrayAt(x, y).Y)
+ i := pixel * pixel + oldx + oldy - oldxy
+ newrow = append(newrow, i)
+ }
+ integral = append(integral, newrow)
+ }
+ return integral
+}
+
+// this gets the values of the four corners of a window, which can
+// be used to quickly calculate the mean of the area
+func getintegralwindow(integral [][]uint64, x int, y int, size int) integralwindow {
+ step := size / 2
+
+ minx, miny := 0, 0
+ maxy := len(integral)-1
+ maxx := len(integral[0])-1
+
+ if y > (step+1) {
+ miny = y - step - 1
+ }
+ if x > (step+1) {
+ minx = x - step - 1
+ }
+
+ if maxy > (y + step) {
+ maxy = y + step
+ }
+ if maxx > (x + step) {
+ maxx = x + step
+ }
+
+ return integralwindow { integral[miny][minx], integral[miny][maxx], integral[maxy][minx], integral[maxy][maxx], maxx-minx, maxy-miny}
+}
+
+func integralmean(integral [][]uint64, x int, y int, size int) float64 {
+ i := getintegralwindow(integral, x, y, size)
+ total := float64(i.bottomright + i.topleft - i.topright - i.bottomleft)
+ sqsize := float64(i.width) * float64(i.height)
+ return total / sqsize
+}
+
+func integralmeanstddev(integral [][]uint64, integralsq [][]uint64, x int, y int, size int) (float64, float64) {
+ i := getintegralwindow(integral, x, y, size)
+ isq := getintegralwindow(integralsq, x, y, size)
+
+ var total, sqtotal, sqsize float64
+
+ sqsize = float64(i.width) * float64(i.height)
+
+ total = float64(i.bottomright + i.topleft - i.topright - i.bottomleft)
+ sqtotal = float64(isq.bottomright + isq.topleft - isq.topright - isq.bottomleft)
+
+ mean := total / sqsize
+ variance := (sqtotal / sqsize) - (mean * mean)
+ return mean, math.Sqrt(variance)
+}
diff --git a/binarize/binarize.go b/binarize/main.go
index fa8a30f..610effc 100644
--- a/binarize/binarize.go
+++ b/binarize/main.go
@@ -1,12 +1,15 @@
package main
+// TODO: could look into other algorithms, see for examples see
+// the README at https://github.com/brandonmpetty/Doxa
+
import (
"flag"
"fmt"
"log"
"os"
- "github.com/Ernyoke/Imger/imgio"
+ "github.com/Ernyoke/Imger/imgio" // TODO: get rid of this and do things myself
)
func main() {
@@ -14,7 +17,7 @@ func main() {
fmt.Fprintf(os.Stderr, "Usage: binarize [-w num] [-k num] inimg outimg\n")
flag.PrintDefaults()
}
- wsize := flag.Int("w", 31, "Window size for sauvola algorithm")
+ wsize := flag.Int("w", 31, "Window size for sauvola algorithm (needs to be odd)")
ksize := flag.Float64("k", 0.5, "K for sauvola algorithm")
flag.Parse()
if flag.NArg() < 2 {
@@ -22,16 +25,17 @@ func main() {
os.Exit(1)
}
+ if *wsize % 2 == 0 {
+ *wsize++
+ }
+
img, err := imgio.ImreadGray(flag.Arg(0))
if err != nil {
log.Fatalf("Could not read image %s\n", flag.Arg(0))
}
- // TODO: should be able to estimate an appropriate window size based on resolution
- thresh := Sauvola(img, *ksize, *wsize)
- if err != nil {
- log.Fatal("Error binarising image\n")
- }
+ // TODO: estimate an appropriate window size based on resolution
+ thresh := IntegralSauvola(img, *ksize, *wsize)
err = imgio.Imwrite(thresh, flag.Arg(1))
if err != nil {
diff --git a/binarize/sauvola.go b/binarize/sauvola.go
index f1d0512..bc311ad 100644
--- a/binarize/sauvola.go
+++ b/binarize/sauvola.go
@@ -3,81 +3,44 @@ package main
import (
"image"
"image/color"
- "math"
)
-func mean(i []int) float64 {
- sum := 0
- for _, n := range i {
- sum += n
- }
- return float64(sum) / float64(len(i))
-}
-
-// TODO: is there a prettier way of doing this than float64() all over the place?
-func stddev(i []int) float64 {
- m := mean(i)
-
- var sum float64
- for _, n := range i {
- sum += (float64(n) - m) * (float64(n) - m)
- }
- variance := float64(sum) / float64(len(i) - 1)
- return math.Sqrt(variance)
-}
-
-func meanstddev(i []int) (float64, float64) {
- m := mean(i)
-
- var sum float64
- for _, n := range i {
- sum += (float64(n) - m) * (float64(n) - m)
- }
- variance := float64(sum) / float64(len(i) - 1)
- return m, math.Sqrt(variance)
-}
-
-// gets the pixel values surrounding a point in the image
-func surrounding(img *image.Gray, x int, y int, size int) []int {
+// Implements Sauvola's algorithm for text binarization, see paper
+// "Adaptive document image binarization" (2000)
+func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
b := img.Bounds()
+ new := image.NewGray(b)
- miny := y - size/2
- if miny < b.Min.Y {
- miny = b.Min.Y
- }
- minx := x - size/2
- if minx < b.Min.X {
- minx = b.Min.X
- }
- maxy := y + size/2
- if maxy > b.Max.Y {
- maxy = b.Max.Y
- }
- maxx := x + size/2
- if maxx > b.Max.X {
- maxx = b.Max.X
- }
-
- var s []int
- for yi := miny; yi < maxy; yi++ {
- for xi := minx; xi < maxx; xi++ {
- s = append(s, int(img.GrayAt(xi, yi).Y))
+ for y := b.Min.Y; y < b.Max.Y; y++ {
+ for x := b.Min.X; x < b.Max.X; x++ {
+ window := surrounding(img, x, y, windowsize)
+ m, dev := meanstddev(window)
+ threshold := m * (1 + ksize * ((dev / 128) - 1))
+ if img.GrayAt(x, y).Y < uint8(threshold) {
+ new.SetGray(x, y, color.Gray{0})
+ } else {
+ new.SetGray(x, y, color.Gray{255})
+ }
}
}
- return s
+
+ return new
}
-// TODO: parallelize
-// TODO: switch to using integral images to make faster; see paper
-// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images"
-func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
+// Implements Sauvola's algorithm using Integral Images, see paper
+// "Effcient Implementation of Local Adaptive Thresholding Techniques Using Integral Images"
+// and
+// https://stackoverflow.com/questions/13110733/computing-image-integral
+func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
b := img.Bounds()
new := image.NewGray(b)
+ integral := integralimg(img)
+ integralsq := integralimgsq(img)
+
for y := b.Min.Y; y < b.Max.Y; y++ {
for x := b.Min.X; x < b.Max.X; x++ {
- window := surrounding(img, x, y, windowsize)
- m, dev := meanstddev(window)
+ m, dev := integralmeanstddev(integral, integralsq, x, y, windowsize)
threshold := m * (1 + ksize * ((dev / 128) - 1))
if img.GrayAt(x, y).Y < uint8(threshold) {
new.SetGray(x, y, color.Gray{0})
diff --git a/binarize/util.go b/binarize/util.go
new file mode 100644
index 0000000..e7cf0f8
--- /dev/null
+++ b/binarize/util.go
@@ -0,0 +1,67 @@
+package main
+
+import (
+ "image"
+ "math"
+)
+
+func mean(i []int) float64 {
+ sum := 0
+ for _, n := range i {
+ sum += n
+ }
+ return float64(sum) / float64(len(i))
+}
+
+func stddev(i []int) float64 {
+ m := mean(i)
+
+ var sum float64
+ for _, n := range i {
+ sum += (float64(n) - m) * (float64(n) - m)
+ }
+ variance := sum / float64(len(i) - 1)
+ return math.Sqrt(variance)
+}
+
+func meanstddev(i []int) (float64, float64) {
+ m := mean(i)
+
+ var sum float64
+ for _, n := range i {
+ sum += (float64(n) - m) * (float64(n) - m)
+ }
+ variance := float64(sum) / float64(len(i) - 1)
+ return m, math.Sqrt(variance)
+}
+
+// gets the pixel values surrounding a point in the image
+func surrounding(img *image.Gray, x int, y int, size int) []int {
+ b := img.Bounds()
+ step := size / 2
+
+ miny := y - step
+ if miny < b.Min.Y {
+ miny = b.Min.Y
+ }
+ minx := x - step
+ if minx < b.Min.X {
+ minx = b.Min.X
+ }
+ maxy := y + step
+ if maxy > b.Max.Y {
+ maxy = b.Max.Y
+ }
+ maxx := x + step
+ if maxx > b.Max.X {
+ maxx = b.Max.X
+ }
+
+ var s []int
+ for yi := miny; yi <= maxy; yi++ {
+ for xi := minx; xi <= maxx; xi++ {
+ s = append(s, int(img.GrayAt(xi, yi).Y))
+ }
+ }
+ return s
+}