summaryrefslogtreecommitdiff
path: root/binarize
diff options
context:
space:
mode:
Diffstat (limited to 'binarize')
-rw-r--r--binarize/integralimg.go116
-rw-r--r--binarize/main.go (renamed from binarize/binarize.go)18
-rw-r--r--binarize/sauvola.go87
-rw-r--r--binarize/util.go67
4 files changed, 219 insertions, 69 deletions
diff --git a/binarize/integralimg.go b/binarize/integralimg.go
new file mode 100644
index 0000000..c585d60
--- /dev/null
+++ b/binarize/integralimg.go
@@ -0,0 +1,116 @@
+package main
+
+import (
+ "image"
+ "math"
+)
+
// integralwindow holds the integral-image values sampled at the four
// corners of a rectangular window, plus the extent of that window.
// The pixel sum of the window is recovered as
// bottomright + topleft - topright - bottomleft.
type integralwindow struct {
	// Integral-image samples at the window corners.
	topleft, topright, bottomleft, bottomright uint64
	// Horizontal and vertical distance between the sampled corners.
	width, height int
}
+
// integralimg builds the integral image (summed-area table) of img.
//
// The table is indexed as [row][column] relative to the top-left corner of
// img.Bounds(), so it works for images whose bounds do not start at (0,0).
// Entry [r][c] is the sum of the gray values of every pixel in rows 0..r and
// columns 0..c, both inclusive. An image with empty bounds yields nil.
func integralimg(img *image.Gray) [][]uint64 {
	b := img.Bounds()
	if b.Empty() {
		return nil
	}
	w, h := b.Dx(), b.Dy()

	integral := make([][]uint64, 0, h)
	for r := 0; r < h; r++ {
		row := make([]uint64, w)
		// rowsum is the running total of the current row up to column c;
		// adding the entry directly above gives the full rectangle sum.
		var rowsum uint64
		for c := 0; c < w; c++ {
			rowsum += uint64(img.GrayAt(b.Min.X+c, b.Min.Y+r).Y)
			row[c] = rowsum
			if r > 0 {
				row[c] += integral[r-1][c]
			}
		}
		integral = append(integral, row)
	}
	return integral
}
+
// integralimgsq builds the integral image of the squared gray values of img.
//
// The table is indexed as [row][column] relative to the top-left corner of
// img.Bounds(), so it works for images whose bounds do not start at (0,0).
// Entry [r][c] is the sum of pixel*pixel over rows 0..r and columns 0..c,
// both inclusive. An image with empty bounds yields nil.
func integralimgsq(img *image.Gray) [][]uint64 {
	b := img.Bounds()
	if b.Empty() {
		return nil
	}
	w, h := b.Dx(), b.Dy()

	integral := make([][]uint64, 0, h)
	for r := 0; r < h; r++ {
		row := make([]uint64, w)
		// rowsum is the running total of squares in the current row up to
		// column c; adding the entry above gives the full rectangle sum.
		var rowsum uint64
		for c := 0; c < w; c++ {
			pixel := uint64(img.GrayAt(b.Min.X+c, b.Min.Y+r).Y)
			rowsum += pixel * pixel
			row[c] = rowsum
			if r > 0 {
				row[c] += integral[r-1][c]
			}
		}
		integral = append(integral, row)
	}
	return integral
}
+
+// this gets the values of the four corners of a window, which can
+// be used to quickly calculate the mean of the area
+func getintegralwindow(integral [][]uint64, x int, y int, size int) integralwindow {
+ step := size / 2
+
+ minx, miny := 0, 0
+ maxy := len(integral)-1
+ maxx := len(integral[0])-1
+
+ if y > (step+1) {
+ miny = y - step - 1
+ }
+ if x > (step+1) {
+ minx = x - step - 1
+ }
+
+ if maxy > (y + step) {
+ maxy = y + step
+ }
+ if maxx > (x + step) {
+ maxx = x + step
+ }
+
+ return integralwindow { integral[miny][minx], integral[miny][maxx], integral[maxy][minx], integral[maxy][maxx], maxx-minx, maxy-miny}
+}
+
+func integralmean(integral [][]uint64, x int, y int, size int) float64 {
+ i := getintegralwindow(integral, x, y, size)
+ total := float64(i.bottomright + i.topleft - i.topright - i.bottomleft)
+ sqsize := float64(i.width) * float64(i.height)
+ return total / sqsize
+}
+
+func integralmeanstddev(integral [][]uint64, integralsq [][]uint64, x int, y int, size int) (float64, float64) {
+ i := getintegralwindow(integral, x, y, size)
+ isq := getintegralwindow(integralsq, x, y, size)
+
+ var total, sqtotal, sqsize float64
+
+ sqsize = float64(i.width) * float64(i.height)
+
+ total = float64(i.bottomright + i.topleft - i.topright - i.bottomleft)
+ sqtotal = float64(isq.bottomright + isq.topleft - isq.topright - isq.bottomleft)
+
+ mean := total / sqsize
+ variance := (sqtotal / sqsize) - (mean * mean)
+ return mean, math.Sqrt(variance)
+}
diff --git a/binarize/binarize.go b/binarize/main.go
index fa8a30f..610effc 100644
--- a/binarize/binarize.go
+++ b/binarize/main.go
@@ -1,12 +1,15 @@
package main
+// TODO: could look into other algorithms; for examples see
+// the README at https://github.com/brandonmpetty/Doxa
+
import (
"flag"
"fmt"
"log"
"os"
- "github.com/Ernyoke/Imger/imgio"
+ "github.com/Ernyoke/Imger/imgio" // TODO: get rid of this and do things myself
)
func main() {
@@ -14,7 +17,7 @@ func main() {
fmt.Fprintf(os.Stderr, "Usage: binarize [-w num] [-k num] inimg outimg\n")
flag.PrintDefaults()
}
- wsize := flag.Int("w", 31, "Window size for sauvola algorithm")
+ wsize := flag.Int("w", 31, "Window size for sauvola algorithm (needs to be odd)")
ksize := flag.Float64("k", 0.5, "K for sauvola algorithm")
flag.Parse()
if flag.NArg() < 2 {
@@ -22,16 +25,17 @@ func main() {
os.Exit(1)
}
+ if *wsize % 2 == 0 {
+ *wsize++
+ }
+
img, err := imgio.ImreadGray(flag.Arg(0))
if err != nil {
log.Fatalf("Could not read image %s\n", flag.Arg(0))
}
- // TODO: should be able to estimate an appropriate window size based on resolution
- thresh := Sauvola(img, *ksize, *wsize)
- if err != nil {
- log.Fatal("Error binarising image\n")
- }
+ // TODO: estimate an appropriate window size based on resolution
+ thresh := IntegralSauvola(img, *ksize, *wsize)
err = imgio.Imwrite(thresh, flag.Arg(1))
if err != nil {
diff --git a/binarize/sauvola.go b/binarize/sauvola.go
index f1d0512..bc311ad 100644
--- a/binarize/sauvola.go
+++ b/binarize/sauvola.go
@@ -3,81 +3,44 @@ package main
import (
"image"
"image/color"
- "math"
)
-func mean(i []int) float64 {
- sum := 0
- for _, n := range i {
- sum += n
- }
- return float64(sum) / float64(len(i))
-}
-
-// TODO: is there a prettier way of doing this than float64() all over the place?
-func stddev(i []int) float64 {
- m := mean(i)
-
- var sum float64
- for _, n := range i {
- sum += (float64(n) - m) * (float64(n) - m)
- }
- variance := float64(sum) / float64(len(i) - 1)
- return math.Sqrt(variance)
-}
-
-func meanstddev(i []int) (float64, float64) {
- m := mean(i)
-
- var sum float64
- for _, n := range i {
- sum += (float64(n) - m) * (float64(n) - m)
- }
- variance := float64(sum) / float64(len(i) - 1)
- return m, math.Sqrt(variance)
-}
-
-// gets the pixel values surrounding a point in the image
-func surrounding(img *image.Gray, x int, y int, size int) []int {
+// Implements Sauvola's algorithm for text binarization, see paper
+// "Adaptive document image binarization" (2000)
+func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
b := img.Bounds()
+ new := image.NewGray(b)
- miny := y - size/2
- if miny < b.Min.Y {
- miny = b.Min.Y
- }
- minx := x - size/2
- if minx < b.Min.X {
- minx = b.Min.X
- }
- maxy := y + size/2
- if maxy > b.Max.Y {
- maxy = b.Max.Y
- }
- maxx := x + size/2
- if maxx > b.Max.X {
- maxx = b.Max.X
- }
-
- var s []int
- for yi := miny; yi < maxy; yi++ {
- for xi := minx; xi < maxx; xi++ {
- s = append(s, int(img.GrayAt(xi, yi).Y))
+ for y := b.Min.Y; y < b.Max.Y; y++ {
+ for x := b.Min.X; x < b.Max.X; x++ {
+ window := surrounding(img, x, y, windowsize)
+ m, dev := meanstddev(window)
+ threshold := m * (1 + ksize * ((dev / 128) - 1))
+ if img.GrayAt(x, y).Y < uint8(threshold) {
+ new.SetGray(x, y, color.Gray{0})
+ } else {
+ new.SetGray(x, y, color.Gray{255})
+ }
}
}
- return s
+
+ return new
}
-// TODO: parallelize
-// TODO: switch to using integral images to make faster; see paper
-// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images"
-func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
+// Implements Sauvola's algorithm using Integral Images, see paper
+// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images"
+// and
+// https://stackoverflow.com/questions/13110733/computing-image-integral
+func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
b := img.Bounds()
new := image.NewGray(b)
+ integral := integralimg(img)
+ integralsq := integralimgsq(img)
+
for y := b.Min.Y; y < b.Max.Y; y++ {
for x := b.Min.X; x < b.Max.X; x++ {
- window := surrounding(img, x, y, windowsize)
- m, dev := meanstddev(window)
+ m, dev := integralmeanstddev(integral, integralsq, x, y, windowsize)
threshold := m * (1 + ksize * ((dev / 128) - 1))
if img.GrayAt(x, y).Y < uint8(threshold) {
new.SetGray(x, y, color.Gray{0})
diff --git a/binarize/util.go b/binarize/util.go
new file mode 100644
index 0000000..e7cf0f8
--- /dev/null
+++ b/binarize/util.go
@@ -0,0 +1,67 @@
+package main
+
+import (
+ "image"
+ "math"
+)
+
// mean returns the arithmetic mean of i.
// An empty slice has no mean; 0 is returned rather than NaN from 0/0.
func mean(i []int) float64 {
	if len(i) == 0 {
		return 0
	}
	sum := 0
	for _, n := range i {
		sum += n
	}
	return float64(sum) / float64(len(i))
}
+
+func stddev(i []int) float64 {
+ m := mean(i)
+
+ var sum float64
+ for _, n := range i {
+ sum += (float64(n) - m) * (float64(n) - m)
+ }
+ variance := sum / float64(len(i) - 1)
+ return math.Sqrt(variance)
+}
+
+func meanstddev(i []int) (float64, float64) {
+ m := mean(i)
+
+ var sum float64
+ for _, n := range i {
+ sum += (float64(n) - m) * (float64(n) - m)
+ }
+ variance := float64(sum) / float64(len(i) - 1)
+ return m, math.Sqrt(variance)
+}
+
// surrounding returns the gray values of the pixels in the size x size
// window centred on (x, y), clipped to the bounds of img. Values are listed
// row by row, left to right. x and y are image coordinates.
//
// The window reaches size/2 pixels in each direction, both ends inclusive.
// Bounds().Max is exclusive, so the last valid row/column is Max-1; clamping
// to that keeps out-of-image positions (which GrayAt reports as 0) out of
// the result. A window that does not overlap the image yields nil.
func surrounding(img *image.Gray, x int, y int, size int) []int {
	b := img.Bounds()
	step := size / 2

	miny, maxy := y-step, y+step
	if miny < b.Min.Y {
		miny = b.Min.Y
	}
	if maxy > b.Max.Y-1 {
		maxy = b.Max.Y - 1
	}

	minx, maxx := x-step, x+step
	if minx < b.Min.X {
		minx = b.Min.X
	}
	if maxx > b.Max.X-1 {
		maxx = b.Max.X - 1
	}

	if minx > maxx || miny > maxy {
		return nil
	}

	s := make([]int, 0, (maxx-minx+1)*(maxy-miny+1))
	for yi := miny; yi <= maxy; yi++ {
		for xi := minx; xi <= maxx; xi++ {
			s = append(s, int(img.GrayAt(xi, yi).Y))
		}
	}
	return s
}