diff options
Diffstat (limited to 'binarize')
| -rw-r--r-- | binarize/integralimg.go | 116 | ||||
| -rw-r--r-- | binarize/main.go (renamed from binarize/binarize.go) | 18 | ||||
| -rw-r--r-- | binarize/sauvola.go | 87 | ||||
| -rw-r--r-- | binarize/util.go | 67 | 
4 files changed, 219 insertions, 69 deletions
| diff --git a/binarize/integralimg.go b/binarize/integralimg.go new file mode 100644 index 0000000..c585d60 --- /dev/null +++ b/binarize/integralimg.go @@ -0,0 +1,116 @@ +package main + +import ( +	"image" +	"math" +) + +type integralwindow struct { +	topleft uint64 +	topright uint64 +	bottomleft uint64 +	bottomright uint64 +	width int +	height int +} + +func integralimg(img *image.Gray) [][]uint64 { +	b := img.Bounds() +	var oldy, oldx, oldxy uint64 +	var integral [][]uint64 +	for y := b.Min.Y; y < b.Max.Y; y++ { +		newrow := []uint64{} +		for x := b.Min.X; x < b.Max.X; x++ { +			oldx, oldy, oldxy = 0, 0, 0 +			if x > 0 { +				oldx = newrow[x-1] +			} +			if y > 0 { +				oldy = integral[y-1][x] +			} +			if x > 0 && y > 0 { +				oldxy = integral[y-1][x-1] +			} +			pixel := uint64(img.GrayAt(x, y).Y) +			i := pixel + oldx + oldy - oldxy +			newrow = append(newrow, i) +		} +		integral = append(integral, newrow) +	} +	return integral +} + +func integralimgsq(img *image.Gray) [][]uint64 { +	b := img.Bounds() +	var oldy, oldx, oldxy uint64 +	var integral [][]uint64 +	for y := b.Min.Y; y < b.Max.Y; y++ { +		newrow := []uint64{} +		for x := b.Min.X; x < b.Max.X; x++ { +			oldx, oldy, oldxy = 0, 0, 0 +			if x > 0 { +				oldx = newrow[x-1] +			} +			if y > 0 { +				oldy = integral[y-1][x] +			} +			if x > 0 && y > 0 { +				oldxy = integral[y-1][x-1] +			} +			pixel := uint64(img.GrayAt(x, y).Y) +			i := pixel * pixel + oldx + oldy - oldxy +			newrow = append(newrow, i) +		} +		integral = append(integral, newrow) +	} +	return integral +} + +// this gets the values of the four corners of a window, which can +// be used to quickly calculate the mean of the area +func getintegralwindow(integral [][]uint64, x int, y int, size int) integralwindow { +	step := size / 2 + +	minx, miny := 0, 0 +	maxy := len(integral)-1 +	maxx := len(integral[0])-1 + +	if y > (step+1) { +		miny = y - step - 1 +	} +	if x > (step+1) { +		minx = x - step - 1 +	} + +	if maxy > (y + step) { +		maxy = y 
+ step +	} +	if maxx > (x + step) { +		maxx = x + step +	} + +	return integralwindow { integral[miny][minx], integral[miny][maxx], integral[maxy][minx], integral[maxy][maxx], maxx-minx, maxy-miny} +} + +func integralmean(integral [][]uint64, x int, y int, size int) float64 { +	i := getintegralwindow(integral, x, y, size) +	total := float64(i.bottomright + i.topleft - i.topright - i.bottomleft) +	sqsize := float64(i.width) * float64(i.height) +	return total / sqsize +} + +func integralmeanstddev(integral [][]uint64, integralsq [][]uint64, x int, y int, size int) (float64, float64) { +	i := getintegralwindow(integral, x, y, size) +	isq := getintegralwindow(integralsq, x, y, size) + +	var total, sqtotal, sqsize float64 + +	sqsize = float64(i.width) * float64(i.height) + +	total = float64(i.bottomright + i.topleft - i.topright - i.bottomleft) +	sqtotal = float64(isq.bottomright + isq.topleft - isq.topright - isq.bottomleft) + +	mean := total / sqsize +	variance := (sqtotal / sqsize) - (mean * mean) +	return mean, math.Sqrt(variance) +} diff --git a/binarize/binarize.go b/binarize/main.go index fa8a30f..610effc 100644 --- a/binarize/binarize.go +++ b/binarize/main.go @@ -1,12 +1,15 @@  package main +// TODO: could look into other algorithms, for examples see +//       the README at https://github.com/brandonmpetty/Doxa +  import (  	"flag"  	"fmt"  	"log"  	"os" -	"github.com/Ernyoke/Imger/imgio" +	"github.com/Ernyoke/Imger/imgio" // TODO: get rid of this and do things myself  )  func main() { @@ -14,7 +17,7 @@ func main() {  		fmt.Fprintf(os.Stderr, "Usage: binarize [-w num] [-k num] inimg outimg\n")  		flag.PrintDefaults()  	} -	wsize := flag.Int("w", 31, "Window size for sauvola algorithm") +	wsize := flag.Int("w", 31, "Window size for sauvola algorithm (needs to be odd)")          ksize := flag.Float64("k", 0.5, "K for sauvola algorithm")  	flag.Parse()  	if flag.NArg() < 2 { @@ -22,16 +25,17 @@ func main() {  		os.Exit(1)  	} +	if *wsize % 2 == 0 { +		*wsize++ 
+	} +  	img, err := imgio.ImreadGray(flag.Arg(0))  	if err != nil {  		log.Fatalf("Could not read image %s\n", flag.Arg(0))  	} -	// TODO: should be able to estimate an appropriate window size based on resolution -	thresh := Sauvola(img, *ksize, *wsize) -	if err != nil { -		log.Fatal("Error binarising image\n") -	} +	// TODO: estimate an appropriate window size based on resolution +	thresh := IntegralSauvola(img, *ksize, *wsize)  	err = imgio.Imwrite(thresh, flag.Arg(1))  	if err != nil { diff --git a/binarize/sauvola.go b/binarize/sauvola.go index f1d0512..bc311ad 100644 --- a/binarize/sauvola.go +++ b/binarize/sauvola.go @@ -3,81 +3,44 @@ package main  import (  	"image"  	"image/color" -	"math"  ) -func mean(i []int) float64 { -	sum := 0 -	for _, n := range i { -		sum += n -	} -	return float64(sum) / float64(len(i)) -} - -// TODO: is there a prettier way of doing this than float64() all over the place? -func stddev(i []int) float64 { -	m := mean(i) - -	var sum float64 -	for _, n := range i { -		sum += (float64(n) - m) * (float64(n) - m) -	} -	variance := float64(sum) / float64(len(i) - 1) -	return math.Sqrt(variance) -} - -func meanstddev(i []int) (float64, float64) { -	m := mean(i) - -	var sum float64 -	for _, n := range i { -		sum += (float64(n) - m) * (float64(n) - m) -	} -	variance := float64(sum) / float64(len(i) - 1) -	return m, math.Sqrt(variance) -} - -// gets the pixel values surrounding a point in the image -func surrounding(img *image.Gray, x int, y int, size int) []int { +// Implements Sauvola's algorithm for text binarization, see paper +// "Adaptive document image binarization" (2000) +func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {  	b := img.Bounds() +	new := image.NewGray(b) -	miny := y - size/2 -	if miny < b.Min.Y { -		miny = b.Min.Y -	} -	minx := x - size/2 -	if minx < b.Min.X { -		minx = b.Min.X -	} -	maxy := y + size/2 -	if maxy > b.Max.Y { -		maxy = b.Max.Y -	} -	maxx := x + size/2 -	if maxx > b.Max.X { -		maxx = 
b.Max.X -	} - -	var s []int -	for yi := miny; yi < maxy; yi++ { -		for xi := minx; xi < maxx; xi++ { -			s = append(s, int(img.GrayAt(xi, yi).Y)) +	for y := b.Min.Y; y < b.Max.Y; y++ { +		for x := b.Min.X; x < b.Max.X; x++ { +			window := surrounding(img, x, y, windowsize) +			m, dev := meanstddev(window) +			threshold := m * (1 + ksize * ((dev / 128) - 1)) +			if img.GrayAt(x, y).Y < uint8(threshold) { +				new.SetGray(x, y, color.Gray{0}) +			} else { +				new.SetGray(x, y, color.Gray{255}) +			}  		}  	} -	return s + +	return new  } -// TODO: parallelize -// TODO: switch to using integral images to make faster; see paper -//       "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" -func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { +// Implements Sauvola's algorithm using Integral Images, see paper +// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" +// and +// https://stackoverflow.com/questions/13110733/computing-image-integral +func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {  	b := img.Bounds()  	new := image.NewGray(b) +	integral := integralimg(img) +	integralsq := integralimgsq(img) +  	for y := b.Min.Y; y < b.Max.Y; y++ {  		for x := b.Min.X; x < b.Max.X; x++ { -			window := surrounding(img, x, y, windowsize) -			m, dev := meanstddev(window) +			m, dev := integralmeanstddev(integral, integralsq, x, y, windowsize)  			threshold := m * (1 + ksize * ((dev / 128) - 1))  			if img.GrayAt(x, y).Y < uint8(threshold) {  				new.SetGray(x, y, color.Gray{0}) diff --git a/binarize/util.go b/binarize/util.go new file mode 100644 index 0000000..e7cf0f8 --- /dev/null +++ b/binarize/util.go @@ -0,0 +1,67 @@ +package main + +import ( +	"image" +	"math" +) + +func mean(i []int) float64 { +	sum := 0 +	for _, n := range i { +		sum += n +	} +	return float64(sum) / float64(len(i)) +} + +func stddev(i []int) float64 { +	m := mean(i) + +	
var sum float64 +	for _, n := range i { +		sum += (float64(n) - m) * (float64(n) - m) +	} +	variance := sum / float64(len(i) - 1) +	return math.Sqrt(variance) +} + +func meanstddev(i []int) (float64, float64) { +	m := mean(i) + +	var sum float64 +	for _, n := range i { +		sum += (float64(n) - m) * (float64(n) - m) +	} +	variance := float64(sum) / float64(len(i) - 1) +	return m, math.Sqrt(variance) +} + +// gets the pixel values surrounding a point in the image +func surrounding(img *image.Gray, x int, y int, size int) []int { +	b := img.Bounds() +	step := size / 2 + +	miny := y - step +	if miny < b.Min.Y { +		miny = b.Min.Y +	} +	minx := x - step +	if minx < b.Min.X { +		minx = b.Min.X +	} +	maxy := y + step +	if maxy > b.Max.Y { +		maxy = b.Max.Y +	} +	maxx := x + step +	if maxx > b.Max.X { +		maxx = b.Max.X +	} + +	var s []int +	for yi := miny; yi <= maxy; yi++ { +		for xi := minx; xi <= maxx; xi++ { +			s = append(s, int(img.GrayAt(xi, yi).Y)) +		} +	} +	return s +} | 
