diff options
Diffstat (limited to 'binarize')
-rw-r--r-- | binarize/cmd/binarize/main.go | 78 | ||||
-rw-r--r-- | binarize/integralimg.go | 116 | ||||
-rw-r--r-- | binarize/sauvola.go | 54 | ||||
-rw-r--r-- | binarize/sauvola_test.go | 108 | ||||
-rw-r--r-- | binarize/testdata/pg1.png | bin | 651071 -> 0 bytes | |||
-rw-r--r-- | binarize/testdata/pg1_integralsauvola_k0.3_w19.png | bin | 19456 -> 0 bytes | |||
-rw-r--r-- | binarize/testdata/pg1_integralsauvola_k0.5_w19.png | bin | 18241 -> 0 bytes | |||
-rw-r--r-- | binarize/testdata/pg1_integralsauvola_k0.5_w41.png | bin | 18260 -> 0 bytes | |||
-rw-r--r-- | binarize/testdata/pg1_sauvola_k0.3_w19.png | bin | 19447 -> 0 bytes | |||
-rw-r--r-- | binarize/testdata/pg1_sauvola_k0.5_w19.png | bin | 18231 -> 0 bytes | |||
-rw-r--r-- | binarize/testdata/pg1_sauvola_k0.5_w41.png | bin | 18275 -> 0 bytes | |||
-rw-r--r-- | binarize/util.go | 87 |
12 files changed, 0 insertions, 443 deletions
diff --git a/binarize/cmd/binarize/main.go b/binarize/cmd/binarize/main.go deleted file mode 100644 index bda3d93..0000000 --- a/binarize/cmd/binarize/main.go +++ /dev/null @@ -1,78 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/go.git/binarize" -) - -// TODO: do more testing to see how good this assumption is -func autowsize(bounds image.Rectangle) int { - return bounds.Dx() / 60 -} - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n") - flag.PrintDefaults() - } - wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.") - ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).") - btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - if *wsize == 0 { - *wsize = autowsize(b) - log.Printf("Set window size to %d\n", *wsize) - } - - if *wsize % 2 == 0 { - *wsize++ - } - - // TODO: come up with a way to set a good ksize automatically - - var thresh image.Image - thresh = binarize.IntegralSauvola(gray, *ksize, *wsize) - - if *btype == "zeroinv" { - thresh, err = binarize.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA)) - if err != nil { - log.Fatal(err) - } - } - - f, err = os.Create(flag.Arg(1)) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) - } - defer f.Close() - err = png.Encode(f, thresh) - if err != nil { - log.Fatalf("Could not encode image: %v\n", err) - } -} diff --git a/binarize/integralimg.go b/binarize/integralimg.go deleted file mode 100644 index 382b495..0000000 --- a/binarize/integralimg.go +++ /dev/null @@ -1,116 +0,0 @@ -package binarize - -import ( - "image" - "math" -) - -type integralwindow struct { - topleft uint64 - topright uint64 - bottomleft uint64 - bottomright uint64 - width int - height int -} - -func Integralimg(img *image.Gray) [][]uint64 { - b := img.Bounds() - var oldy, oldx, oldxy uint64 - var integral [][]uint64 - for y := b.Min.Y; y < b.Max.Y; y++ { - newrow := []uint64{} - for x := b.Min.X; x < b.Max.X; x++ { - oldx, oldy, oldxy = 0, 0, 0 - if x > 0 { - oldx = newrow[x-1] - } - if y > 0 { - oldy = integral[y-1][x] - } - if x > 0 && y > 0 { - oldxy = integral[y-1][x-1] - } - pixel := uint64(img.GrayAt(x, y).Y) - i := pixel + oldx + oldy - oldxy - newrow = append(newrow, i) - } - integral = append(integral, newrow) - } - return integral -} - -func integralimgsq(img *image.Gray) [][]uint64 { - b := img.Bounds() - var oldy, oldx, oldxy uint64 - var integral [][]uint64 - for y := b.Min.Y; y < b.Max.Y; y++ { - newrow := []uint64{} - for x := b.Min.X; x < b.Max.X; x++ { - oldx, oldy, oldxy = 0, 0, 0 - if x > 0 { - oldx = newrow[x-1] - } - if y > 0 { - oldy = integral[y-1][x] - } - if x > 0 && y > 0 { - oldxy = integral[y-1][x-1] - } - pixel := uint64(img.GrayAt(x, y).Y) - i := pixel * pixel + oldx + oldy - oldxy - newrow = append(newrow, i) - } - integral = append(integral, newrow) - } - return integral -} - -// this gets the values of the four corners of a window, which can -// be used to quickly calculate the mean of the area -func getintegralwindow(integral [][]uint64, x int, y int, size int) integralwindow { - step := size / 2 - - minx, miny := 0, 0 - maxy := len(integral)-1 - maxx := len(integral[0])-1 - - if y > (step+1) { - miny = y - step - 1 - } - if x > (step+1) { - minx = x - step - 1 - } - - if maxy > (y + step) { - maxy = y + step - } - if maxx > (x + step) { - maxx = x + step - } - - return integralwindow { integral[miny][minx], integral[miny][maxx], integral[maxy][minx], integral[maxy][maxx], maxx-minx, maxy-miny} -} - -func integralmean(integral [][]uint64, x int, y int, size int) float64 { - i := getintegralwindow(integral, x, y, size) - total := float64(i.bottomright + i.topleft - i.topright - i.bottomleft) - sqsize := float64(i.width) * float64(i.height) - return total / sqsize -} - -func integralmeanstddev(integral [][]uint64, integralsq [][]uint64, x int, y int, size int) (float64, float64) { - i := getintegralwindow(integral, x, y, size) - isq := getintegralwindow(integralsq, x, y, size) - - var total, sqtotal, sqsize float64 - - sqsize = float64(i.width) * float64(i.height) - - total = float64(i.bottomright + i.topleft - i.topright - i.bottomleft) - sqtotal = float64(isq.bottomright + isq.topleft - isq.topright - isq.bottomleft) - - mean := total / sqsize - variance := (sqtotal / sqsize) - (mean * mean) - return mean, math.Sqrt(variance) -} diff --git a/binarize/sauvola.go b/binarize/sauvola.go deleted file mode 100644 index 6d9c1af..0000000 --- a/binarize/sauvola.go +++ /dev/null @@ -1,54 +0,0 @@ -package binarize - -import ( - "image" - "image/color" -) - -// Implements Sauvola's algorithm for text binarization, see paper -// "Adaptive document image binarization" (2000) -func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - window := surrounding(img, x, y, windowsize) - m, dev := meanstddev(window) - threshold := m * (1 + ksize * ((dev / 128) - 1)) - if img.GrayAt(x, y).Y < uint8(threshold) { - new.SetGray(x, y, color.Gray{0}) - } else { - new.SetGray(x, y, color.Gray{255}) - } - } - } - - return new -} - -// Implements Sauvola's algorithm using Integral Images, see paper -// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" -// and -// https://stackoverflow.com/questions/13110733/computing-image-integral -func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - integral := Integralimg(img) - integralsq := integralimgsq(img) - - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - m, dev := integralmeanstddev(integral, integralsq, x, y, windowsize) - threshold := m * (1 + ksize * ((dev / 128) - 1)) - if img.GrayAt(x, y).Y < uint8(threshold) { - new.SetGray(x, y, color.Gray{0}) - } else { - new.SetGray(x, y, color.Gray{255}) - } - } - } - - return new -} diff --git a/binarize/sauvola_test.go b/binarize/sauvola_test.go deleted file mode 100644 index 5faeb61..0000000 --- a/binarize/sauvola_test.go +++ /dev/null @@ -1,108 +0,0 @@ -package binarize - -import ( - "flag" - "fmt" - "image" - "image/draw" - "image/png" - "os" - "testing" -) - -var update = flag.Bool("update", false, "update golden files") - -func decode(s string) (*image.Gray, error) { - f, err := os.Open(s) - defer f.Close() - if err != nil { - return nil, err - } - img, err := png.Decode(f) - if err != nil { - return nil, err - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - return gray, nil -} - -func imgsequal(img1 *image.Gray, img2 *image.Gray) bool { - b := img1.Bounds() - if ! b.Eq(img2.Bounds()) { - return false - } - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - r0, g0, b0, a0 := img1.At(x, y).RGBA() - r1, g1, b1, a1 := img2.At(x, y).RGBA() - if r0 != r1 { - return false - } - if g0 != g1 { - return false - } - if b0 != b1 { - return false - } - if a0 != a1 { - return false - } - } - } - return true -} - -func TestBinarization(t *testing.T) { - cases := []struct { - name string - orig string - golden string - ksize float64 - wsize int - }{ - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41}, - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19}, - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w41.png", 0.5, 41}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w19.png", 0.5, 19}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.3_w19.png", 0.3, 19}, - } - - for _, c := range cases { - t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) { - var actual *image.Gray - orig, err := decode(c.orig) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.orig, err) - } - switch c.name { - case "integralsauvola": - actual = IntegralSauvola(orig, c.ksize, c.wsize) - case "sauvola": - actual = Sauvola(orig, c.ksize, c.wsize) - default: - t.Fatalf("No method %s\n", c.name) - } - if *update { - f, err := os.Create(c.golden) - defer f.Close() - if err != nil { - t.Fatalf("Could not open file %s to update: %v\n", c.golden, err) - } - err = png.Encode(f, actual) - if err != nil { - t.Fatalf("Could not encode update of %s: %v\n", c.golden, err) - } - } - golden, err := decode(c.golden) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.golden, err) - } - if ! imgsequal(golden, actual) { - t.Errorf("Binarized %s differs to %s\n", c.orig, c.golden) - } - }) - } -} diff --git a/binarize/testdata/pg1.png b/binarize/testdata/pg1.png Binary files differdeleted file mode 100644 index 2bcc4b1..0000000 --- a/binarize/testdata/pg1.png +++ /dev/null diff --git a/binarize/testdata/pg1_integralsauvola_k0.3_w19.png b/binarize/testdata/pg1_integralsauvola_k0.3_w19.png Binary files differdeleted file mode 100644 index bdf5712..0000000 --- a/binarize/testdata/pg1_integralsauvola_k0.3_w19.png +++ /dev/null diff --git a/binarize/testdata/pg1_integralsauvola_k0.5_w19.png b/binarize/testdata/pg1_integralsauvola_k0.5_w19.png Binary files differdeleted file mode 100644 index 5db2d9a..0000000 --- a/binarize/testdata/pg1_integralsauvola_k0.5_w19.png +++ /dev/null diff --git a/binarize/testdata/pg1_integralsauvola_k0.5_w41.png b/binarize/testdata/pg1_integralsauvola_k0.5_w41.png Binary files differdeleted file mode 100644 index 050d037..0000000 --- a/binarize/testdata/pg1_integralsauvola_k0.5_w41.png +++ /dev/null diff --git a/binarize/testdata/pg1_sauvola_k0.3_w19.png b/binarize/testdata/pg1_sauvola_k0.3_w19.png Binary files differdeleted file mode 100644 index bcd595f..0000000 --- a/binarize/testdata/pg1_sauvola_k0.3_w19.png +++ /dev/null diff --git a/binarize/testdata/pg1_sauvola_k0.5_w19.png b/binarize/testdata/pg1_sauvola_k0.5_w19.png Binary files differdeleted file mode 100644 index 8de596c..0000000 --- a/binarize/testdata/pg1_sauvola_k0.5_w19.png +++ /dev/null diff --git a/binarize/testdata/pg1_sauvola_k0.5_w41.png b/binarize/testdata/pg1_sauvola_k0.5_w41.png Binary files differdeleted file mode 100644 index b8f50e0..0000000 --- a/binarize/testdata/pg1_sauvola_k0.5_w41.png +++ /dev/null diff --git a/binarize/util.go b/binarize/util.go deleted file mode 100644 index ad641c9..0000000 --- a/binarize/util.go +++ /dev/null @@ -1,87 +0,0 @@ -package binarize - -import ( - "errors" - "image" - "math" -) - -func mean(i []int) float64 { - sum := 0 - for _, n := range i { - sum += n - } - return float64(sum) / float64(len(i)) -} - -func stddev(i []int) float64 { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := sum / float64(len(i) - 1) - return math.Sqrt(variance) -} - -func meanstddev(i []int) (float64, float64) { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := float64(sum) / float64(len(i) - 1) - return m, math.Sqrt(variance) -} - -// gets the pixel values surrounding a point in the image -func surrounding(img *image.Gray, x int, y int, size int) []int { - b := img.Bounds() - step := size / 2 - - miny := y - step - if miny < b.Min.Y { - miny = b.Min.Y - } - minx := x - step - if minx < b.Min.X { - minx = b.Min.X - } - maxy := y + step - if maxy > b.Max.Y { - maxy = b.Max.Y - } - maxx := x + step - if maxx > b.Max.X { - maxx = b.Max.X - } - - var s []int - for yi := miny; yi <= maxy; yi++ { - for xi := minx; xi <= maxx; xi++ { - s = append(s, int(img.GrayAt(xi, yi).Y)) - } - } - return s -} - -func BinToZeroInv(bin *image.Gray, orig *image.RGBA) (*image.RGBA, error) { - b := bin.Bounds() - if ! b.Eq(orig.Bounds()) { - return orig, errors.New("bin and orig images need to be the same dimensions") - } - newimg := image.NewRGBA(image.Rect(0, 0, b.Dx(), b.Dy())) - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - if bin.GrayAt(x, y).Y == 255 { - newimg.Set(x, y, bin.GrayAt(x, y)) - } else { - newimg.Set(x, y, orig.At(x, y)) - } - } - } - - return newimg, nil -} |