summaryrefslogtreecommitdiff
path: root/binarize
diff options
context:
space:
mode:
authorNick White <git@njw.name>2019-05-13 19:23:03 +0100
committerNick White <git@njw.name>2019-05-13 19:23:03 +0100
commit02a6c66eb77a5b455bcf2d0547d2383074eb7e41 (patch)
tree00f2bde1dfa7a1b23e55478309f26ba1b54fdab9 /binarize
parentd94fb9f7e74aaad335a587030ed2b4ce44c24cbf (diff)
Reorganise image manipulation to separate integral image parts
Also unify everything else under preproc/. Note that the UsefulImg interface should be used by the main functions to simplify things, but this hasn't been done yet.
Diffstat (limited to 'binarize')
-rw-r--r--binarize/cmd/binarize/main.go78
-rw-r--r--binarize/integralimg.go116
-rw-r--r--binarize/sauvola.go54
-rw-r--r--binarize/sauvola_test.go108
-rw-r--r--binarize/testdata/pg1.pngbin651071 -> 0 bytes
-rw-r--r--binarize/testdata/pg1_integralsauvola_k0.3_w19.pngbin19456 -> 0 bytes
-rw-r--r--binarize/testdata/pg1_integralsauvola_k0.5_w19.pngbin18241 -> 0 bytes
-rw-r--r--binarize/testdata/pg1_integralsauvola_k0.5_w41.pngbin18260 -> 0 bytes
-rw-r--r--binarize/testdata/pg1_sauvola_k0.3_w19.pngbin19447 -> 0 bytes
-rw-r--r--binarize/testdata/pg1_sauvola_k0.5_w19.pngbin18231 -> 0 bytes
-rw-r--r--binarize/testdata/pg1_sauvola_k0.5_w41.pngbin18275 -> 0 bytes
-rw-r--r--binarize/util.go87
12 files changed, 0 insertions, 443 deletions
diff --git a/binarize/cmd/binarize/main.go b/binarize/cmd/binarize/main.go
deleted file mode 100644
index bda3d93..0000000
--- a/binarize/cmd/binarize/main.go
+++ /dev/null
@@ -1,78 +0,0 @@
-package main
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/go.git/binarize"
-)
-
// autowsize derives a Sauvola window size from the image bounds: the
// image width divided by 60, using integer division.
// TODO: do more testing to see how good this assumption is
func autowsize(bounds image.Rectangle) int {
	const widthDivisor = 60
	width := bounds.Dx()
	return width / widthDivisor
}
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n")
- flag.PrintDefaults()
- }
- wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.")
- ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
- btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.")
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- img, _, err := image.Decode(f)
- if err != nil {
- log.Fatalf("Could not decode image: %v\n", err)
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- if *wsize == 0 {
- *wsize = autowsize(b)
- log.Printf("Set window size to %d\n", *wsize)
- }
-
- if *wsize % 2 == 0 {
- *wsize++
- }
-
- // TODO: come up with a way to set a good ksize automatically
-
- var thresh image.Image
- thresh = binarize.IntegralSauvola(gray, *ksize, *wsize)
-
- if *btype == "zeroinv" {
- thresh, err = binarize.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA))
- if err != nil {
- log.Fatal(err)
- }
- }
-
- f, err = os.Create(flag.Arg(1))
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
- }
- defer f.Close()
- err = png.Encode(f, thresh)
- if err != nil {
- log.Fatalf("Could not encode image: %v\n", err)
- }
-}
diff --git a/binarize/integralimg.go b/binarize/integralimg.go
deleted file mode 100644
index 382b495..0000000
--- a/binarize/integralimg.go
+++ /dev/null
@@ -1,116 +0,0 @@
-package binarize
-
-import (
- "image"
- "math"
-)
-
// integralwindow holds the four integral-image samples at the corners of a
// window, plus the horizontal and vertical distance between those corners.
// The field order matters: the type is built with a positional literal.
type integralwindow struct {
	topleft, topright       uint64
	bottomleft, bottomright uint64
	width, height           int
}
-
// Integralimg builds the integral image (summed-area table) of img.
//
// The result is indexed as integral[row][col], with both indices counted
// from the top-left corner of img.Bounds() rather than in absolute image
// coordinates, so images whose bounds do not start at (0, 0), such as
// sub-images, are handled too. Each entry is the sum of every pixel value
// above and to the left of that position, inclusive. An image without rows
// yields a nil slice.
func Integralimg(img *image.Gray) [][]uint64 {
	b := img.Bounds()
	w, h := b.Dx(), b.Dy()
	if h <= 0 {
		return nil
	}
	if w < 0 {
		w = 0
	}

	integral := make([][]uint64, h)
	for row := range integral {
		sums := make([]uint64, w)
		// rowsum is the running total of the current row up to and
		// including the current column; adding the entry directly above
		// gives the full summed-area value.
		var rowsum uint64
		for col := range sums {
			rowsum += uint64(img.GrayAt(b.Min.X+col, b.Min.Y+row).Y)
			sums[col] = rowsum
			if row > 0 {
				sums[col] += integral[row-1][col]
			}
		}
		integral[row] = sums
	}
	return integral
}
-
// integralimgsq builds the integral image of the squared pixel values of
// img, which is what a windowed variance calculation needs.
//
// The result is indexed as integral[row][col], with both indices counted
// from the top-left corner of img.Bounds() rather than in absolute image
// coordinates, so images whose bounds do not start at (0, 0) are handled
// too. An image without rows yields a nil slice.
func integralimgsq(img *image.Gray) [][]uint64 {
	b := img.Bounds()
	w, h := b.Dx(), b.Dy()
	if h <= 0 {
		return nil
	}
	if w < 0 {
		w = 0
	}

	integral := make([][]uint64, h)
	for row := range integral {
		sums := make([]uint64, w)
		// rowsum is the running total of squared pixels in the current row
		// up to and including the current column.
		var rowsum uint64
		for col := range sums {
			pixel := uint64(img.GrayAt(b.Min.X+col, b.Min.Y+row).Y)
			rowsum += pixel * pixel
			sums[col] = rowsum
			if row > 0 {
				sums[col] += integral[row-1][col]
			}
		}
		integral[row] = sums
	}
	return integral
}
-
-// this gets the values of the four corners of a window, which can
-// be used to quickly calculate the mean of the area
-func getintegralwindow(integral [][]uint64, x int, y int, size int) integralwindow {
- step := size / 2
-
- minx, miny := 0, 0
- maxy := len(integral)-1
- maxx := len(integral[0])-1
-
- if y > (step+1) {
- miny = y - step - 1
- }
- if x > (step+1) {
- minx = x - step - 1
- }
-
- if maxy > (y + step) {
- maxy = y + step
- }
- if maxx > (x + step) {
- maxx = x + step
- }
-
- return integralwindow { integral[miny][minx], integral[miny][maxx], integral[maxy][minx], integral[maxy][maxx], maxx-minx, maxy-miny}
-}
-
-func integralmean(integral [][]uint64, x int, y int, size int) float64 {
- i := getintegralwindow(integral, x, y, size)
- total := float64(i.bottomright + i.topleft - i.topright - i.bottomleft)
- sqsize := float64(i.width) * float64(i.height)
- return total / sqsize
-}
-
-func integralmeanstddev(integral [][]uint64, integralsq [][]uint64, x int, y int, size int) (float64, float64) {
- i := getintegralwindow(integral, x, y, size)
- isq := getintegralwindow(integralsq, x, y, size)
-
- var total, sqtotal, sqsize float64
-
- sqsize = float64(i.width) * float64(i.height)
-
- total = float64(i.bottomright + i.topleft - i.topright - i.bottomleft)
- sqtotal = float64(isq.bottomright + isq.topleft - isq.topright - isq.bottomleft)
-
- mean := total / sqsize
- variance := (sqtotal / sqsize) - (mean * mean)
- return mean, math.Sqrt(variance)
-}
diff --git a/binarize/sauvola.go b/binarize/sauvola.go
deleted file mode 100644
index 6d9c1af..0000000
--- a/binarize/sauvola.go
+++ /dev/null
@@ -1,54 +0,0 @@
-package binarize
-
-import (
- "image"
- "image/color"
-)
-
-// Implements Sauvola's algorithm for text binarization, see paper
-// "Adaptive document image binarization" (2000)
-func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- window := surrounding(img, x, y, windowsize)
- m, dev := meanstddev(window)
- threshold := m * (1 + ksize * ((dev / 128) - 1))
- if img.GrayAt(x, y).Y < uint8(threshold) {
- new.SetGray(x, y, color.Gray{0})
- } else {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- }
-
- return new
-}
-
-// Implements Sauvola's algorithm using Integral Images, see paper
-// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images"
-// and
-// https://stackoverflow.com/questions/13110733/computing-image-integral
-func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- integral := Integralimg(img)
- integralsq := integralimgsq(img)
-
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- m, dev := integralmeanstddev(integral, integralsq, x, y, windowsize)
- threshold := m * (1 + ksize * ((dev / 128) - 1))
- if img.GrayAt(x, y).Y < uint8(threshold) {
- new.SetGray(x, y, color.Gray{0})
- } else {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- }
-
- return new
-}
diff --git a/binarize/sauvola_test.go b/binarize/sauvola_test.go
deleted file mode 100644
index 5faeb61..0000000
--- a/binarize/sauvola_test.go
+++ /dev/null
@@ -1,108 +0,0 @@
-package binarize
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- "image/png"
- "os"
- "testing"
-)
-
-var update = flag.Bool("update", false, "update golden files")
-
// decode reads the PNG file at path s and returns its contents as a
// grayscale image whose bounds start at (0, 0). It returns a nil image and
// the error if the file cannot be opened or decoded.
func decode(s string) (*image.Gray, error) {
	f, err := os.Open(s)
	if err != nil {
		return nil, err
	}
	// Only defer the Close once the file is known to be open.
	defer f.Close()

	img, err := png.Decode(f)
	if err != nil {
		return nil, err
	}

	b := img.Bounds()
	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
	// The destination rectangle is in gray's coordinates; b.Min is where
	// reading starts in the source image.
	draw.Draw(gray, gray.Bounds(), img, b.Min, draw.Src)
	return gray, nil
}
-
// imgsequal reports whether two grayscale images have equal bounds and the
// same value at every pixel.
func imgsequal(img1 *image.Gray, img2 *image.Gray) bool {
	bounds := img1.Bounds()
	if !bounds.Eq(img2.Bounds()) {
		return false
	}

	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
		for x := bounds.Min.X; x < bounds.Max.X; x++ {
			// The RGBA components of a gray pixel are all derived from its
			// single Y value (alpha is always opaque), so comparing Y is
			// the same as comparing red, green, blue and alpha in turn.
			if img1.GrayAt(x, y).Y != img2.GrayAt(x, y).Y {
				return false
			}
		}
	}
	return true
}
-
-func TestBinarization(t *testing.T) {
- cases := []struct {
- name string
- orig string
- golden string
- ksize float64
- wsize int
- }{
- {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41},
- {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19},
- {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19},
- {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w41.png", 0.5, 41},
- {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w19.png", 0.5, 19},
- {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.3_w19.png", 0.3, 19},
- }
-
- for _, c := range cases {
- t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) {
- var actual *image.Gray
- orig, err := decode(c.orig)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.orig, err)
- }
- switch c.name {
- case "integralsauvola":
- actual = IntegralSauvola(orig, c.ksize, c.wsize)
- case "sauvola":
- actual = Sauvola(orig, c.ksize, c.wsize)
- default:
- t.Fatalf("No method %s\n", c.name)
- }
- if *update {
- f, err := os.Create(c.golden)
- defer f.Close()
- if err != nil {
- t.Fatalf("Could not open file %s to update: %v\n", c.golden, err)
- }
- err = png.Encode(f, actual)
- if err != nil {
- t.Fatalf("Could not encode update of %s: %v\n", c.golden, err)
- }
- }
- golden, err := decode(c.golden)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.golden, err)
- }
- if ! imgsequal(golden, actual) {
- t.Errorf("Binarized %s differs to %s\n", c.orig, c.golden)
- }
- })
- }
-}
diff --git a/binarize/testdata/pg1.png b/binarize/testdata/pg1.png
deleted file mode 100644
index 2bcc4b1..0000000
--- a/binarize/testdata/pg1.png
+++ /dev/null
Binary files differ
diff --git a/binarize/testdata/pg1_integralsauvola_k0.3_w19.png b/binarize/testdata/pg1_integralsauvola_k0.3_w19.png
deleted file mode 100644
index bdf5712..0000000
--- a/binarize/testdata/pg1_integralsauvola_k0.3_w19.png
+++ /dev/null
Binary files differ
diff --git a/binarize/testdata/pg1_integralsauvola_k0.5_w19.png b/binarize/testdata/pg1_integralsauvola_k0.5_w19.png
deleted file mode 100644
index 5db2d9a..0000000
--- a/binarize/testdata/pg1_integralsauvola_k0.5_w19.png
+++ /dev/null
Binary files differ
diff --git a/binarize/testdata/pg1_integralsauvola_k0.5_w41.png b/binarize/testdata/pg1_integralsauvola_k0.5_w41.png
deleted file mode 100644
index 050d037..0000000
--- a/binarize/testdata/pg1_integralsauvola_k0.5_w41.png
+++ /dev/null
Binary files differ
diff --git a/binarize/testdata/pg1_sauvola_k0.3_w19.png b/binarize/testdata/pg1_sauvola_k0.3_w19.png
deleted file mode 100644
index bcd595f..0000000
--- a/binarize/testdata/pg1_sauvola_k0.3_w19.png
+++ /dev/null
Binary files differ
diff --git a/binarize/testdata/pg1_sauvola_k0.5_w19.png b/binarize/testdata/pg1_sauvola_k0.5_w19.png
deleted file mode 100644
index 8de596c..0000000
--- a/binarize/testdata/pg1_sauvola_k0.5_w19.png
+++ /dev/null
Binary files differ
diff --git a/binarize/testdata/pg1_sauvola_k0.5_w41.png b/binarize/testdata/pg1_sauvola_k0.5_w41.png
deleted file mode 100644
index b8f50e0..0000000
--- a/binarize/testdata/pg1_sauvola_k0.5_w41.png
+++ /dev/null
Binary files differ
diff --git a/binarize/util.go b/binarize/util.go
deleted file mode 100644
index ad641c9..0000000
--- a/binarize/util.go
+++ /dev/null
@@ -1,87 +0,0 @@
-package binarize
-
-import (
- "errors"
- "image"
- "math"
-)
-
// mean returns the arithmetic mean of the values in i. For an empty slice
// the floating-point division 0/0 yields NaN.
func mean(i []int) float64 {
	total := 0
	for idx := 0; idx < len(i); idx++ {
		total += i[idx]
	}
	count := float64(len(i))
	return float64(total) / count
}
-
-func stddev(i []int) float64 {
- m := mean(i)
-
- var sum float64
- for _, n := range i {
- sum += (float64(n) - m) * (float64(n) - m)
- }
- variance := sum / float64(len(i) - 1)
- return math.Sqrt(variance)
-}
-
-func meanstddev(i []int) (float64, float64) {
- m := mean(i)
-
- var sum float64
- for _, n := range i {
- sum += (float64(n) - m) * (float64(n) - m)
- }
- variance := float64(sum) / float64(len(i) - 1)
- return m, math.Sqrt(variance)
-}
-
// surrounding gets the pixel values surrounding a point in the image.
//
// The window reaches size/2 pixels in every direction from (x, y) and is
// clipped to the image bounds. Values are returned in row-major order. If
// the clipped window contains no pixels the result is nil.
func surrounding(img *image.Gray, x int, y int, size int) []int {
	b := img.Bounds()
	step := size / 2

	miny, maxy := y-step, y+step
	if miny < b.Min.Y {
		miny = b.Min.Y
	}
	// b.Max is exclusive, so the last valid row and column are Max-1.
	// Clamping to Max itself would read out-of-bounds pixels, which GrayAt
	// reports as 0 and which would drag the window statistics towards black.
	if maxy > b.Max.Y-1 {
		maxy = b.Max.Y - 1
	}

	minx, maxx := x-step, x+step
	if minx < b.Min.X {
		minx = b.Min.X
	}
	if maxx > b.Max.X-1 {
		maxx = b.Max.X - 1
	}

	if maxx < minx || maxy < miny {
		return nil
	}

	s := make([]int, 0, (maxx-minx+1)*(maxy-miny+1))
	for yi := miny; yi <= maxy; yi++ {
		for xi := minx; xi <= maxx; xi++ {
			s = append(s, int(img.GrayAt(xi, yi).Y))
		}
	}
	return s
}
-
// BinToZeroInv combines a binarized image with its original: wherever bin
// is 255 the output pixel is white, everywhere else the output takes the
// pixel from orig.
//
// bin and orig must have equal bounds; otherwise orig is returned together
// with an error. The output image has the same bounds as the inputs.
func BinToZeroInv(bin *image.Gray, orig *image.RGBA) (*image.RGBA, error) {
	b := bin.Bounds()
	if !b.Eq(orig.Bounds()) {
		return orig, errors.New("bin and orig images need to be the same dimensions")
	}

	// Allocate the output with the inputs' bounds: the loops below address
	// pixels in those coordinates, and writes outside an image's bounds are
	// silently dropped.
	newimg := image.NewRGBA(b)
	for y := b.Min.Y; y < b.Max.Y; y++ {
		for x := b.Min.X; x < b.Max.X; x++ {
			if bin.GrayAt(x, y).Y == 255 {
				newimg.Set(x, y, bin.GrayAt(x, y))
			} else {
				newimg.SetRGBA(x, y, orig.RGBAAt(x, y))
			}
		}
	}

	return newimg, nil
}