diff options
Diffstat (limited to 'preproc')
-rw-r--r-- | preproc/cmd/binarize/main.go | 78 | ||||
-rw-r--r-- | preproc/helpers_test.go | 56 | ||||
-rw-r--r-- | preproc/sauvola.go | 55 | ||||
-rw-r--r-- | preproc/sauvola_test.go | 62 | ||||
-rw-r--r-- | preproc/testdata/pg1.png | bin | 30803 -> 651071 bytes | |||
-rw-r--r-- | preproc/testdata/pg1_integralsauvola_k0.3_w19.png | bin | 0 -> 19456 bytes | |||
-rw-r--r-- | preproc/testdata/pg1_integralsauvola_k0.5_w19.png | bin | 0 -> 18241 bytes | |||
-rw-r--r-- | preproc/testdata/pg1_integralsauvola_k0.5_w41.png | bin | 0 -> 18260 bytes | |||
-rw-r--r-- | preproc/testdata/pg1_sauvola_k0.3_w19.png | bin | 0 -> 19447 bytes | |||
-rw-r--r-- | preproc/testdata/pg1_sauvola_k0.5_w19.png | bin | 0 -> 18231 bytes | |||
-rw-r--r-- | preproc/testdata/pg1_sauvola_k0.5_w41.png | bin | 0 -> 18275 bytes | |||
-rw-r--r-- | preproc/testdata/pg2.png | bin | 0 -> 30803 bytes | |||
-rw-r--r-- | preproc/testdata/pg2_integralwipesides_t0.02_w5.png (renamed from preproc/testdata/pg1_integralwipesides_t0.02_w5.png) | bin | 33595 -> 33595 bytes | |||
-rw-r--r-- | preproc/testdata/pg2_integralwipesides_t0.05_w25.png (renamed from preproc/testdata/pg1_integralwipesides_t0.05_w25.png) | bin | 33432 -> 33432 bytes | |||
-rw-r--r-- | preproc/testdata/pg2_integralwipesides_t0.05_w5.png (renamed from preproc/testdata/pg1_integralwipesides_t0.05_w5.png) | bin | 14546 -> 14546 bytes | |||
-rw-r--r-- | preproc/util.go | 95 | ||||
-rw-r--r-- | preproc/wipesides.go | 4 | ||||
-rw-r--r-- | preproc/wipesides_test.go | 52 |
18 files changed, 351 insertions, 51 deletions
diff --git a/preproc/cmd/binarize/main.go b/preproc/cmd/binarize/main.go new file mode 100644 index 0000000..c274f9c --- /dev/null +++ b/preproc/cmd/binarize/main.go @@ -0,0 +1,78 @@ +package main + +import ( + "flag" + "fmt" + "image" + "image/draw" + _ "image/jpeg" + "image/png" + "log" + "os" + + "rescribe.xyz/go.git/preproc" +) + +// TODO: do more testing to see how good this assumption is +func autowsize(bounds image.Rectangle) int { + return bounds.Dx() / 60 +} + +func main() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n") + flag.PrintDefaults() + } + wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.") + ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).") + btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.") + flag.Parse() + if flag.NArg() < 2 { + flag.Usage() + os.Exit(1) + } + + f, err := os.Open(flag.Arg(0)) + defer f.Close() + if err != nil { + log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) + } + img, _, err := image.Decode(f) + if err != nil { + log.Fatalf("Could not decode image: %v\n", err) + } + b := img.Bounds() + gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) + draw.Draw(gray, b, img, b.Min, draw.Src) + + if *wsize == 0 { + *wsize = autowsize(b) + log.Printf("Set window size to %d\n", *wsize) + } + + if *wsize % 2 == 0 { + *wsize++ + } + + // TODO: come up with a way to set a good ksize automatically + + var thresh image.Image + thresh = preproc.IntegralSauvola(gray, *ksize, *wsize) + + if *btype == "zeroinv" { + thresh, err = preproc.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA)) + if err != nil { + log.Fatal(err) + } + } + + f, err = os.Create(flag.Arg(1)) + if err != nil { + log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) + } + defer f.Close() + err = png.Encode(f, thresh) + if err != nil { + log.Fatalf("Could not encode image: %v\n", err) + } +} diff --git a/preproc/helpers_test.go b/preproc/helpers_test.go new file mode 100644 index 0000000..326b59d --- /dev/null +++ b/preproc/helpers_test.go @@ -0,0 +1,56 @@ +package preproc + +// TODO: add different pages as test cases +// TODO: test non integral img version + +import ( + "flag" + "image" + "image/draw" + "image/png" + "os" +) + +var update = flag.Bool("update", false, "update golden files") + +func decode(s string) (*image.Gray, error) { + f, err := os.Open(s) + defer f.Close() + if err != nil { + return nil, err + } + img, err := png.Decode(f) + if err != nil { + return nil, err + } + b := img.Bounds() + gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) + draw.Draw(gray, b, img, b.Min, draw.Src) + return gray, nil +} + +func imgsequal(img1 *image.Gray, img2 *image.Gray) bool { + b := img1.Bounds() + if !b.Eq(img2.Bounds()) { + return false + } + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + r0, g0, b0, a0 := img1.At(x, y).RGBA() + r1, g1, b1, a1 := img2.At(x, y).RGBA() + if r0 != r1 { + return false + } + if g0 != g1 { + return false + } + if b0 != b1 { + return false + } + if a0 != a1 { + return false + } + } + } + return true +} diff --git a/preproc/sauvola.go b/preproc/sauvola.go new file mode 100644 index 0000000..e93ea81 --- /dev/null +++ b/preproc/sauvola.go @@ -0,0 +1,55 @@ +package preproc + +import ( + "image" + "image/color" + + "rescribe.xyz/go.git/integralimg" +) + +// Implements Sauvola's algorithm for text binarization, see paper +// "Adaptive document image binarization" (2000) +func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { + b := img.Bounds() + new := image.NewGray(b) + + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + window := surrounding(img, x, y, windowsize) + m, dev := meanstddev(window) + threshold := m * (1 + ksize * ((dev / 128) - 1)) + if img.GrayAt(x, y).Y < uint8(threshold) { + new.SetGray(x, y, color.Gray{0}) + } else { + new.SetGray(x, y, color.Gray{255}) + } + } + } + + return new +} + +// Implements Sauvola's algorithm using Integral Images, see paper +// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" +// and +// https://stackoverflow.com/questions/13110733/computing-image-integral +func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { + b := img.Bounds() + new := image.NewGray(b) + + integrals := integralimg.ToAllIntegralImg(img) + + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + m, dev := integrals.MeanStdDevWindow(x, y, windowsize) + threshold := m * (1 + ksize * ((dev / 128) - 1)) + if img.GrayAt(x, y).Y < uint8(threshold) { + new.SetGray(x, y, color.Gray{0}) + } else { + new.SetGray(x, y, color.Gray{255}) + } + } + } + + return new +} diff --git a/preproc/sauvola_test.go b/preproc/sauvola_test.go new file mode 100644 index 0000000..1397a4f --- /dev/null +++ b/preproc/sauvola_test.go @@ -0,0 +1,62 @@ +package preproc + +import ( + "fmt" + "image" + "image/png" + "os" + "testing" +) + +func TestBinarization(t *testing.T) { + cases := []struct { + name string + orig string + golden string + ksize float64 + wsize int + }{ + {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41}, + {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19}, + {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19}, + {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w41.png", 0.5, 41}, + {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w19.png", 0.5, 19}, + {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.3_w19.png", 0.3, 19}, + } + + for _, c := range cases { + t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) { + var actual *image.Gray + orig, err := decode(c.orig) + if err != nil { + t.Fatalf("Could not open file %s: %v\n", c.orig, err) + } + switch c.name { + case "integralsauvola": + actual = IntegralSauvola(orig, c.ksize, c.wsize) + case "sauvola": + actual = Sauvola(orig, c.ksize, c.wsize) + default: + t.Fatalf("No method %s\n", c.name) + } + if *update { + f, err := os.Create(c.golden) + defer f.Close() + if err != nil { + t.Fatalf("Could not open file %s to update: %v\n", c.golden, err) + } + err = png.Encode(f, actual) + if err != nil { + t.Fatalf("Could not encode update of %s: %v\n", c.golden, err) + } + } + golden, err := decode(c.golden) + if err != nil { + t.Fatalf("Could not open file %s: %v\n", c.golden, err) + } + if ! imgsequal(golden, actual) { + t.Errorf("Binarized %s differs to %s\n", c.orig, c.golden) + } + }) + } +} diff --git a/preproc/testdata/pg1.png b/preproc/testdata/pg1.png Binary files differindex c7c4249..2bcc4b1 100644 --- a/preproc/testdata/pg1.png +++ b/preproc/testdata/pg1.png diff --git a/preproc/testdata/pg1_integralsauvola_k0.3_w19.png b/preproc/testdata/pg1_integralsauvola_k0.3_w19.png Binary files differnew file mode 100644 index 0000000..bdf5712 --- /dev/null +++ b/preproc/testdata/pg1_integralsauvola_k0.3_w19.png diff --git a/preproc/testdata/pg1_integralsauvola_k0.5_w19.png b/preproc/testdata/pg1_integralsauvola_k0.5_w19.png Binary files differnew file mode 100644 index 0000000..5db2d9a --- /dev/null +++ b/preproc/testdata/pg1_integralsauvola_k0.5_w19.png diff --git a/preproc/testdata/pg1_integralsauvola_k0.5_w41.png b/preproc/testdata/pg1_integralsauvola_k0.5_w41.png Binary files differnew file mode 100644 index 0000000..050d037 --- /dev/null +++ b/preproc/testdata/pg1_integralsauvola_k0.5_w41.png diff --git a/preproc/testdata/pg1_sauvola_k0.3_w19.png b/preproc/testdata/pg1_sauvola_k0.3_w19.png Binary files differnew file mode 100644 index 0000000..bcd595f --- /dev/null +++ b/preproc/testdata/pg1_sauvola_k0.3_w19.png diff --git a/preproc/testdata/pg1_sauvola_k0.5_w19.png b/preproc/testdata/pg1_sauvola_k0.5_w19.png Binary files differnew file mode 100644 index 0000000..8de596c --- /dev/null +++ b/preproc/testdata/pg1_sauvola_k0.5_w19.png diff --git a/preproc/testdata/pg1_sauvola_k0.5_w41.png b/preproc/testdata/pg1_sauvola_k0.5_w41.png Binary files differnew file mode 100644 index 0000000..b8f50e0 --- /dev/null +++ b/preproc/testdata/pg1_sauvola_k0.5_w41.png diff --git a/preproc/testdata/pg2.png b/preproc/testdata/pg2.png Binary files differnew file mode 100644 index 0000000..c7c4249 --- /dev/null +++ b/preproc/testdata/pg2.png diff --git a/preproc/testdata/pg1_integralwipesides_t0.02_w5.png b/preproc/testdata/pg2_integralwipesides_t0.02_w5.png Binary files differindex 6b4ccb2..6b4ccb2 100644 --- a/preproc/testdata/pg1_integralwipesides_t0.02_w5.png +++ b/preproc/testdata/pg2_integralwipesides_t0.02_w5.png diff --git a/preproc/testdata/pg1_integralwipesides_t0.05_w25.png b/preproc/testdata/pg2_integralwipesides_t0.05_w25.png Binary files differindex 39dc88d..39dc88d 100644 --- a/preproc/testdata/pg1_integralwipesides_t0.05_w25.png +++ b/preproc/testdata/pg2_integralwipesides_t0.05_w25.png diff --git a/preproc/testdata/pg1_integralwipesides_t0.05_w5.png b/preproc/testdata/pg2_integralwipesides_t0.05_w5.png Binary files differindex 50df855..50df855 100644 --- a/preproc/testdata/pg1_integralwipesides_t0.05_w5.png +++ b/preproc/testdata/pg2_integralwipesides_t0.05_w5.png diff --git a/preproc/util.go b/preproc/util.go new file mode 100644 index 0000000..5f8a9f1 --- /dev/null +++ b/preproc/util.go @@ -0,0 +1,95 @@ +package preproc + +import ( + "errors" + "image" + "math" +) + +// TODO: name better; maybe verb, x-er +// TODO: implement these for regular image, and use them to make +// image functions generic for integral and non- images +type UsefulImg interface { + MeanWindow() + MeanStdDevWindow() +} + +func mean(i []int) float64 { + sum := 0 + for _, n := range i { + sum += n + } + return float64(sum) / float64(len(i)) +} + +func stddev(i []int) float64 { + m := mean(i) + + var sum float64 + for _, n := range i { + sum += (float64(n) - m) * (float64(n) - m) + } + variance := sum / float64(len(i) - 1) + return math.Sqrt(variance) +} + +func meanstddev(i []int) (float64, float64) { + m := mean(i) + + var sum float64 + for _, n := range i { + sum += (float64(n) - m) * (float64(n) - m) + } + variance := float64(sum) / float64(len(i) - 1) + return m, math.Sqrt(variance) +} + +// gets the pixel values surrounding a point in the image +func surrounding(img *image.Gray, x int, y int, size int) []int { + b := img.Bounds() + step := size / 2 + + miny := y - step + if miny < b.Min.Y { + miny = b.Min.Y + } + minx := x - step + if minx < b.Min.X { + minx = b.Min.X + } + maxy := y + step + if maxy > b.Max.Y { + maxy = b.Max.Y + } + maxx := x + step + if maxx > b.Max.X { + maxx = b.Max.X + } + + var s []int + for yi := miny; yi <= maxy; yi++ { + for xi := minx; xi <= maxx; xi++ { + s = append(s, int(img.GrayAt(xi, yi).Y)) + } + } + return s +} + +func BinToZeroInv(bin *image.Gray, orig *image.RGBA) (*image.RGBA, error) { + b := bin.Bounds() + if ! b.Eq(orig.Bounds()) { + return orig, errors.New("bin and orig images need to be the same dimensions") + } + newimg := image.NewRGBA(image.Rect(0, 0, b.Dx(), b.Dy())) + for y := b.Min.Y; y < b.Max.Y; y++ { + for x := b.Min.X; x < b.Max.X; x++ { + if bin.GrayAt(x, y).Y == 255 { + newimg.Set(x, y, bin.GrayAt(x, y)) + } else { + newimg.Set(x, y, orig.At(x, y)) + } + } + } + + return newimg, nil +} diff --git a/preproc/wipesides.go b/preproc/wipesides.go index c773054..4806e93 100644 --- a/preproc/wipesides.go +++ b/preproc/wipesides.go @@ -7,7 +7,7 @@ import ( "image" "image/color" - "rescribe.xyz/go.git/binarize" + "rescribe.xyz/go.git/integralimg" ) type IntWindow struct { // TODO: put this in its own package @@ -126,7 +126,7 @@ func wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray { // wipe fills the sections of image which fall outside the content // area with white func Wipe(img *image.Gray, wsize int, thresh float64) *image.Gray { - integral := binarize.Integralimg(img) + integral := integralimg.ToIntegralImg(img) lowedge, highedge := findedges(integral, wsize, thresh) return wipesides(img, lowedge, highedge) } diff --git a/preproc/wipesides_test.go b/preproc/wipesides_test.go index b0ada4e..f66f39b 100644 --- a/preproc/wipesides_test.go +++ b/preproc/wipesides_test.go @@ -4,59 +4,13 @@ package preproc // TODO: test non integral img version import ( - "flag" "fmt" "image" - "image/draw" "image/png" "os" "testing" ) -var update = flag.Bool("update", false, "update golden files") - -func decode(s string) (*image.Gray, error) { - f, err := os.Open(s) - defer f.Close() - if err != nil { - return nil, err - } - img, err := png.Decode(f) - if err != nil { - return nil, err - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - return gray, nil -} - -func imgsequal(img1 *image.Gray, img2 *image.Gray) bool { - b := img1.Bounds() - if !b.Eq(img2.Bounds()) { - return false - } - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - r0, g0, b0, a0 := img1.At(x, y).RGBA() - r1, g1, b1, a1 := img2.At(x, y).RGBA() - if r0 != r1 { - return false - } - if g0 != g1 { - return false - } - if b0 != b1 { - return false - } - if a0 != a1 { - return false - } - } - } - return true -} - func TestWipeSides(t *testing.T) { cases := []struct { name string @@ -65,9 +19,9 @@ func TestWipeSides(t *testing.T) { thresh float64 wsize int }{ - {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.02_w5.png", 0.02, 5}, - {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w5.png", 0.05, 5}, - {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w25.png", 0.05, 25}, + {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.02_w5.png", 0.02, 5}, + {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w5.png", 0.05, 5}, + {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w25.png", 0.05, 25}, } for _, c := range cases { |