summaryrefslogtreecommitdiff
path: root/preproc
diff options
context:
space:
mode:
Diffstat (limited to 'preproc')
-rw-r--r--preproc/cmd/binarize/main.go78
-rw-r--r--preproc/helpers_test.go56
-rw-r--r--preproc/sauvola.go55
-rw-r--r--preproc/sauvola_test.go62
-rw-r--r--preproc/testdata/pg1.pngbin30803 -> 651071 bytes
-rw-r--r--preproc/testdata/pg1_integralsauvola_k0.3_w19.pngbin0 -> 19456 bytes
-rw-r--r--preproc/testdata/pg1_integralsauvola_k0.5_w19.pngbin0 -> 18241 bytes
-rw-r--r--preproc/testdata/pg1_integralsauvola_k0.5_w41.pngbin0 -> 18260 bytes
-rw-r--r--preproc/testdata/pg1_sauvola_k0.3_w19.pngbin0 -> 19447 bytes
-rw-r--r--preproc/testdata/pg1_sauvola_k0.5_w19.pngbin0 -> 18231 bytes
-rw-r--r--preproc/testdata/pg1_sauvola_k0.5_w41.pngbin0 -> 18275 bytes
-rw-r--r--preproc/testdata/pg2.pngbin0 -> 30803 bytes
-rw-r--r--preproc/testdata/pg2_integralwipesides_t0.02_w5.png (renamed from preproc/testdata/pg1_integralwipesides_t0.02_w5.png)bin33595 -> 33595 bytes
-rw-r--r--preproc/testdata/pg2_integralwipesides_t0.05_w25.png (renamed from preproc/testdata/pg1_integralwipesides_t0.05_w25.png)bin33432 -> 33432 bytes
-rw-r--r--preproc/testdata/pg2_integralwipesides_t0.05_w5.png (renamed from preproc/testdata/pg1_integralwipesides_t0.05_w5.png)bin14546 -> 14546 bytes
-rw-r--r--preproc/util.go95
-rw-r--r--preproc/wipesides.go4
-rw-r--r--preproc/wipesides_test.go52
18 files changed, 351 insertions, 51 deletions
diff --git a/preproc/cmd/binarize/main.go b/preproc/cmd/binarize/main.go
new file mode 100644
index 0000000..c274f9c
--- /dev/null
+++ b/preproc/cmd/binarize/main.go
@@ -0,0 +1,78 @@
+package main
+
+import (
+ "flag"
+ "fmt"
+ "image"
+ "image/draw"
+ _ "image/jpeg"
+ "image/png"
+ "log"
+ "os"
+
+ "rescribe.xyz/go.git/preproc"
+)
+
// autowsize picks a Sauvola window size from the image dimensions: one
// sixtieth of the image width, but never less than 1, so that images
// narrower than 60 pixels do not end up with a zero-sized window.
// TODO: do more testing to see how good this assumption is
func autowsize(bounds image.Rectangle) int {
	wsize := bounds.Dx() / 60
	if wsize < 1 {
		return 1
	}
	return wsize
}
+
+func main() {
+ flag.Usage = func() {
+ fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n")
+ flag.PrintDefaults()
+ }
+ wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.")
+ ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
+ btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.")
+ flag.Parse()
+ if flag.NArg() < 2 {
+ flag.Usage()
+ os.Exit(1)
+ }
+
+ f, err := os.Open(flag.Arg(0))
+ defer f.Close()
+ if err != nil {
+ log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
+ }
+ img, _, err := image.Decode(f)
+ if err != nil {
+ log.Fatalf("Could not decode image: %v\n", err)
+ }
+ b := img.Bounds()
+ gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
+ draw.Draw(gray, b, img, b.Min, draw.Src)
+
+ if *wsize == 0 {
+ *wsize = autowsize(b)
+ log.Printf("Set window size to %d\n", *wsize)
+ }
+
+ if *wsize % 2 == 0 {
+ *wsize++
+ }
+
+ // TODO: come up with a way to set a good ksize automatically
+
+ var thresh image.Image
+ thresh = preproc.IntegralSauvola(gray, *ksize, *wsize)
+
+ if *btype == "zeroinv" {
+ thresh, err = preproc.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA))
+ if err != nil {
+ log.Fatal(err)
+ }
+ }
+
+ f, err = os.Create(flag.Arg(1))
+ if err != nil {
+ log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
+ }
+ defer f.Close()
+ err = png.Encode(f, thresh)
+ if err != nil {
+ log.Fatalf("Could not encode image: %v\n", err)
+ }
+}
diff --git a/preproc/helpers_test.go b/preproc/helpers_test.go
new file mode 100644
index 0000000..326b59d
--- /dev/null
+++ b/preproc/helpers_test.go
@@ -0,0 +1,56 @@
+package preproc
+
+// TODO: add different pages as test cases
+// TODO: test non integral img version
+
+import (
+ "flag"
+ "image"
+ "image/draw"
+ "image/png"
+ "os"
+)
+
+var update = flag.Bool("update", false, "update golden files")
+
// decode opens the PNG file at path s and returns its contents converted to
// an 8-bit grayscale image whose bounds start at (0, 0). It returns a nil
// image and the error if the file cannot be opened or decoded.
func decode(s string) (*image.Gray, error) {
	f, err := os.Open(s)
	if err != nil {
		return nil, err
	}
	// Only defer the close once the open is known to have succeeded.
	defer f.Close()

	img, err := png.Decode(f)
	if err != nil {
		return nil, err
	}
	b := img.Bounds()
	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
	// The destination rectangle is expressed in gray's own coordinates, so
	// a source with a non-zero origin is still copied in full.
	draw.Draw(gray, gray.Bounds(), img, b.Min, draw.Src)
	return gray, nil
}
+
// imgsequal reports whether img1 and img2 have equal bounds and the same
// gray value at every pixel.
func imgsequal(img1 *image.Gray, img2 *image.Gray) bool {
	b := img1.Bounds()
	if !b.Eq(img2.Bounds()) {
		return false
	}
	for y := b.Min.Y; y < b.Max.Y; y++ {
		for x := b.Min.X; x < b.Max.X; x++ {
			// Both images are Gray, so their RGBA channels are all derived
			// from Y; comparing Y alone is equivalent to comparing the four
			// channels and avoids going through the color.Color interface.
			if img1.GrayAt(x, y).Y != img2.GrayAt(x, y).Y {
				return false
			}
		}
	}
	return true
}
diff --git a/preproc/sauvola.go b/preproc/sauvola.go
new file mode 100644
index 0000000..e93ea81
--- /dev/null
+++ b/preproc/sauvola.go
@@ -0,0 +1,55 @@
+package preproc
+
+import (
+ "image"
+ "image/color"
+
+ "rescribe.xyz/go.git/integralimg"
+)
+
+// Implements Sauvola's algorithm for text binarization, see paper
+// "Adaptive document image binarization" (2000)
+func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
+ b := img.Bounds()
+ new := image.NewGray(b)
+
+ for y := b.Min.Y; y < b.Max.Y; y++ {
+ for x := b.Min.X; x < b.Max.X; x++ {
+ window := surrounding(img, x, y, windowsize)
+ m, dev := meanstddev(window)
+ threshold := m * (1 + ksize * ((dev / 128) - 1))
+ if img.GrayAt(x, y).Y < uint8(threshold) {
+ new.SetGray(x, y, color.Gray{0})
+ } else {
+ new.SetGray(x, y, color.Gray{255})
+ }
+ }
+ }
+
+ return new
+}
+
+// Implements Sauvola's algorithm using Integral Images, see paper
+// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images"
+// and
+// https://stackoverflow.com/questions/13110733/computing-image-integral
+func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
+ b := img.Bounds()
+ new := image.NewGray(b)
+
+ integrals := integralimg.ToAllIntegralImg(img)
+
+ for y := b.Min.Y; y < b.Max.Y; y++ {
+ for x := b.Min.X; x < b.Max.X; x++ {
+ m, dev := integrals.MeanStdDevWindow(x, y, windowsize)
+ threshold := m * (1 + ksize * ((dev / 128) - 1))
+ if img.GrayAt(x, y).Y < uint8(threshold) {
+ new.SetGray(x, y, color.Gray{0})
+ } else {
+ new.SetGray(x, y, color.Gray{255})
+ }
+ }
+ }
+
+ return new
+}
diff --git a/preproc/sauvola_test.go b/preproc/sauvola_test.go
new file mode 100644
index 0000000..1397a4f
--- /dev/null
+++ b/preproc/sauvola_test.go
@@ -0,0 +1,62 @@
+package preproc
+
+import (
+ "fmt"
+ "image"
+ "image/png"
+ "os"
+ "testing"
+)
+
+func TestBinarization(t *testing.T) {
+ cases := []struct {
+ name string
+ orig string
+ golden string
+ ksize float64
+ wsize int
+ }{
+ {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41},
+ {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19},
+ {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19},
+ {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w41.png", 0.5, 41},
+ {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w19.png", 0.5, 19},
+ {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.3_w19.png", 0.3, 19},
+ }
+
+ for _, c := range cases {
+ t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) {
+ var actual *image.Gray
+ orig, err := decode(c.orig)
+ if err != nil {
+ t.Fatalf("Could not open file %s: %v\n", c.orig, err)
+ }
+ switch c.name {
+ case "integralsauvola":
+ actual = IntegralSauvola(orig, c.ksize, c.wsize)
+ case "sauvola":
+ actual = Sauvola(orig, c.ksize, c.wsize)
+ default:
+ t.Fatalf("No method %s\n", c.name)
+ }
+ if *update {
+ f, err := os.Create(c.golden)
+ defer f.Close()
+ if err != nil {
+ t.Fatalf("Could not open file %s to update: %v\n", c.golden, err)
+ }
+ err = png.Encode(f, actual)
+ if err != nil {
+ t.Fatalf("Could not encode update of %s: %v\n", c.golden, err)
+ }
+ }
+ golden, err := decode(c.golden)
+ if err != nil {
+ t.Fatalf("Could not open file %s: %v\n", c.golden, err)
+ }
+ if ! imgsequal(golden, actual) {
+ t.Errorf("Binarized %s differs to %s\n", c.orig, c.golden)
+ }
+ })
+ }
+}
diff --git a/preproc/testdata/pg1.png b/preproc/testdata/pg1.png
index c7c4249..2bcc4b1 100644
--- a/preproc/testdata/pg1.png
+++ b/preproc/testdata/pg1.png
Binary files differ
diff --git a/preproc/testdata/pg1_integralsauvola_k0.3_w19.png b/preproc/testdata/pg1_integralsauvola_k0.3_w19.png
new file mode 100644
index 0000000..bdf5712
--- /dev/null
+++ b/preproc/testdata/pg1_integralsauvola_k0.3_w19.png
Binary files differ
diff --git a/preproc/testdata/pg1_integralsauvola_k0.5_w19.png b/preproc/testdata/pg1_integralsauvola_k0.5_w19.png
new file mode 100644
index 0000000..5db2d9a
--- /dev/null
+++ b/preproc/testdata/pg1_integralsauvola_k0.5_w19.png
Binary files differ
diff --git a/preproc/testdata/pg1_integralsauvola_k0.5_w41.png b/preproc/testdata/pg1_integralsauvola_k0.5_w41.png
new file mode 100644
index 0000000..050d037
--- /dev/null
+++ b/preproc/testdata/pg1_integralsauvola_k0.5_w41.png
Binary files differ
diff --git a/preproc/testdata/pg1_sauvola_k0.3_w19.png b/preproc/testdata/pg1_sauvola_k0.3_w19.png
new file mode 100644
index 0000000..bcd595f
--- /dev/null
+++ b/preproc/testdata/pg1_sauvola_k0.3_w19.png
Binary files differ
diff --git a/preproc/testdata/pg1_sauvola_k0.5_w19.png b/preproc/testdata/pg1_sauvola_k0.5_w19.png
new file mode 100644
index 0000000..8de596c
--- /dev/null
+++ b/preproc/testdata/pg1_sauvola_k0.5_w19.png
Binary files differ
diff --git a/preproc/testdata/pg1_sauvola_k0.5_w41.png b/preproc/testdata/pg1_sauvola_k0.5_w41.png
new file mode 100644
index 0000000..b8f50e0
--- /dev/null
+++ b/preproc/testdata/pg1_sauvola_k0.5_w41.png
Binary files differ
diff --git a/preproc/testdata/pg2.png b/preproc/testdata/pg2.png
new file mode 100644
index 0000000..c7c4249
--- /dev/null
+++ b/preproc/testdata/pg2.png
Binary files differ
diff --git a/preproc/testdata/pg1_integralwipesides_t0.02_w5.png b/preproc/testdata/pg2_integralwipesides_t0.02_w5.png
index 6b4ccb2..6b4ccb2 100644
--- a/preproc/testdata/pg1_integralwipesides_t0.02_w5.png
+++ b/preproc/testdata/pg2_integralwipesides_t0.02_w5.png
Binary files differ
diff --git a/preproc/testdata/pg1_integralwipesides_t0.05_w25.png b/preproc/testdata/pg2_integralwipesides_t0.05_w25.png
index 39dc88d..39dc88d 100644
--- a/preproc/testdata/pg1_integralwipesides_t0.05_w25.png
+++ b/preproc/testdata/pg2_integralwipesides_t0.05_w25.png
Binary files differ
diff --git a/preproc/testdata/pg1_integralwipesides_t0.05_w5.png b/preproc/testdata/pg2_integralwipesides_t0.05_w5.png
index 50df855..50df855 100644
--- a/preproc/testdata/pg1_integralwipesides_t0.05_w5.png
+++ b/preproc/testdata/pg2_integralwipesides_t0.05_w5.png
Binary files differ
diff --git a/preproc/util.go b/preproc/util.go
new file mode 100644
index 0000000..5f8a9f1
--- /dev/null
+++ b/preproc/util.go
@@ -0,0 +1,95 @@
+package preproc
+
+import (
+ "errors"
+ "image"
+ "math"
+)
+
// UsefulImg is a placeholder for the window statistics an image type should
// provide. Its methods do not yet take parameters or return values.
// TODO: name better; maybe verb, x-er
// TODO: implement these for regular image, and use them to make
// image functions generic for integral and non- images
type UsefulImg interface {
	MeanStdDevWindow()
	MeanWindow()
}
+
// mean returns the arithmetic mean of i, or 0 if i is empty.
func mean(i []int) float64 {
	if len(i) == 0 {
		return 0
	}
	sum := 0
	for _, n := range i {
		sum += n
	}
	return float64(sum) / float64(len(i))
}

// stddev returns the sample standard deviation of i (n-1 denominator), or 0
// if i holds fewer than two values.
func stddev(i []int) float64 {
	_, dev := meanstddev(i)
	return dev
}

// meanstddev returns the mean and the sample standard deviation (n-1
// denominator) of i. With fewer than two values the deviation is 0, since
// dividing by n-1 would otherwise yield NaN.
func meanstddev(i []int) (float64, float64) {
	m := mean(i)
	if len(i) < 2 {
		return m, 0
	}

	var sum float64
	for _, n := range i {
		d := float64(n) - m
		sum += d * d
	}
	variance := sum / float64(len(i)-1)
	return m, math.Sqrt(variance)
}
+
// surrounding gets the pixel values surrounding a point in the image: those
// of the window reaching size/2 pixels in each direction from (x, y),
// clipped to the image bounds. Values are returned row by row. The result is
// nil if the clipped window is empty.
func surrounding(img *image.Gray, x int, y int, size int) []int {
	b := img.Bounds()
	step := size / 2

	miny := y - step
	if miny < b.Min.Y {
		miny = b.Min.Y
	}
	minx := x - step
	if minx < b.Min.X {
		minx = b.Min.X
	}
	// Bounds.Max is exclusive, so the last valid coordinate is Max-1.
	// Clamping to Max itself would pull out-of-bounds pixels, which GrayAt
	// reports as 0, into the window along the bottom and right edges.
	maxy := y + step
	if maxy > b.Max.Y-1 {
		maxy = b.Max.Y - 1
	}
	maxx := x + step
	if maxx > b.Max.X-1 {
		maxx = b.Max.X - 1
	}

	if maxy < miny || maxx < minx {
		return nil
	}

	s := make([]int, 0, (maxy-miny+1)*(maxx-minx+1))
	for yi := miny; yi <= maxy; yi++ {
		for xi := minx; xi <= maxx; xi++ {
			s = append(s, int(img.GrayAt(xi, yi).Y))
		}
	}
	return s
}
+
// BinToZeroInv combines a binarized image with its original: wherever bin is
// white (255) the result is white, everywhere else the pixel is copied from
// orig. The result has the same bounds as bin.
//
// bin and orig must have equal bounds; if they do not, orig is returned
// unchanged together with an error.
func BinToZeroInv(bin *image.Gray, orig *image.RGBA) (*image.RGBA, error) {
	b := bin.Bounds()
	if !b.Eq(orig.Bounds()) {
		return orig, errors.New("bin and orig images need to be the same dimensions")
	}
	// Allocate with bin's own bounds: the loops below address pixels in
	// bin's coordinates, and writes outside an image's bounds are dropped.
	newimg := image.NewRGBA(b)
	for y := b.Min.Y; y < b.Max.Y; y++ {
		for x := b.Min.X; x < b.Max.X; x++ {
			if bin.GrayAt(x, y).Y == 255 {
				newimg.Set(x, y, bin.GrayAt(x, y))
			} else {
				newimg.SetRGBA(x, y, orig.RGBAAt(x, y))
			}
		}
	}

	return newimg, nil
}
diff --git a/preproc/wipesides.go b/preproc/wipesides.go
index c773054..4806e93 100644
--- a/preproc/wipesides.go
+++ b/preproc/wipesides.go
@@ -7,7 +7,7 @@ import (
"image"
"image/color"
- "rescribe.xyz/go.git/binarize"
+ "rescribe.xyz/go.git/integralimg"
)
type IntWindow struct { // TODO: put this in its own package
@@ -126,7 +126,7 @@ func wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray {
// wipe fills the sections of image which fall outside the content
// area with white
func Wipe(img *image.Gray, wsize int, thresh float64) *image.Gray {
- integral := binarize.Integralimg(img)
+ integral := integralimg.ToIntegralImg(img)
lowedge, highedge := findedges(integral, wsize, thresh)
return wipesides(img, lowedge, highedge)
}
diff --git a/preproc/wipesides_test.go b/preproc/wipesides_test.go
index b0ada4e..f66f39b 100644
--- a/preproc/wipesides_test.go
+++ b/preproc/wipesides_test.go
@@ -4,59 +4,13 @@ package preproc
// TODO: test non integral img version
import (
- "flag"
"fmt"
"image"
- "image/draw"
"image/png"
"os"
"testing"
)
-var update = flag.Bool("update", false, "update golden files")
-
-func decode(s string) (*image.Gray, error) {
- f, err := os.Open(s)
- defer f.Close()
- if err != nil {
- return nil, err
- }
- img, err := png.Decode(f)
- if err != nil {
- return nil, err
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
- return gray, nil
-}
-
-func imgsequal(img1 *image.Gray, img2 *image.Gray) bool {
- b := img1.Bounds()
- if !b.Eq(img2.Bounds()) {
- return false
- }
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- r0, g0, b0, a0 := img1.At(x, y).RGBA()
- r1, g1, b1, a1 := img2.At(x, y).RGBA()
- if r0 != r1 {
- return false
- }
- if g0 != g1 {
- return false
- }
- if b0 != b1 {
- return false
- }
- if a0 != a1 {
- return false
- }
- }
- }
- return true
-}
-
func TestWipeSides(t *testing.T) {
cases := []struct {
name string
@@ -65,9 +19,9 @@ func TestWipeSides(t *testing.T) {
thresh float64
wsize int
}{
- {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.02_w5.png", 0.02, 5},
- {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w5.png", 0.05, 5},
- {"integralwipesides", "testdata/pg1.png", "testdata/pg1_integralwipesides_t0.05_w25.png", 0.05, 25},
+ {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.02_w5.png", 0.02, 5},
+ {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w5.png", 0.05, 5},
+ {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w25.png", 0.05, 25},
}
for _, c := range cases {