summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick White <git@njw.name>2020-02-26 16:53:14 +0000
committerNick White <git@njw.name>2020-02-26 16:53:14 +0000
commite320e069844b8f9c6e5d7e4e407cc90dff3b0f04 (patch)
treeb5e011fcd79997c89ae20f113885d483d3814db7
parenta47b35dc44f0e770a1bb1f7b53b6aa11d6895e0b (diff)
Move things around so that integralimg is in its own repository
-rw-r--r--README36
-rw-r--r--cmd/binarize/main.go82
-rw-r--r--cmd/preproc/main.go94
-rw-r--r--cmd/preprocmulti/main.go105
-rw-r--r--cmd/splittable/main.go143
-rw-r--r--cmd/wipe/main.go59
-rw-r--r--init_test.go19
-rw-r--r--integralimg.go (renamed from integralimg/integralimg.go)0
-rw-r--r--preprocmulti.go98
-rw-r--r--sauvola.go80
-rw-r--r--sauvola_test.go70
-rw-r--r--test_helpers.go57
-rw-r--r--testdata/pg1.pngbin651071 -> 0 bytes
-rw-r--r--testdata/pg1_integralsauvola_k0.3_w19.pngbin19456 -> 0 bytes
-rw-r--r--testdata/pg1_integralsauvola_k0.5_w19.pngbin18241 -> 0 bytes
-rw-r--r--testdata/pg1_integralsauvola_k0.5_w41.pngbin18260 -> 0 bytes
-rw-r--r--testdata/pg1_sauvola_k0.3_w19.pngbin19447 -> 0 bytes
-rw-r--r--testdata/pg1_sauvola_k0.5_w19.pngbin18231 -> 0 bytes
-rw-r--r--testdata/pg1_sauvola_k0.5_w41.pngbin18275 -> 0 bytes
-rw-r--r--testdata/pg2.pngbin30803 -> 0 bytes
-rw-r--r--testdata/pg2_integralwipesides_t0.02_w5.pngbin33595 -> 0 bytes
-rw-r--r--testdata/pg2_integralwipesides_t0.05_w25.pngbin33432 -> 0 bytes
-rw-r--r--testdata/pg2_integralwipesides_t0.05_w5.pngbin21695 -> 0 bytes
-rw-r--r--util.go99
-rw-r--r--wipesides.go164
-rw-r--r--wipesides_test.go59
26 files changed, 0 insertions, 1165 deletions
diff --git a/README b/README
deleted file mode 100644
index 4a93b1e..0000000
--- a/README
+++ /dev/null
@@ -1,36 +0,0 @@
-# rescribe.xyz/preproc package
-
-This package contains various image processing methods which are
-useful for preprocessing page images for OCR. It also contains
-several commands in the cmd/ directory which can be used to
-preprocess images directly.
-
-# Commands
-
-There are several commands in the cmd/ directory which are useful
-in their own right as well as serving as examples of using the
-package.
-
- - binarize : binarises an image using the sauvola algorithm
- - preproc : binarises and wipes an image
- - preprocmulti : binarises and wipes an image with multiple
- binarisation ksize values
- - wipe : wipes sections of an image that are outside an
- area detected as content
-
-# Bugs
-
-The integral image operations don't produce exactly the same result
-as their non-integral image counterparts. The difference is small
-enough that it has little effect on the output images, but it ought
-to be identical.
-
-# Contributions
-
-Any and all comments, bug reports, patches or pull requests would
-be very welcomely received. Please email them to <nick@rescribe.xyz>.
-
-# License
-
-This package is licensed under the GPLv3. See the LICENSE file for
-more details.
diff --git a/cmd/binarize/main.go b/cmd/binarize/main.go
deleted file mode 100644
index a844982..0000000
--- a/cmd/binarize/main.go
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package main
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/preproc"
-)
-
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
- return bounds.Dx() / 60
-}
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n")
- flag.PrintDefaults()
- }
- wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.")
- ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
- btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.")
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- img, _, err := image.Decode(f)
- if err != nil {
- log.Fatalf("Could not decode image: %v\n", err)
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- if *wsize == 0 {
- *wsize = autowsize(b)
- log.Printf("Set window size to %d\n", *wsize)
- }
-
- if *wsize%2 == 0 {
- *wsize++
- }
-
- // TODO: come up with a way to set a good ksize automatically
-
- var thresh image.Image
- thresh = preproc.IntegralSauvola(gray, *ksize, *wsize)
-
- if *btype == "zeroinv" {
- thresh, err = preproc.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA))
- if err != nil {
- log.Fatal(err)
- }
- }
-
- f, err = os.Create(flag.Arg(1))
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
- }
- defer f.Close()
- err = png.Encode(f, thresh)
- if err != nil {
- log.Fatalf("Could not encode image: %v\n", err)
- }
-}
diff --git a/cmd/preproc/main.go b/cmd/preproc/main.go
deleted file mode 100644
index 3323635..0000000
--- a/cmd/preproc/main.go
+++ /dev/null
@@ -1,94 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package main
-
-// TODO: come up with a way to set a good ksize automatically
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/preproc"
-)
-
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
- return bounds.Dx() / 60
-}
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-m minperc] [-nowipe] [-wt wipethresh] [-ws wipesize] inimg outimg\n")
- fmt.Fprintf(os.Stderr, "Binarize and preprocess an image\n")
- flag.PrintDefaults()
- }
- binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
- ksize := flag.Float64("k", 0.5, "K for sauvola binarization algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
- btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
- min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
- nowipe := flag.Bool("nowipe", false, "Disable wiping completely.")
- wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
- thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.")
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- img, _, err := image.Decode(f)
- if err != nil {
- log.Fatalf("Could not decode image: %v\n", err)
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- if *binwsize == 0 {
- *binwsize = autowsize(b)
- }
-
- if *binwsize%2 == 0 {
- *binwsize++
- }
-
- log.Print("Binarising")
- var clean, threshimg image.Image
- threshimg = preproc.IntegralSauvola(gray, *ksize, *binwsize)
-
- if *btype == "zeroinv" {
- threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA))
- if err != nil {
- log.Fatal(err)
- }
- }
-
- if !*nowipe {
- log.Print("Wiping sides")
- clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh, *min)
- } else {
- clean = threshimg
- }
-
- f, err = os.Create(flag.Arg(1))
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
- }
- defer f.Close()
- err = png.Encode(f, clean)
- if err != nil {
- log.Fatalf("Could not encode image: %v\n", err)
- }
-}
diff --git a/cmd/preprocmulti/main.go b/cmd/preprocmulti/main.go
deleted file mode 100644
index aeac767..0000000
--- a/cmd/preprocmulti/main.go
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package main
-
-// TODO: come up with a way to set a good ksize automatically
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/preproc"
- "rescribe.xyz/preproc/integralimg"
-)
-
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
- return bounds.Dx() / 60
-}
-
-func main() {
- ksizes := []float64{0.1, 0.2, 0.4, 0.5}
-
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: preprocmulti [-bt bintype] [-bw winsize] [-m minperc] [-nowipe] [-ws wipesize] inimg outbase\n")
- fmt.Fprintf(os.Stderr, "Binarize and preprocess an image, with multiple binarisation levels,\n")
- fmt.Fprintf(os.Stderr, "saving images to outbase_bin{k}.png.\n")
- fmt.Fprintf(os.Stderr, "Binarises with these levels for k: %v.\n", ksizes)
- flag.PrintDefaults()
- }
- binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
- btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
- min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
- nowipe := flag.Bool("nowipe", false, "Disable wiping completely.")
- wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- log.Printf("Opening %s\n", flag.Arg(0))
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- img, _, err := image.Decode(f)
- if err != nil {
- log.Fatalf("Could not decode image: %v\n", err)
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- if *binwsize == 0 {
- *binwsize = autowsize(b)
- }
-
- if *binwsize%2 == 0 {
- *binwsize++
- }
-
- var clean, threshimg image.Image
- log.Print("Precalculating integral images")
- integrals := integralimg.ToAllIntegralImg(gray)
-
- for _, k := range ksizes {
- log.Print("Binarising")
- threshimg = preproc.PreCalcedSauvola(integrals, gray, k, *binwsize)
-
- if *btype == "zeroinv" {
- threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA))
- if err != nil {
- log.Fatal(err)
- }
- }
-
- if !*nowipe {
- log.Print("Wiping sides")
- clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, k*0.02, *min)
- } else {
- clean = threshimg
- }
-
- savefn := fmt.Sprintf("%s_bin%0.1f.png", flag.Arg(1), k)
- log.Printf("Saving %s\n", savefn)
- f, err = os.Create(savefn)
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", savefn, err)
- }
- defer f.Close()
- err = png.Encode(f, clean)
- if err != nil {
- log.Fatalf("Could not encode image: %v\n", err)
- }
- }
-}
diff --git a/cmd/splittable/main.go b/cmd/splittable/main.go
deleted file mode 100644
index e6df94a..0000000
--- a/cmd/splittable/main.go
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright 2020 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package main
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/preproc/integralimg"
-)
-
-const usage = `Usage: splittable [-t thresh] [-w winsize] inimg outbase
-
-splittable is an experimental program to split a table into individual
-cells suitable for OCR. It does this by detecting lines. At present it
-just detects vertical lines and outputs images for each section
-between those lines.
-
-`
-
-// returns the proportion of the given window that is black pixels
-func proportion(i integralimg.I, x int, size int) float64 {
- w := i.GetVerticalWindow(x, size)
- return w.Proportion()
-}
-
-// findbestvline goes through every vertical line from x to x+w to
-// find the one with the lowest proportion of black pixels.
-func findbestvline(img integralimg.I, x int, w int) int {
- var bestx int
- var best float64
-
- if w == 1 {
- return x
- }
-
- right := x + w
- for ; x < right; x++ {
- prop := proportion(img, x, 1)
- if prop > best {
- best = prop
- bestx = x
- }
- }
-
- return bestx
-}
-
-// findvlines finds vertical lines, returning an array of x coordinates
-// for each line. It works by moving a window of wsize across the image,
-// marking each place where there is a higher proportion of black pixels
-// than thresh.
-func findvlines(img integralimg.I, wsize int, thresh float64) []int {
- maxx := len(img[0]) - 1
- var lines []int
-
- for x := 0; x < maxx-wsize; x+=wsize {
- if proportion(img, x, wsize) >= thresh {
- l := findbestvline(img, x, wsize)
- lines = append(lines, l)
- }
- }
-
- return lines
-}
-
-func drawsection(img *image.Gray, x1 int, x2 int) *image.Gray {
- b := img.Bounds()
- width := x2-x1
- new := image.NewGray(image.Rect(0, b.Min.Y, width, b.Max.Y))
-
- for x := 0; x < width; x++ {
- for y := b.Min.Y; y < b.Max.Y; y++ {
- new.SetGray(x, y, img.GrayAt(x1 + x, y))
- }
- }
-
- return new
-}
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(flag.CommandLine.Output(), usage)
- flag.PrintDefaults()
- }
- thresh := flag.Float64("t", 0.85, "Threshold for the proportion of black pixels below which a window is determined to be a line. Higher means fewer lines will be found.")
- wsize := flag.Int("w", 1, "Window size for mask finding algorithm.")
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- img, _, err := image.Decode(f)
- if err != nil {
- log.Fatalf("Could not decode image: %v\n", err)
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- integral := integralimg.ToIntegralImg(gray)
- vlines := findvlines(integral, *wsize, *thresh)
-
- for i, v := range vlines {
- fmt.Printf("line detected at x=%d\n", v)
-
- if i+1 >= len(vlines) {
- break
- }
- section := drawsection(gray, v, vlines[i+1])
-
- fn := fmt.Sprintf("%s-%d.png", flag.Arg(1), v)
- f, err = os.Create(fn)
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", fn, err)
- }
- defer f.Close()
- err := png.Encode(f, section)
- if err != nil {
- log.Fatalf("Could not encode image %s: %v\n", fn, err)
- }
- }
-
-
- // TODO: find horizontal lines too
- // TODO: do rotation
- // TODO: output table cells
- // TODO: potentially send cells straight to tesseract
-}
diff --git a/cmd/wipe/main.go b/cmd/wipe/main.go
deleted file mode 100644
index d4c95ac..0000000
--- a/cmd/wipe/main.go
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package main
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/preproc"
-)
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: wipe [-m minperc] [-t thresh] [-w winsize] inimg outimg\n")
- fmt.Fprintf(os.Stderr, "Wipes the sections of an image which are outside the content area.\n")
- flag.PrintDefaults()
- }
- min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
- thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge. Higher means more aggressive wiping.")
- wsize := flag.Int("w", 5, "Window size for mask finding algorithm.")
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- img, _, err := image.Decode(f)
- if err != nil {
- log.Fatalf("Could not decode image: %v\n", err)
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- clean := preproc.Wipe(gray, *wsize, *thresh, *min)
-
- f, err = os.Create(flag.Arg(1))
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
- }
- defer f.Close()
- err = png.Encode(f, clean)
- if err != nil {
- log.Fatalf("Could not encode image: %v\n", err)
- }
-}
diff --git a/init_test.go b/init_test.go
deleted file mode 100644
index 37e510b..0000000
--- a/init_test.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package preproc
-
-import (
- "flag"
- "os"
- "testing"
-)
-
-var update = flag.Bool("update", false, "update golden files")
-
-// TestMain is needed to ensure flags are parsed
-func TestMain(m *testing.M) {
- flag.Parse()
- os.Exit(m.Run())
-}
diff --git a/integralimg/integralimg.go b/integralimg.go
index 5cfbaf3..5cfbaf3 100644
--- a/integralimg/integralimg.go
+++ b/integralimg.go
diff --git a/preprocmulti.go b/preprocmulti.go
deleted file mode 100644
index f4f7fbf..0000000
--- a/preprocmulti.go
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package preproc
-
-// TODO: come up with a way to set a good ksize automatically
-
-import (
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "os"
- "strings"
-
- "rescribe.xyz/preproc/integralimg"
-)
-
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
- return bounds.Dx() / 60
-}
-
-// PreProcMulti binarizes and preprocesses an image with multiple binarisation levels.
-// inPath: Path of input image.
-// ksizes: Slice of k values to pass to Sauvola algorithm
-// binType: Type of binarization threshold. binary or zeroinv are currently implemented.
-// binWsize: Window size for sauvola binarization algorithm. Set automatically based on resolution if 0.
-// wipe: Whether to wipe (clear sides) the image
-// wipeWsize: Window size for wiping algorithm
-// wipeMinWidthPerc: Minimum percentage of the image width for the content width calculation to be considered valid
-// Note: copied from cmd/preprocmulti/main.go, should think about the best way
-// to organise this code later.
-// TODO: return errors that encapsulate the err describing where it was encountered
-// TODO: do the post-integral image stuff in separate goroutines for speed
-func PreProcMulti(inPath string, ksizes []float64, binType string, binWsize int, wipe bool, wipeWsize int, wipeMinWidthPerc int) ([]string, error) {
- // Make outBase inPath up to final .
- s := strings.Split(inPath, ".")
- outBase := strings.Join(s[:len(s)-1], "")
-
- var donePaths []string
-
- f, err := os.Open(inPath)
- if err != nil {
- return donePaths, err
- }
- defer f.Close()
- img, _, err := image.Decode(f)
- if err != nil {
- return donePaths, err
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- if binWsize == 0 {
- binWsize = autowsize(b)
- }
-
- if binWsize%2 == 0 {
- binWsize++
- }
-
- var clean, threshimg image.Image
- integrals := integralimg.ToAllIntegralImg(gray)
-
- for _, k := range ksizes {
- threshimg = PreCalcedSauvola(integrals, gray, k, binWsize)
-
- if binType == "zeroinv" {
- threshimg, err = BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA))
- if err != nil {
- return donePaths, err
- }
- }
-
- if wipe {
- clean = Wipe(threshimg.(*image.Gray), wipeWsize, k*0.02, wipeMinWidthPerc)
- } else {
- clean = threshimg
- }
-
- savefn := fmt.Sprintf("%s_bin%0.1f.png", outBase, k)
- f, err = os.Create(savefn)
- if err != nil {
- return donePaths, err
- }
- defer f.Close()
- err = png.Encode(f, clean)
- if err != nil {
- return donePaths, err
- }
- donePaths = append(donePaths, savefn)
- }
- return donePaths, nil
-}
diff --git a/sauvola.go b/sauvola.go
deleted file mode 100644
index 5a9be03..0000000
--- a/sauvola.go
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package preproc
-
-import (
- "image"
- "image/color"
-
- "rescribe.xyz/preproc/integralimg"
-)
-
-// Implements Sauvola's algorithm for text binarization, see paper
-// "Adaptive document image binarization" (2000)
-func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- window := surrounding(img, x, y, windowsize)
- m, dev := meanstddev(window)
- threshold := m * (1 + ksize*((dev/128)-1))
- if img.GrayAt(x, y).Y < uint8(threshold) {
- new.SetGray(x, y, color.Gray{0})
- } else {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- }
-
- return new
-}
-
-// Implements Sauvola's algorithm using Integral Images, see paper
-// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images"
-// and
-// https://stackoverflow.com/questions/13110733/computing-image-integral
-func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- integrals := integralimg.ToAllIntegralImg(img)
-
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- m, dev := integrals.MeanStdDevWindow(x, y, windowsize)
- threshold := m * (1 + ksize*((dev/128)-1))
- if img.GrayAt(x, y).Y < uint8(threshold) {
- new.SetGray(x, y, color.Gray{0})
- } else {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- }
-
- return new
-}
-
-// PreCalcedSauvola Implements Sauvola's algorithm using precalculated Integral Images
-// TODO: have this be the root function that the other two reference
-func PreCalcedSauvola(integrals integralimg.WithSq, img *image.Gray, ksize float64, windowsize int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- m, dev := integrals.MeanStdDevWindow(x, y, windowsize)
- threshold := m * (1 + ksize*((dev/128)-1))
- if img.GrayAt(x, y).Y < uint8(threshold) {
- new.SetGray(x, y, color.Gray{0})
- } else {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- }
-
- return new
-}
diff --git a/sauvola_test.go b/sauvola_test.go
deleted file mode 100644
index fd5da60..0000000
--- a/sauvola_test.go
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package preproc
-
-import (
- "fmt"
- "image"
- "image/png"
- "os"
- "testing"
-)
-
-func TestBinarization(t *testing.T) {
- cases := []struct {
- name string
- orig string
- golden string
- ksize float64
- wsize int
- }{
- {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41},
- {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19},
- {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19},
- {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w41.png", 0.5, 41},
- {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w19.png", 0.5, 19},
- {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.3_w19.png", 0.3, 19},
- }
-
- for _, c := range cases {
- t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) {
- var actual *image.Gray
- orig, err := decode(c.orig)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.orig, err)
- }
- switch c.name {
- case "integralsauvola":
- actual = IntegralSauvola(orig, c.ksize, c.wsize)
- case "sauvola":
- if !testing.Short() {
- actual = Sauvola(orig, c.ksize, c.wsize)
- } else {
- t.Skip("Skipping long test due to -short flag.\n")
- }
- default:
- t.Fatalf("No method %s\n", c.name)
- }
- if *update {
- f, err := os.Create(c.golden)
- defer f.Close()
- if err != nil {
- t.Fatalf("Could not open file %s to update: %v\n", c.golden, err)
- }
- err = png.Encode(f, actual)
- if err != nil {
- t.Fatalf("Could not encode update of %s: %v\n", c.golden, err)
- }
- }
- golden, err := decode(c.golden)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.golden, err)
- }
- if !imgsequal(golden, actual) {
- t.Errorf("Binarized %s differs to %s\n", c.orig, c.golden)
- }
- })
- }
-}
diff --git a/test_helpers.go b/test_helpers.go
deleted file mode 100644
index 97a43dd..0000000
--- a/test_helpers.go
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package preproc
-
-// TODO: add different pages as test cases
-// TODO: test non integral img version
-
-import (
- "image"
- "image/draw"
- "image/png"
- "os"
-)
-
-func decode(s string) (*image.Gray, error) {
- f, err := os.Open(s)
- defer f.Close()
- if err != nil {
- return nil, err
- }
- img, err := png.Decode(f)
- if err != nil {
- return nil, err
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
- return gray, nil
-}
-
-func imgsequal(img1 *image.Gray, img2 *image.Gray) bool {
- b := img1.Bounds()
- if !b.Eq(img2.Bounds()) {
- return false
- }
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- r0, g0, b0, a0 := img1.At(x, y).RGBA()
- r1, g1, b1, a1 := img2.At(x, y).RGBA()
- if r0 != r1 {
- return false
- }
- if g0 != g1 {
- return false
- }
- if b0 != b1 {
- return false
- }
- if a0 != a1 {
- return false
- }
- }
- }
- return true
-}
diff --git a/testdata/pg1.png b/testdata/pg1.png
deleted file mode 100644
index 2bcc4b1..0000000
--- a/testdata/pg1.png
+++ /dev/null
Binary files differ
diff --git a/testdata/pg1_integralsauvola_k0.3_w19.png b/testdata/pg1_integralsauvola_k0.3_w19.png
deleted file mode 100644
index bdf5712..0000000
--- a/testdata/pg1_integralsauvola_k0.3_w19.png
+++ /dev/null
Binary files differ
diff --git a/testdata/pg1_integralsauvola_k0.5_w19.png b/testdata/pg1_integralsauvola_k0.5_w19.png
deleted file mode 100644
index 5db2d9a..0000000
--- a/testdata/pg1_integralsauvola_k0.5_w19.png
+++ /dev/null
Binary files differ
diff --git a/testdata/pg1_integralsauvola_k0.5_w41.png b/testdata/pg1_integralsauvola_k0.5_w41.png
deleted file mode 100644
index 050d037..0000000
--- a/testdata/pg1_integralsauvola_k0.5_w41.png
+++ /dev/null
Binary files differ
diff --git a/testdata/pg1_sauvola_k0.3_w19.png b/testdata/pg1_sauvola_k0.3_w19.png
deleted file mode 100644
index bcd595f..0000000
--- a/testdata/pg1_sauvola_k0.3_w19.png
+++ /dev/null
Binary files differ
diff --git a/testdata/pg1_sauvola_k0.5_w19.png b/testdata/pg1_sauvola_k0.5_w19.png
deleted file mode 100644
index 8de596c..0000000
--- a/testdata/pg1_sauvola_k0.5_w19.png
+++ /dev/null
Binary files differ
diff --git a/testdata/pg1_sauvola_k0.5_w41.png b/testdata/pg1_sauvola_k0.5_w41.png
deleted file mode 100644
index b8f50e0..0000000
--- a/testdata/pg1_sauvola_k0.5_w41.png
+++ /dev/null
Binary files differ
diff --git a/testdata/pg2.png b/testdata/pg2.png
deleted file mode 100644
index c7c4249..0000000
--- a/testdata/pg2.png
+++ /dev/null
Binary files differ
diff --git a/testdata/pg2_integralwipesides_t0.02_w5.png b/testdata/pg2_integralwipesides_t0.02_w5.png
deleted file mode 100644
index 6b4ccb2..0000000
--- a/testdata/pg2_integralwipesides_t0.02_w5.png
+++ /dev/null
Binary files differ
diff --git a/testdata/pg2_integralwipesides_t0.05_w25.png b/testdata/pg2_integralwipesides_t0.05_w25.png
deleted file mode 100644
index 39dc88d..0000000
--- a/testdata/pg2_integralwipesides_t0.05_w25.png
+++ /dev/null
Binary files differ
diff --git a/testdata/pg2_integralwipesides_t0.05_w5.png b/testdata/pg2_integralwipesides_t0.05_w5.png
deleted file mode 100644
index 3a0452f..0000000
--- a/testdata/pg2_integralwipesides_t0.05_w5.png
+++ /dev/null
Binary files differ
diff --git a/util.go b/util.go
deleted file mode 100644
index 3ce4f84..0000000
--- a/util.go
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package preproc
-
-import (
- "errors"
- "image"
- "math"
-)
-
-// TODO: name better; maybe verb, x-er
-// TODO: implement these for regular image, and use them to make
-// image functions generic for integral and non- images
-type UsefulImg interface {
- MeanWindow()
- MeanStdDevWindow()
-}
-
-func mean(i []int) float64 {
- sum := 0
- for _, n := range i {
- sum += n
- }
- return float64(sum) / float64(len(i))
-}
-
-func stddev(i []int) float64 {
- m := mean(i)
-
- var sum float64
- for _, n := range i {
- sum += (float64(n) - m) * (float64(n) - m)
- }
- variance := sum / float64(len(i)-1)
- return math.Sqrt(variance)
-}
-
-func meanstddev(i []int) (float64, float64) {
- m := mean(i)
-
- var sum float64
- for _, n := range i {
- sum += (float64(n) - m) * (float64(n) - m)
- }
- variance := float64(sum) / float64(len(i)-1)
- return m, math.Sqrt(variance)
-}
-
-// gets the pixel values surrounding a point in the image
-func surrounding(img *image.Gray, x int, y int, size int) []int {
- b := img.Bounds()
- step := size / 2
-
- miny := y - step
- if miny < b.Min.Y {
- miny = b.Min.Y
- }
- minx := x - step
- if minx < b.Min.X {
- minx = b.Min.X
- }
- maxy := y + step
- if maxy > b.Max.Y {
- maxy = b.Max.Y
- }
- maxx := x + step
- if maxx > b.Max.X {
- maxx = b.Max.X
- }
-
- var s []int
- for yi := miny; yi <= maxy; yi++ {
- for xi := minx; xi <= maxx; xi++ {
- s = append(s, int(img.GrayAt(xi, yi).Y))
- }
- }
- return s
-}
-
-func BinToZeroInv(bin *image.Gray, orig *image.RGBA) (*image.RGBA, error) {
- b := bin.Bounds()
- if !b.Eq(orig.Bounds()) {
- return orig, errors.New("bin and orig images need to be the same dimensions")
- }
- newimg := image.NewRGBA(image.Rect(0, 0, b.Dx(), b.Dy()))
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- if bin.GrayAt(x, y).Y == 255 {
- newimg.Set(x, y, bin.GrayAt(x, y))
- } else {
- newimg.Set(x, y, orig.At(x, y))
- }
- }
- }
-
- return newimg, nil
-}
diff --git a/wipesides.go b/wipesides.go
deleted file mode 100644
index 79a68b8..0000000
--- a/wipesides.go
+++ /dev/null
@@ -1,164 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package preproc
-
-// TODO: add minimum size variable (default ~30%?)
-// TODO: switch to an interface rather than integralimg.I
-
-import (
- "errors"
- "fmt"
- "image"
- "image/color"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "os"
-
- "rescribe.xyz/preproc/integralimg"
-)
-
-// returns the proportion of the given window that is black pixels
-func proportion(i integralimg.I, x int, size int) float64 {
- w := i.GetVerticalWindow(x, size)
- return w.Proportion()
-}
-
-// findbestedge goes through every vertical line from x to x+w to
-// find the one with the lowest proportion of black pixels.
-func findbestedge(img integralimg.I, x int, w int) int {
- var bestx int
- var best float64
-
- if w == 1 {
- return x
- }
-
- right := x + w
- for ; x < right; x++ {
- prop := proportion(img, x, 1)
- if prop > best {
- best = prop
- bestx = x
- }
- }
-
- return bestx
-}
-
-// findedges finds the edges of the main content, by moving a window of wsize
-// from near the middle of the image to the left and right, stopping when it reaches
-// a point at which there is a lower proportion of black pixels than thresh.
-func findedges(img integralimg.I, wsize int, thresh float64) (int, int) {
- maxx := len(img[0]) - 1
- var lowedge, highedge int = 0, maxx
-
- // don't start at the middle, as this will fail for 2 column layouts,
- // start 10% left or right of the middle
- notcentre := maxx / 10
-
- for x := maxx/2 + notcentre; x < maxx-wsize; x++ {
- if proportion(img, x, wsize) <= thresh {
- highedge = findbestedge(img, x, wsize)
- break
- }
- }
-
- for x := maxx/2 - notcentre; x > 0; x-- {
- if proportion(img, x, wsize) <= thresh {
- lowedge = findbestedge(img, x, wsize)
- break
- }
- }
-
- return lowedge, highedge
-}
-
-// wipesides fills the sections of image not within the boundaries
-// of lowedge and highedge with white
-func wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- // set left edge white
- for x := b.Min.X; x < lowedge; x++ {
- for y := b.Min.Y; y < b.Max.Y; y++ {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- // copy middle
- for x := lowedge; x < highedge; x++ {
- for y := b.Min.Y; y < b.Max.Y; y++ {
- new.SetGray(x, y, img.GrayAt(x, y))
- }
- }
- // set right edge white
- for x := highedge; x < b.Max.X; x++ {
- for y := b.Min.Y; y < b.Max.Y; y++ {
- new.SetGray(x, y, color.Gray{255})
- }
- }
-
- return new
-}
-
-// toonarrow checks whether the area between lowedge and highedge is
-// less than min % of the total image width
-func toonarrow(img *image.Gray, lowedge int, highedge int, min int) bool {
- b := img.Bounds()
- imgw := b.Max.X - b.Min.X
- wipew := highedge - lowedge
- if float64(wipew)/float64(imgw)*100 < float64(min) {
- return true
- }
- return false
-}
-
-// Wipe fills the sections of image which fall outside the content
-// area with white, providing the content area is above min %
-func Wipe(img *image.Gray, wsize int, thresh float64, min int) *image.Gray {
- integral := integralimg.ToIntegralImg(img)
- lowedge, highedge := findedges(integral, wsize, thresh)
- if toonarrow(img, lowedge, highedge, min) {
- return img
- }
- return wipesides(img, lowedge, highedge)
-}
-
-// WipeFile wipes an image file, filling the sections of the image
-// which fall outside the content area with white, providing the
-// content area is above min %.
-// inPath: path of the input image.
-// outPath: path to save the output image.
-// wsize: window size for wipe algorithm.
-// thresh: threshold for wipe algorithm.
-// min: minimum % of content area width to consider valid.
-func WipeFile(inPath string, outPath string, wsize int, thresh float64, min int) error {
- f, err := os.Open(inPath)
- defer f.Close()
- if err != nil {
- return errors.New(fmt.Sprintf("Could not open file %s: %v", inPath, err))
- }
- img, _, err := image.Decode(f)
- if err != nil {
- return errors.New(fmt.Sprintf("Could not decode image: %v", err))
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- clean := Wipe(gray, wsize, thresh, min)
-
- f, err = os.Create(outPath)
- if err != nil {
- return errors.New(fmt.Sprintf("Could not create file %s: %v", outPath, err))
- }
- defer f.Close()
- err = png.Encode(f, clean)
- if err != nil {
- return errors.New(fmt.Sprintf("Could not encode image: %v", err))
- }
- return nil
-}
diff --git a/wipesides_test.go b/wipesides_test.go
deleted file mode 100644
index 4906c5b..0000000
--- a/wipesides_test.go
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2019-2020 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package preproc
-
-// TODO: add different pages as test cases
-// TODO: test non integral img version
-
-import (
- "fmt"
- "image"
- "image/png"
- "os"
- "testing"
-)
-
-func TestWipeSides(t *testing.T) {
- cases := []struct {
- name string
- orig string
- golden string
- thresh float64
- wsize int
- }{
- {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.02_w5.png", 0.02, 5},
- {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w5.png", 0.05, 5},
- {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w25.png", 0.05, 25},
- }
-
- for _, c := range cases {
- t.Run(fmt.Sprintf("%s_%0.2f_%d", c.name, c.thresh, c.wsize), func(t *testing.T) {
- var actual *image.Gray
- orig, err := decode(c.orig)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.orig, err)
- }
- actual = Wipe(orig, c.wsize, c.thresh, 30)
- if *update {
- f, err := os.Create(c.golden)
- defer f.Close()
- if err != nil {
- t.Fatalf("Could not open file %s to update: %v\n", c.golden, err)
- }
- err = png.Encode(f, actual)
- if err != nil {
- t.Fatalf("Could not encode update of %s: %v\n", c.golden, err)
- }
- }
- golden, err := decode(c.golden)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.golden, err)
- }
- if !imgsequal(golden, actual) {
- t.Errorf("Processed %s differs to %s\n", c.orig, c.golden)
- }
- })
- }
-}