diff options
26 files changed, 0 insertions, 1165 deletions
@@ -1,36 +0,0 @@ -# rescribe.xyz/preproc package - -This package contains various image processing methods which are -useful for preprocessing page images for OCR. It also contains -several commands in the cmd/ directory which can be used to -preprocess images directly. - -# Commands - -There are several commands in the cmd/ directory which are useful -in their own right as well as serving as examples of using the -package. - - - binarize : binarises an image using the sauvola algorithm - - preproc : binarises and wipes an image - - preprocmulti : binarises and wipes an image with multiple - binarisation ksize values - - wipe : wipes sections of an image that are outside an - area detected as content - -# Bugs - -The integral image operations don't produce exactly the same result -as their non-integral image counterparts. The difference is small -enough that it has little effect on the output images, but it ought -to be identical. - -# Contributions - -Any and all comments, bug reports, patches or pull requests would -be very welcomely received. Please email them to <nick@rescribe.xyz>. - -# License - -This package is licensed under the GPLv3. See the LICENSE file for -more details. diff --git a/cmd/binarize/main.go b/cmd/binarize/main.go deleted file mode 100644 index a844982..0000000 --- a/cmd/binarize/main.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package main - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/preproc" -) - -// TODO: do more testing to see how good this assumption is -func autowsize(bounds image.Rectangle) int { - return bounds.Dx() / 60 -} - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n") - flag.PrintDefaults() - } - wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.") - ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).") - btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - if *wsize == 0 { - *wsize = autowsize(b) - log.Printf("Set window size to %d\n", *wsize) - } - - if *wsize%2 == 0 { - *wsize++ - } - - // TODO: come up with a way to set a good ksize automatically - - var thresh image.Image - thresh = preproc.IntegralSauvola(gray, *ksize, *wsize) - - if *btype == "zeroinv" { - thresh, err = preproc.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA)) - if err != nil { - log.Fatal(err) - } - } - - f, err = os.Create(flag.Arg(1)) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) - } - defer f.Close() - err = png.Encode(f, thresh) - if err != nil { - log.Fatalf("Could not encode image: %v\n", err) - } -} diff --git a/cmd/preproc/main.go b/cmd/preproc/main.go deleted file mode 100644 index 3323635..0000000 --- a/cmd/preproc/main.go +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package main - -// TODO: come up with a way to set a good ksize automatically - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/preproc" -) - -// TODO: do more testing to see how good this assumption is -func autowsize(bounds image.Rectangle) int { - return bounds.Dx() / 60 -} - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-m minperc] [-nowipe] [-wt wipethresh] [-ws wipesize] inimg outimg\n") - fmt.Fprintf(os.Stderr, "Binarize and preprocess an image\n") - flag.PrintDefaults() - } - binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.") - ksize := flag.Float64("k", 0.5, "K for sauvola binarization algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).") - btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.") - min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.") - nowipe := flag.Bool("nowipe", false, "Disable wiping completely.") - wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.") - thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - if *binwsize == 0 { - *binwsize = autowsize(b) - } - - if *binwsize%2 == 0 { - *binwsize++ - } - - log.Print("Binarising") - var clean, threshimg image.Image - threshimg = preproc.IntegralSauvola(gray, *ksize, *binwsize) - - if *btype == "zeroinv" { - threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA)) - if err != nil { - log.Fatal(err) - } - } - - if !*nowipe { - log.Print("Wiping sides") - clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh, *min) - } else { - clean = threshimg - } - - f, err = os.Create(flag.Arg(1)) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) - } - defer f.Close() - err = png.Encode(f, clean) - if err != nil { - log.Fatalf("Could not encode image: %v\n", err) - } -} diff --git a/cmd/preprocmulti/main.go b/cmd/preprocmulti/main.go deleted file mode 100644 index aeac767..0000000 --- a/cmd/preprocmulti/main.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package main - -// TODO: come up with a way to set a good ksize automatically - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/preproc" - "rescribe.xyz/preproc/integralimg" -) - -// TODO: do more testing to see how good this assumption is -func autowsize(bounds image.Rectangle) int { - return bounds.Dx() / 60 -} - -func main() { - ksizes := []float64{0.1, 0.2, 0.4, 0.5} - - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: preprocmulti [-bt bintype] [-bw winsize] [-m minperc] [-nowipe] [-ws wipesize] inimg outbase\n") - fmt.Fprintf(os.Stderr, "Binarize and preprocess an image, with multiple binarisation levels,\n") - fmt.Fprintf(os.Stderr, "saving images to outbase_bin{k}.png.\n") - fmt.Fprintf(os.Stderr, "Binarises with these levels for k: %v.\n", ksizes) - flag.PrintDefaults() - } - binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.") - btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.") - min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.") - nowipe := flag.Bool("nowipe", false, "Disable wiping completely.") - wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - log.Printf("Opening %s\n", flag.Arg(0)) - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - if *binwsize == 0 { - *binwsize = autowsize(b) - } - - if *binwsize%2 == 0 { - *binwsize++ - } - - var clean, threshimg image.Image - log.Print("Precalculating integral images") - integrals := integralimg.ToAllIntegralImg(gray) - - for _, k := range ksizes { - log.Print("Binarising") - threshimg = preproc.PreCalcedSauvola(integrals, gray, k, *binwsize) - - if *btype == "zeroinv" { - threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA)) - if err != nil { - log.Fatal(err) - } - } - - if !*nowipe { - log.Print("Wiping sides") - clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, k*0.02, *min) - } else { - clean = threshimg - } - - savefn := fmt.Sprintf("%s_bin%0.1f.png", flag.Arg(1), k) - log.Printf("Saving %s\n", savefn) - f, err = os.Create(savefn) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", savefn, err) - } - defer f.Close() - err = png.Encode(f, clean) - if err != nil { - log.Fatalf("Could not encode image: %v\n", err) - } - } -} diff --git a/cmd/splittable/main.go b/cmd/splittable/main.go deleted file mode 100644 index e6df94a..0000000 --- a/cmd/splittable/main.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright 2020 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package main - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/preproc/integralimg" -) - -const usage = `Usage: splittable [-t thresh] [-w winsize] inimg outbase - -splittable is an experimental program to split a table into individual -cells suitable for OCR. It does this by detecting lines. At present it -just detects vertical lines and outputs images for each section -between those lines. - -` - -// returns the proportion of the given window that is black pixels -func proportion(i integralimg.I, x int, size int) float64 { - w := i.GetVerticalWindow(x, size) - return w.Proportion() -} - -// findbestvline goes through every vertical line from x to x+w to -// find the one with the lowest proportion of black pixels. -func findbestvline(img integralimg.I, x int, w int) int { - var bestx int - var best float64 - - if w == 1 { - return x - } - - right := x + w - for ; x < right; x++ { - prop := proportion(img, x, 1) - if prop > best { - best = prop - bestx = x - } - } - - return bestx -} - -// findvlines finds vertical lines, returning an array of x coordinates -// for each line. It works by moving a window of wsize across the image, -// marking each place where there is a higher proportion of black pixels -// than thresh. -func findvlines(img integralimg.I, wsize int, thresh float64) []int { - maxx := len(img[0]) - 1 - var lines []int - - for x := 0; x < maxx-wsize; x+=wsize { - if proportion(img, x, wsize) >= thresh { - l := findbestvline(img, x, wsize) - lines = append(lines, l) - } - } - - return lines -} - -func drawsection(img *image.Gray, x1 int, x2 int) *image.Gray { - b := img.Bounds() - width := x2-x1 - new := image.NewGray(image.Rect(0, b.Min.Y, width, b.Max.Y)) - - for x := 0; x < width; x++ { - for y := b.Min.Y; y < b.Max.Y; y++ { - new.SetGray(x, y, img.GrayAt(x1 + x, y)) - } - } - - return new -} - -func main() { - flag.Usage = func() { - fmt.Fprintf(flag.CommandLine.Output(), usage) - flag.PrintDefaults() - } - thresh := flag.Float64("t", 0.85, "Threshold for the proportion of black pixels below which a window is determined to be a line. Higher means fewer lines will be found.") - wsize := flag.Int("w", 1, "Window size for mask finding algorithm.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - integral := integralimg.ToIntegralImg(gray) - vlines := findvlines(integral, *wsize, *thresh) - - for i, v := range vlines { - fmt.Printf("line detected at x=%d\n", v) - - if i+1 >= len(vlines) { - break - } - section := drawsection(gray, v, vlines[i+1]) - - fn := fmt.Sprintf("%s-%d.png", flag.Arg(1), v) - f, err = os.Create(fn) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", fn, err) - } - defer f.Close() - err := png.Encode(f, section) - if err != nil { - log.Fatalf("Could not encode image %s: %v\n", fn, err) - } - } - - - // TODO: find horizontal lines too - // TODO: do rotation - // TODO: output table cells - // TODO: potentially send cells straight to tesseract -} diff --git a/cmd/wipe/main.go b/cmd/wipe/main.go deleted file mode 100644 index d4c95ac..0000000 --- a/cmd/wipe/main.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package main - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/preproc" -) - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: wipe [-m minperc] [-t thresh] [-w winsize] inimg outimg\n") - fmt.Fprintf(os.Stderr, "Wipes the sections of an image which are outside the content area.\n") - flag.PrintDefaults() - } - min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.") - thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge. Higher means more aggressive wiping.") - wsize := flag.Int("w", 5, "Window size for mask finding algorithm.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - clean := preproc.Wipe(gray, *wsize, *thresh, *min) - - f, err = os.Create(flag.Arg(1)) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) - } - defer f.Close() - err = png.Encode(f, clean) - if err != nil { - log.Fatalf("Could not encode image: %v\n", err) - } -} diff --git a/init_test.go b/init_test.go deleted file mode 100644 index 37e510b..0000000 --- a/init_test.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package preproc - -import ( - "flag" - "os" - "testing" -) - -var update = flag.Bool("update", false, "update golden files") - -// TestMain is needed to ensure flags are parsed -func TestMain(m *testing.M) { - flag.Parse() - os.Exit(m.Run()) -} diff --git a/integralimg/integralimg.go b/integralimg.go index 5cfbaf3..5cfbaf3 100644 --- a/integralimg/integralimg.go +++ b/integralimg.go diff --git a/preprocmulti.go b/preprocmulti.go deleted file mode 100644 index f4f7fbf..0000000 --- a/preprocmulti.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package preproc - -// TODO: come up with a way to set a good ksize automatically - -import ( - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "os" - "strings" - - "rescribe.xyz/preproc/integralimg" -) - -// TODO: do more testing to see how good this assumption is -func autowsize(bounds image.Rectangle) int { - return bounds.Dx() / 60 -} - -// PreProcMulti binarizes and preprocesses an image with multiple binarisation levels. -// inPath: Path of input image. -// ksizes: Slice of k values to pass to Sauvola algorithm -// binType: Type of binarization threshold. binary or zeroinv are currently implemented. -// binWsize: Window size for sauvola binarization algorithm. Set automatically based on resolution if 0. -// wipe: Whether to wipe (clear sides) the image -// wipeWsize: Window size for wiping algorithm -// wipeMinWidthPerc: Minimum percentage of the image width for the content width calculation to be considered valid -// Note: copied from cmd/preprocmulti/main.go, should think about the best way -// to organise this code later. -// TODO: return errors that encapsulate the err describing where it was encountered -// TODO: do the post-integral image stuff in separate goroutines for speed -func PreProcMulti(inPath string, ksizes []float64, binType string, binWsize int, wipe bool, wipeWsize int, wipeMinWidthPerc int) ([]string, error) { - // Make outBase inPath up to final . - s := strings.Split(inPath, ".") - outBase := strings.Join(s[:len(s)-1], "") - - var donePaths []string - - f, err := os.Open(inPath) - if err != nil { - return donePaths, err - } - defer f.Close() - img, _, err := image.Decode(f) - if err != nil { - return donePaths, err - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - if binWsize == 0 { - binWsize = autowsize(b) - } - - if binWsize%2 == 0 { - binWsize++ - } - - var clean, threshimg image.Image - integrals := integralimg.ToAllIntegralImg(gray) - - for _, k := range ksizes { - threshimg = PreCalcedSauvola(integrals, gray, k, binWsize) - - if binType == "zeroinv" { - threshimg, err = BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA)) - if err != nil { - return donePaths, err - } - } - - if wipe { - clean = Wipe(threshimg.(*image.Gray), wipeWsize, k*0.02, wipeMinWidthPerc) - } else { - clean = threshimg - } - - savefn := fmt.Sprintf("%s_bin%0.1f.png", outBase, k) - f, err = os.Create(savefn) - if err != nil { - return donePaths, err - } - defer f.Close() - err = png.Encode(f, clean) - if err != nil { - return donePaths, err - } - donePaths = append(donePaths, savefn) - } - return donePaths, nil -} diff --git a/sauvola.go b/sauvola.go deleted file mode 100644 index 5a9be03..0000000 --- a/sauvola.go +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package preproc - -import ( - "image" - "image/color" - - "rescribe.xyz/preproc/integralimg" -) - -// Implements Sauvola's algorithm for text binarization, see paper -// "Adaptive document image binarization" (2000) -func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - window := surrounding(img, x, y, windowsize) - m, dev := meanstddev(window) - threshold := m * (1 + ksize*((dev/128)-1)) - if img.GrayAt(x, y).Y < uint8(threshold) { - new.SetGray(x, y, color.Gray{0}) - } else { - new.SetGray(x, y, color.Gray{255}) - } - } - } - - return new -} - -// Implements Sauvola's algorithm using Integral Images, see paper -// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" -// and -// https://stackoverflow.com/questions/13110733/computing-image-integral -func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - integrals := integralimg.ToAllIntegralImg(img) - - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - m, dev := integrals.MeanStdDevWindow(x, y, windowsize) - threshold := m * (1 + ksize*((dev/128)-1)) - if img.GrayAt(x, y).Y < uint8(threshold) { - new.SetGray(x, y, color.Gray{0}) - } else { - new.SetGray(x, y, color.Gray{255}) - } - } - } - - return new -} - -// PreCalcedSauvola Implements Sauvola's algorithm using precalculated Integral Images -// TODO: have this be the root function that the other two reference -func PreCalcedSauvola(integrals integralimg.WithSq, img *image.Gray, ksize float64, windowsize int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - m, dev := integrals.MeanStdDevWindow(x, y, windowsize) - threshold := m * (1 + ksize*((dev/128)-1)) - if img.GrayAt(x, y).Y < uint8(threshold) { - new.SetGray(x, y, color.Gray{0}) - } else { - new.SetGray(x, y, color.Gray{255}) - } - } - } - - return new -} diff --git a/sauvola_test.go b/sauvola_test.go deleted file mode 100644 index fd5da60..0000000 --- a/sauvola_test.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package preproc - -import ( - "fmt" - "image" - "image/png" - "os" - "testing" -) - -func TestBinarization(t *testing.T) { - cases := []struct { - name string - orig string - golden string - ksize float64 - wsize int - }{ - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41}, - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19}, - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w41.png", 0.5, 41}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w19.png", 0.5, 19}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.3_w19.png", 0.3, 19}, - } - - for _, c := range cases { - t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) { - var actual *image.Gray - orig, err := decode(c.orig) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.orig, err) - } - switch c.name { - case "integralsauvola": - actual = IntegralSauvola(orig, c.ksize, c.wsize) - case "sauvola": - if !testing.Short() { - actual = Sauvola(orig, c.ksize, c.wsize) - } else { - t.Skip("Skipping long test due to -short flag.\n") - } - default: - t.Fatalf("No method %s\n", c.name) - } - if *update { - f, err := os.Create(c.golden) - defer f.Close() - if err != nil { - t.Fatalf("Could not open file %s to update: %v\n", c.golden, err) - } - err = png.Encode(f, actual) - if err != nil { - t.Fatalf("Could not encode update of %s: %v\n", c.golden, err) - } - } - golden, err := decode(c.golden) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.golden, err) - } - if !imgsequal(golden, actual) { - t.Errorf("Binarized %s differs to %s\n", c.orig, c.golden) - } - }) - } -} diff --git a/test_helpers.go b/test_helpers.go deleted file mode 100644 index 97a43dd..0000000 --- a/test_helpers.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package preproc - -// TODO: add different pages as test cases -// TODO: test non integral img version - -import ( - "image" - "image/draw" - "image/png" - "os" -) - -func decode(s string) (*image.Gray, error) { - f, err := os.Open(s) - defer f.Close() - if err != nil { - return nil, err - } - img, err := png.Decode(f) - if err != nil { - return nil, err - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - return gray, nil -} - -func imgsequal(img1 *image.Gray, img2 *image.Gray) bool { - b := img1.Bounds() - if !b.Eq(img2.Bounds()) { - return false - } - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - r0, g0, b0, a0 := img1.At(x, y).RGBA() - r1, g1, b1, a1 := img2.At(x, y).RGBA() - if r0 != r1 { - return false - } - if g0 != g1 { - return false - } - if b0 != b1 { - return false - } - if a0 != a1 { - return false - } - } - } - return true -} diff --git a/testdata/pg1.png b/testdata/pg1.png Binary files differdeleted file mode 100644 index 2bcc4b1..0000000 --- a/testdata/pg1.png +++ /dev/null diff --git a/testdata/pg1_integralsauvola_k0.3_w19.png b/testdata/pg1_integralsauvola_k0.3_w19.png Binary files differdeleted file mode 100644 index bdf5712..0000000 --- a/testdata/pg1_integralsauvola_k0.3_w19.png +++ /dev/null diff --git a/testdata/pg1_integralsauvola_k0.5_w19.png b/testdata/pg1_integralsauvola_k0.5_w19.png Binary files differdeleted file mode 100644 index 5db2d9a..0000000 --- a/testdata/pg1_integralsauvola_k0.5_w19.png +++ /dev/null diff --git a/testdata/pg1_integralsauvola_k0.5_w41.png b/testdata/pg1_integralsauvola_k0.5_w41.png Binary files differdeleted file mode 100644 index 050d037..0000000 --- a/testdata/pg1_integralsauvola_k0.5_w41.png +++ /dev/null diff --git a/testdata/pg1_sauvola_k0.3_w19.png b/testdata/pg1_sauvola_k0.3_w19.png Binary files differdeleted file mode 100644 index bcd595f..0000000 --- a/testdata/pg1_sauvola_k0.3_w19.png +++ /dev/null diff --git a/testdata/pg1_sauvola_k0.5_w19.png b/testdata/pg1_sauvola_k0.5_w19.png Binary files differdeleted file mode 100644 index 8de596c..0000000 --- a/testdata/pg1_sauvola_k0.5_w19.png +++ /dev/null diff --git a/testdata/pg1_sauvola_k0.5_w41.png b/testdata/pg1_sauvola_k0.5_w41.png Binary files differdeleted file mode 100644 index b8f50e0..0000000 --- a/testdata/pg1_sauvola_k0.5_w41.png +++ /dev/null diff --git a/testdata/pg2.png b/testdata/pg2.png Binary files differdeleted file mode 100644 index c7c4249..0000000 --- a/testdata/pg2.png +++ /dev/null diff --git a/testdata/pg2_integralwipesides_t0.02_w5.png b/testdata/pg2_integralwipesides_t0.02_w5.png Binary files differdeleted file mode 100644 index 6b4ccb2..0000000 --- a/testdata/pg2_integralwipesides_t0.02_w5.png +++ /dev/null diff --git a/testdata/pg2_integralwipesides_t0.05_w25.png b/testdata/pg2_integralwipesides_t0.05_w25.png Binary files differdeleted file mode 100644 index 39dc88d..0000000 --- a/testdata/pg2_integralwipesides_t0.05_w25.png +++ /dev/null diff --git a/testdata/pg2_integralwipesides_t0.05_w5.png b/testdata/pg2_integralwipesides_t0.05_w5.png Binary files differdeleted file mode 100644 index 3a0452f..0000000 --- a/testdata/pg2_integralwipesides_t0.05_w5.png +++ /dev/null diff --git a/util.go b/util.go deleted file mode 100644 index 3ce4f84..0000000 --- a/util.go +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package preproc - -import ( - "errors" - "image" - "math" -) - -// TODO: name better; maybe verb, x-er -// TODO: implement these for regular image, and use them to make -// image functions generic for integral and non- images -type UsefulImg interface { - MeanWindow() - MeanStdDevWindow() -} - -func mean(i []int) float64 { - sum := 0 - for _, n := range i { - sum += n - } - return float64(sum) / float64(len(i)) -} - -func stddev(i []int) float64 { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := sum / float64(len(i)-1) - return math.Sqrt(variance) -} - -func meanstddev(i []int) (float64, float64) { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := float64(sum) / float64(len(i)-1) - return m, math.Sqrt(variance) -} - -// gets the pixel values surrounding a point in the image -func surrounding(img *image.Gray, x int, y int, size int) []int { - b := img.Bounds() - step := size / 2 - - miny := y - step - if miny < b.Min.Y { - miny = b.Min.Y - } - minx := x - step - if minx < b.Min.X { - minx = b.Min.X - } - maxy := y + step - if maxy > b.Max.Y { - maxy = b.Max.Y - } - maxx := x + step - if maxx > b.Max.X { - maxx = b.Max.X - } - - var s []int - for yi := miny; yi <= maxy; yi++ { - for xi := minx; xi <= maxx; xi++ { - s = append(s, int(img.GrayAt(xi, yi).Y)) - } - } - return s -} - -func BinToZeroInv(bin *image.Gray, orig *image.RGBA) (*image.RGBA, error) { - b := bin.Bounds() - if !b.Eq(orig.Bounds()) { - return orig, errors.New("bin and orig images need to be the same dimensions") - } - newimg := image.NewRGBA(image.Rect(0, 0, b.Dx(), b.Dy())) - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - if bin.GrayAt(x, y).Y == 255 { - newimg.Set(x, y, bin.GrayAt(x, y)) - } else { - newimg.Set(x, y, orig.At(x, y)) - } - } - } - - return newimg, nil -} diff --git a/wipesides.go b/wipesides.go deleted file mode 100644 index 79a68b8..0000000 --- a/wipesides.go +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright 2019 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package preproc - -// TODO: add minimum size variable (default ~30%?) -// TODO: switch to an interface rather than integralimg.I - -import ( - "errors" - "fmt" - "image" - "image/color" - "image/draw" - _ "image/jpeg" - "image/png" - "os" - - "rescribe.xyz/preproc/integralimg" -) - -// returns the proportion of the given window that is black pixels -func proportion(i integralimg.I, x int, size int) float64 { - w := i.GetVerticalWindow(x, size) - return w.Proportion() -} - -// findbestedge goes through every vertical line from x to x+w to -// find the one with the lowest proportion of black pixels. -func findbestedge(img integralimg.I, x int, w int) int { - var bestx int - var best float64 - - if w == 1 { - return x - } - - right := x + w - for ; x < right; x++ { - prop := proportion(img, x, 1) - if prop > best { - best = prop - bestx = x - } - } - - return bestx -} - -// findedges finds the edges of the main content, by moving a window of wsize -// from near the middle of the image to the left and right, stopping when it reaches -// a point at which there is a lower proportion of black pixels than thresh. -func findedges(img integralimg.I, wsize int, thresh float64) (int, int) { - maxx := len(img[0]) - 1 - var lowedge, highedge int = 0, maxx - - // don't start at the middle, as this will fail for 2 column layouts, - // start 10% left or right of the middle - notcentre := maxx / 10 - - for x := maxx/2 + notcentre; x < maxx-wsize; x++ { - if proportion(img, x, wsize) <= thresh { - highedge = findbestedge(img, x, wsize) - break - } - } - - for x := maxx/2 - notcentre; x > 0; x-- { - if proportion(img, x, wsize) <= thresh { - lowedge = findbestedge(img, x, wsize) - break - } - } - - return lowedge, highedge -} - -// wipesides fills the sections of image not within the boundaries -// of lowedge and highedge with white -func wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - // set left edge white - for x := b.Min.X; x < lowedge; x++ { - for y := b.Min.Y; y < b.Max.Y; y++ { - new.SetGray(x, y, color.Gray{255}) - } - } - // copy middle - for x := lowedge; x < highedge; x++ { - for y := b.Min.Y; y < b.Max.Y; y++ { - new.SetGray(x, y, img.GrayAt(x, y)) - } - } - // set right edge white - for x := highedge; x < b.Max.X; x++ { - for y := b.Min.Y; y < b.Max.Y; y++ { - new.SetGray(x, y, color.Gray{255}) - } - } - - return new -} - -// toonarrow checks whether the area between lowedge and highedge is -// less than min % of the total image width -func toonarrow(img *image.Gray, lowedge int, highedge int, min int) bool { - b := img.Bounds() - imgw := b.Max.X - b.Min.X - wipew := highedge - lowedge - if float64(wipew)/float64(imgw)*100 < float64(min) { - return true - } - return false -} - -// Wipe fills the sections of image which fall outside the content -// area with white, providing the content area is above min % -func Wipe(img *image.Gray, wsize int, thresh float64, min int) *image.Gray { - integral := integralimg.ToIntegralImg(img) - lowedge, highedge := findedges(integral, wsize, thresh) - if toonarrow(img, lowedge, highedge, min) { - return img - } - return wipesides(img, lowedge, highedge) -} - -// WipeFile wipes an image file, filling the sections of the image -// which fall outside the content area with white, providing the -// content area is above min %. -// inPath: path of the input image. -// outPath: path to save the output image. -// wsize: window size for wipe algorithm. -// thresh: threshold for wipe algorithm. -// min: minimum % of content area width to consider valid. -func WipeFile(inPath string, outPath string, wsize int, thresh float64, min int) error { - f, err := os.Open(inPath) - defer f.Close() - if err != nil { - return errors.New(fmt.Sprintf("Could not open file %s: %v", inPath, err)) - } - img, _, err := image.Decode(f) - if err != nil { - return errors.New(fmt.Sprintf("Could not decode image: %v", err)) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - clean := Wipe(gray, wsize, thresh, min) - - f, err = os.Create(outPath) - if err != nil { - return errors.New(fmt.Sprintf("Could not create file %s: %v", outPath, err)) - } - defer f.Close() - err = png.Encode(f, clean) - if err != nil { - return errors.New(fmt.Sprintf("Could not encode image: %v", err)) - } - return nil -} diff --git a/wipesides_test.go b/wipesides_test.go deleted file mode 100644 index 4906c5b..0000000 --- a/wipesides_test.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019-2020 Nick White. -// Use of this source code is governed by the GPLv3 -// license that can be found in the LICENSE file. - -package preproc - -// TODO: add different pages as test cases -// TODO: test non integral img version - -import ( - "fmt" - "image" - "image/png" - "os" - "testing" -) - -func TestWipeSides(t *testing.T) { - cases := []struct { - name string - orig string - golden string - thresh float64 - wsize int - }{ - {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.02_w5.png", 0.02, 5}, - {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w5.png", 0.05, 5}, - {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w25.png", 0.05, 25}, - } - - for _, c := range cases { - t.Run(fmt.Sprintf("%s_%0.2f_%d", c.name, c.thresh, c.wsize), func(t *testing.T) { - var actual *image.Gray - orig, err := decode(c.orig) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.orig, err) - } - actual = Wipe(orig, c.wsize, c.thresh, 30) - if *update { - f, err := os.Create(c.golden) - defer f.Close() - if err != nil { - t.Fatalf("Could not open file %s to update: %v\n", c.golden, err) - } - err = png.Encode(f, actual) - if err != nil { - t.Fatalf("Could not encode update of %s: %v\n", c.golden, err) - } - } - golden, err := decode(c.golden) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.golden, err) - } - if !imgsequal(golden, actual) { - t.Errorf("Processed %s differs to %s\n", c.orig, c.golden) - } - }) - } -} |