summaryrefslogtreecommitdiff
path: root/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'cmd')
-rw-r--r--cmd/binarize/main.go82
-rw-r--r--cmd/preproc/main.go94
-rw-r--r--cmd/preprocmulti/main.go105
-rw-r--r--cmd/splittable/main.go143
-rw-r--r--cmd/wipe/main.go59
5 files changed, 0 insertions, 483 deletions
diff --git a/cmd/binarize/main.go b/cmd/binarize/main.go
deleted file mode 100644
index a844982..0000000
--- a/cmd/binarize/main.go
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package main
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/preproc"
-)
-
-// autowsize picks a default window size of one sixtieth of the
-// width of bounds, using integer division.
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
-	width := bounds.Max.X - bounds.Min.X
-	return width / 60
-}
-
-// main implements the binarize command: it decodes the input image,
-// converts it to grayscale, thresholds it with preproc.IntegralSauvola
-// (optionally followed by preproc.BinToZeroInv) and writes the result
-// to the output path as a PNG. Any failure is fatal.
-func main() {
-	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n")
-		flag.PrintDefaults()
-	}
-	wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.")
-	ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
-	btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.")
-	flag.Parse()
-	if flag.NArg() < 2 {
-		flag.Usage()
-		os.Exit(1)
-	}
-
-	f, err := os.Open(flag.Arg(0))
-	if err != nil {
-		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
-	}
-	// Only register the close once the open is known to have succeeded.
-	defer f.Close()
-	img, _, err := image.Decode(f)
-	if err != nil {
-		log.Fatalf("Could not decode image: %v\n", err)
-	}
-	b := img.Bounds()
-	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
-	// gray is anchored at the origin, so draw into its own bounds rather
-	// than b; the two differ when the decoded bounds do not start at (0, 0).
-	draw.Draw(gray, gray.Bounds(), img, b.Min, draw.Src)
-
-	if *wsize == 0 {
-		*wsize = autowsize(b)
-		log.Printf("Set window size to %d\n", *wsize)
-	}
-
-	// An even window size is bumped to the next odd number.
-	if *wsize%2 == 0 {
-		*wsize++
-	}
-
-	// TODO: come up with a way to set a good ksize automatically
-
-	var thresh image.Image = preproc.IntegralSauvola(gray, *ksize, *wsize)
-
-	if *btype == "zeroinv" {
-		// image.Decode commonly returns other concrete types (for
-		// example *image.YCbCr for JPEG), so convert to RGBA instead of
-		// panicking on a failed type assertion.
-		rgba, ok := img.(*image.RGBA)
-		if !ok {
-			rgba = image.NewRGBA(gray.Bounds())
-			draw.Draw(rgba, rgba.Bounds(), img, b.Min, draw.Src)
-		}
-		thresh, err = preproc.BinToZeroInv(thresh.(*image.Gray), rgba)
-		if err != nil {
-			log.Fatal(err)
-		}
-	}
-
-	out, err := os.Create(flag.Arg(1))
-	if err != nil {
-		log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
-	}
-	if err := png.Encode(out, thresh); err != nil {
-		log.Fatalf("Could not encode image: %v\n", err)
-	}
-	// Close explicitly so that a failed final write is reported instead
-	// of silently leaving a truncated PNG behind.
-	if err := out.Close(); err != nil {
-		log.Fatalf("Could not close file %s: %v\n", flag.Arg(1), err)
-	}
-}
diff --git a/cmd/preproc/main.go b/cmd/preproc/main.go
deleted file mode 100644
index 3323635..0000000
--- a/cmd/preproc/main.go
+++ /dev/null
@@ -1,94 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package main
-
-// TODO: come up with a way to set a good ksize automatically
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/preproc"
-)
-
-// autowsize derives a default binarization window size from the
-// image width: the width of bounds divided by 60, rounded down.
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
-	const divisor = 60
-	return (bounds.Max.X - bounds.Min.X) / divisor
-}
-
-// main implements the preproc command: it decodes the input image,
-// converts it to grayscale, binarizes it with preproc.IntegralSauvola
-// (optionally followed by preproc.BinToZeroInv), wipes the sides with
-// preproc.Wipe unless -nowipe is given, and writes the result to the
-// output path as a PNG. Any failure is fatal.
-func main() {
-	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-m minperc] [-nowipe] [-wt wipethresh] [-ws wipesize] inimg outimg\n")
-		fmt.Fprintf(os.Stderr, "Binarize and preprocess an image\n")
-		flag.PrintDefaults()
-	}
-	binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
-	ksize := flag.Float64("k", 0.5, "K for sauvola binarization algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
-	btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
-	// Named minperc rather than min so the builtin is not shadowed.
-	minperc := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
-	nowipe := flag.Bool("nowipe", false, "Disable wiping completely.")
-	wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
-	thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.")
-	flag.Parse()
-	if flag.NArg() < 2 {
-		flag.Usage()
-		os.Exit(1)
-	}
-
-	f, err := os.Open(flag.Arg(0))
-	if err != nil {
-		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
-	}
-	// Only register the close once the open is known to have succeeded.
-	defer f.Close()
-	img, _, err := image.Decode(f)
-	if err != nil {
-		log.Fatalf("Could not decode image: %v\n", err)
-	}
-	b := img.Bounds()
-	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
-	// gray is anchored at the origin, so draw into its own bounds rather
-	// than b; the two differ when the decoded bounds do not start at (0, 0).
-	draw.Draw(gray, gray.Bounds(), img, b.Min, draw.Src)
-
-	if *binwsize == 0 {
-		*binwsize = autowsize(b)
-	}
-
-	// An even window size is bumped to the next odd number.
-	if *binwsize%2 == 0 {
-		*binwsize++
-	}
-
-	log.Print("Binarising")
-	var threshimg image.Image = preproc.IntegralSauvola(gray, *ksize, *binwsize)
-
-	if *btype == "zeroinv" {
-		// image.Decode commonly returns other concrete types (for
-		// example *image.YCbCr for JPEG), so convert to RGBA instead of
-		// panicking on a failed type assertion.
-		rgba, ok := img.(*image.RGBA)
-		if !ok {
-			rgba = image.NewRGBA(gray.Bounds())
-			draw.Draw(rgba, rgba.Bounds(), img, b.Min, draw.Src)
-		}
-		threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), rgba)
-		if err != nil {
-			log.Fatal(err)
-		}
-	}
-
-	clean := threshimg
-	if !*nowipe {
-		log.Print("Wiping sides")
-		// NOTE(review): this assertion panics if BinToZeroInv does not
-		// return a *image.Gray - confirm against the preproc package.
-		clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh, *minperc)
-	}
-
-	out, err := os.Create(flag.Arg(1))
-	if err != nil {
-		log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
-	}
-	if err := png.Encode(out, clean); err != nil {
-		log.Fatalf("Could not encode image: %v\n", err)
-	}
-	// Close explicitly so that a failed final write is reported instead
-	// of silently leaving a truncated PNG behind.
-	if err := out.Close(); err != nil {
-		log.Fatalf("Could not close file %s: %v\n", flag.Arg(1), err)
-	}
-}
diff --git a/cmd/preprocmulti/main.go b/cmd/preprocmulti/main.go
deleted file mode 100644
index aeac767..0000000
--- a/cmd/preprocmulti/main.go
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package main
-
-// TODO: come up with a way to set a good ksize automatically
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/preproc"
- "rescribe.xyz/preproc/integralimg"
-)
-
-// autowsize returns the default binarization window size for an
-// image with the given bounds, namely its width divided by 60.
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
-	size := bounds.Dx()
-	size /= 60
-	return size
-}
-
-// main implements the preprocmulti command: it decodes the input
-// image, converts it to grayscale, precalculates the integral images
-// once, and then for each k in ksizes binarizes with
-// preproc.PreCalcedSauvola, optionally applies preproc.BinToZeroInv and
-// preproc.Wipe, and saves the result as outbase_bin{k}.png. Any failure
-// is fatal.
-func main() {
-	ksizes := []float64{0.1, 0.2, 0.4, 0.5}
-
-	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "Usage: preprocmulti [-bt bintype] [-bw winsize] [-m minperc] [-nowipe] [-ws wipesize] inimg outbase\n")
-		fmt.Fprintf(os.Stderr, "Binarize and preprocess an image, with multiple binarisation levels,\n")
-		fmt.Fprintf(os.Stderr, "saving images to outbase_bin{k}.png.\n")
-		fmt.Fprintf(os.Stderr, "Binarises with these levels for k: %v.\n", ksizes)
-		flag.PrintDefaults()
-	}
-	binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
-	btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
-	// Named minperc rather than min so the builtin is not shadowed.
-	minperc := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
-	nowipe := flag.Bool("nowipe", false, "Disable wiping completely.")
-	wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
-	flag.Parse()
-	if flag.NArg() < 2 {
-		flag.Usage()
-		os.Exit(1)
-	}
-
-	log.Printf("Opening %s\n", flag.Arg(0))
-	f, err := os.Open(flag.Arg(0))
-	if err != nil {
-		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
-	}
-	// Only register the close once the open is known to have succeeded.
-	defer f.Close()
-	img, _, err := image.Decode(f)
-	if err != nil {
-		log.Fatalf("Could not decode image: %v\n", err)
-	}
-	b := img.Bounds()
-	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
-	// gray is anchored at the origin, so draw into its own bounds rather
-	// than b; the two differ when the decoded bounds do not start at (0, 0).
-	draw.Draw(gray, gray.Bounds(), img, b.Min, draw.Src)
-
-	if *binwsize == 0 {
-		*binwsize = autowsize(b)
-	}
-
-	// An even window size is bumped to the next odd number.
-	if *binwsize%2 == 0 {
-		*binwsize++
-	}
-
-	// The RGBA original is only needed for zeroinv. image.Decode
-	// commonly returns other concrete types (for example *image.YCbCr
-	// for JPEG), so convert once here instead of panicking on a failed
-	// type assertion inside the loop.
-	var rgba *image.RGBA
-	if *btype == "zeroinv" {
-		var ok bool
-		rgba, ok = img.(*image.RGBA)
-		if !ok {
-			rgba = image.NewRGBA(gray.Bounds())
-			draw.Draw(rgba, rgba.Bounds(), img, b.Min, draw.Src)
-		}
-	}
-
-	log.Print("Precalculating integral images")
-	integrals := integralimg.ToAllIntegralImg(gray)
-
-	for _, k := range ksizes {
-		log.Print("Binarising")
-		var threshimg image.Image = preproc.PreCalcedSauvola(integrals, gray, k, *binwsize)
-
-		if *btype == "zeroinv" {
-			threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), rgba)
-			if err != nil {
-				log.Fatal(err)
-			}
-		}
-
-		clean := threshimg
-		if !*nowipe {
-			log.Print("Wiping sides")
-			// The wipe threshold is scaled with k (k*0.02).
-			// NOTE(review): this assertion panics if BinToZeroInv does
-			// not return a *image.Gray - confirm against the preproc
-			// package.
-			clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, k*0.02, *minperc)
-		}
-
-		savefn := fmt.Sprintf("%s_bin%0.1f.png", flag.Arg(1), k)
-		log.Printf("Saving %s\n", savefn)
-		out, err := os.Create(savefn)
-		if err != nil {
-			log.Fatalf("Could not create file %s: %v\n", savefn, err)
-		}
-		if err := png.Encode(out, clean); err != nil {
-			log.Fatalf("Could not encode image: %v\n", err)
-		}
-		// Close inside the loop rather than deferring: a defer would keep
-		// every output file open until main returns and would discard
-		// any error from the final write.
-		if err := out.Close(); err != nil {
-			log.Fatalf("Could not close file %s: %v\n", savefn, err)
-		}
-	}
-}
diff --git a/cmd/splittable/main.go b/cmd/splittable/main.go
deleted file mode 100644
index e6df94a..0000000
--- a/cmd/splittable/main.go
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright 2020 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package main
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/preproc/integralimg"
-)
-
-// usage is the help text that flag.Usage prints ahead of the flag
-// defaults.
-const usage = `Usage: splittable [-t thresh] [-w winsize] inimg outbase
-
-splittable is an experimental program to split a table into individual
-cells suitable for OCR. It does this by detecting lines. At present it
-just detects vertical lines and outputs images for each section
-between those lines.
-
-`
-
-// proportion returns the proportion of black pixels in the vertical
-// window that the integral image yields for x and size.
-func proportion(i integralimg.I, x int, size int) float64 {
-	return i.GetVerticalWindow(x, size).Proportion()
-}
-
-// findbestvline goes through every vertical line from x up to, but
-// not including, x+w and returns the x coordinate of the one for which
-// proportion() is highest. When w is 1, or when no line scores above
-// zero, x itself is returned.
-func findbestvline(img integralimg.I, x int, w int) int {
-	if w == 1 {
-		return x
-	}
-
-	// Start from x rather than the zero value so that the result can
-	// never fall outside the range being searched.
-	bestx := x
-	var best float64
-
-	for right := x + w; x < right; x++ {
-		if prop := proportion(img, x, 1); prop > best {
-			best = prop
-			bestx = x
-		}
-	}
-
-	return bestx
-}
-
-// findvlines finds vertical lines, returning a slice of x coordinates,
-// one for each line. It works by moving a window of wsize across the
-// image in steps of wsize, and for every window whose proportion() is
-// at least thresh it records the best column found by findbestvline.
-// It returns nil if wsize is less than 1 or img has no rows.
-func findvlines(img integralimg.I, wsize int, thresh float64) []int {
-	// A non-positive wsize would never advance the loop below, and an
-	// empty image has no first row to take the width from.
-	if wsize < 1 || len(img) == 0 {
-		return nil
-	}
-
-	maxx := len(img[0]) - 1
-	var lines []int
-
-	// NOTE(review): the loop stops before maxx-wsize, so the rightmost
-	// columns may never be examined - confirm whether that is intended.
-	for x := 0; x < maxx-wsize; x += wsize {
-		if proportion(img, x, wsize) >= thresh {
-			lines = append(lines, findbestvline(img, x, wsize))
-		}
-	}
-
-	return lines
-}
-
-// drawsection copies the columns of img from x1 up to, but not
-// including, x2 into a new grayscale image. The new image's x axis
-// starts at 0 and its y range matches that of img.
-func drawsection(img *image.Gray, x1 int, x2 int) *image.Gray {
-	bounds := img.Bounds()
-	cols := x2 - x1
-	section := image.NewGray(image.Rect(0, bounds.Min.Y, cols, bounds.Max.Y))
-
-	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
-		for dx := 0; dx < cols; dx++ {
-			section.SetGray(dx, y, img.GrayAt(x1+dx, y))
-		}
-	}
-
-	return section
-}
-
-// main implements the splittable command: it decodes the input image,
-// converts it to grayscale, detects vertical lines with findvlines and
-// writes the section between each pair of neighbouring lines to
-// outbase-{x}.png, where x is the left-hand line. Every detected line
-// is also reported on standard output. Any failure is fatal.
-func main() {
-	flag.Usage = func() {
-		// Fprint rather than Fprintf: usage is text, not a format string.
-		fmt.Fprint(flag.CommandLine.Output(), usage)
-		flag.PrintDefaults()
-	}
-	// The help text says "at or above" to match the >= comparison that
-	// findvlines makes against this threshold.
-	thresh := flag.Float64("t", 0.85, "Threshold for the proportion of black pixels at or above which a window is determined to be a line. Higher means fewer lines will be found.")
-	wsize := flag.Int("w", 1, "Window size for mask finding algorithm.")
-	flag.Parse()
-	if flag.NArg() < 2 {
-		flag.Usage()
-		os.Exit(1)
-	}
-
-	f, err := os.Open(flag.Arg(0))
-	if err != nil {
-		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
-	}
-	// Only register the close once the open is known to have succeeded.
-	defer f.Close()
-	img, _, err := image.Decode(f)
-	if err != nil {
-		log.Fatalf("Could not decode image: %v\n", err)
-	}
-	b := img.Bounds()
-	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
-	// gray is anchored at the origin, so draw into its own bounds rather
-	// than b; the two differ when the decoded bounds do not start at (0, 0).
-	draw.Draw(gray, gray.Bounds(), img, b.Min, draw.Src)
-
-	integral := integralimg.ToIntegralImg(gray)
-	vlines := findvlines(integral, *wsize, *thresh)
-
-	for i, v := range vlines {
-		fmt.Printf("line detected at x=%d\n", v)
-
-		// The last line has no right-hand neighbour to form a section with.
-		if i+1 >= len(vlines) {
-			break
-		}
-		section := drawsection(gray, v, vlines[i+1])
-
-		fn := fmt.Sprintf("%s-%d.png", flag.Arg(1), v)
-		out, err := os.Create(fn)
-		if err != nil {
-			log.Fatalf("Could not create file %s: %v\n", fn, err)
-		}
-		if err := png.Encode(out, section); err != nil {
-			log.Fatalf("Could not encode image %s: %v\n", fn, err)
-		}
-		// Close inside the loop rather than deferring: a defer would keep
-		// every output file open until main returns and would discard
-		// any error from the final write.
-		if err := out.Close(); err != nil {
-			log.Fatalf("Could not close file %s: %v\n", fn, err)
-		}
-	}
-
-	// TODO: find horizontal lines too
-	// TODO: do rotation
-	// TODO: output table cells
-	// TODO: potentially send cells straight to tesseract
-}
diff --git a/cmd/wipe/main.go b/cmd/wipe/main.go
deleted file mode 100644
index d4c95ac..0000000
--- a/cmd/wipe/main.go
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2019 Nick White.
-// Use of this source code is governed by the GPLv3
-// license that can be found in the LICENSE file.
-
-package main
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/preproc"
-)
-
-// main implements the wipe command: it decodes the input image,
-// converts it to grayscale, passes it through preproc.Wipe and writes
-// the result to the output path as a PNG. Any failure is fatal.
-func main() {
-	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "Usage: wipe [-m minperc] [-t thresh] [-w winsize] inimg outimg\n")
-		fmt.Fprintf(os.Stderr, "Wipes the sections of an image which are outside the content area.\n")
-		flag.PrintDefaults()
-	}
-	// Named minperc rather than min so the builtin is not shadowed.
-	minperc := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
-	thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge. Higher means more aggressive wiping.")
-	wsize := flag.Int("w", 5, "Window size for mask finding algorithm.")
-	flag.Parse()
-	if flag.NArg() < 2 {
-		flag.Usage()
-		os.Exit(1)
-	}
-
-	f, err := os.Open(flag.Arg(0))
-	if err != nil {
-		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
-	}
-	// Only register the close once the open is known to have succeeded.
-	defer f.Close()
-	img, _, err := image.Decode(f)
-	if err != nil {
-		log.Fatalf("Could not decode image: %v\n", err)
-	}
-	b := img.Bounds()
-	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
-	// gray is anchored at the origin, so draw into its own bounds rather
-	// than b; the two differ when the decoded bounds do not start at (0, 0).
-	draw.Draw(gray, gray.Bounds(), img, b.Min, draw.Src)
-
-	clean := preproc.Wipe(gray, *wsize, *thresh, *minperc)
-
-	out, err := os.Create(flag.Arg(1))
-	if err != nil {
-		log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
-	}
-	if err := png.Encode(out, clean); err != nil {
-		log.Fatalf("Could not encode image: %v\n", err)
-	}
-	// Close explicitly so that a failed final write is reported instead
-	// of silently leaving a truncated PNG behind.
-	if err := out.Close(); err != nil {
-		log.Fatalf("Could not close file %s: %v\n", flag.Arg(1), err)
-	}
-}