summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick White <git@njw.name>2019-06-03 10:54:02 +0100
committerNick White <git@njw.name>2019-06-03 10:54:02 +0100
commit20b694ee8510692804908f51fbbee3c39c859f37 (patch)
tree106a1354022f1f4ad8983799a88a26dd0f665969
parentd7f07893d08d9c29f46e50c4f779b0e701f411e4 (diff)
Add -m option to wipe to set minimum content area for wipe to proceed
If content is very light or sparse, it may be better not to wipe at all than to wipe almost all of the content, leaving only a small strip. The wipe is now aborted if the detected content area is narrower than the minimum percentage of the page width (default is 30%).
-rw-r--r--preproc/cmd/preproc/main.go6
-rw-r--r--preproc/cmd/preprocmulti/main.go6
-rw-r--r--preproc/cmd/wipe/main.go9
-rw-r--r--preproc/wipesides.go19
4 files changed, 27 insertions, 13 deletions
diff --git a/preproc/cmd/preproc/main.go b/preproc/cmd/preproc/main.go
index 053de8f..d8d5b66 100644
--- a/preproc/cmd/preproc/main.go
+++ b/preproc/cmd/preproc/main.go
@@ -1,7 +1,6 @@
package main
// TODO: come up with a way to set a good ksize automatically
-// TODO: add minimum size variable (default ~30%?) for wipe
import (
"flag"
@@ -23,13 +22,14 @@ func autowsize(bounds image.Rectangle) int {
func main() {
flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-wt wipethresh] [-ws wipesize] inimg outimg\n")
+ fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-m minperc] [-wt wipethresh] [-ws wipesize] inimg outimg\n")
fmt.Fprintf(os.Stderr, "Binarize and preprocess an image\n")
flag.PrintDefaults()
}
binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
ksize := flag.Float64("k", 0.5, "K for sauvola binarization algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
+ min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.")
flag.Parse()
@@ -71,7 +71,7 @@ func main() {
}
log.Print("Wiping sides")
- clean := preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh)
+ clean := preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh, *min)
f, err = os.Create(flag.Arg(1))
if err != nil {
diff --git a/preproc/cmd/preprocmulti/main.go b/preproc/cmd/preprocmulti/main.go
index eea7ab4..947625e 100644
--- a/preproc/cmd/preprocmulti/main.go
+++ b/preproc/cmd/preprocmulti/main.go
@@ -1,7 +1,6 @@
package main
// TODO: come up with a way to set a good ksize automatically
-// TODO: add minimum size variable (default ~30%?) for wipe
import (
"flag"
@@ -26,7 +25,7 @@ func main() {
ksizes := []float64{0.2, 0.3, 0.4, 0.5, 0.6}
flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-wt wipethresh] [-ws wipesize] inimg outbase\n")
+ fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-m minperc] [-wt wipethresh] [-ws wipesize] inimg outbase\n")
fmt.Fprintf(os.Stderr, "Binarize and preprocess an image, with multiple binarisation levels,\n")
fmt.Fprintf(os.Stderr, "saving images to outbase_bin{k}.png.\n")
fmt.Fprintf(os.Stderr, "Binarises with these levels for k: %v.\n", ksizes)
@@ -34,6 +33,7 @@ func main() {
}
binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
+ min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.")
flag.Parse()
@@ -80,7 +80,7 @@ func main() {
}
log.Print("Wiping sides")
- clean := preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh)
+ clean := preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh, *min)
savefn := fmt.Sprintf("%s_bin%0.1f.png", flag.Arg(1), k)
log.Printf("Saving %s\n", savefn)
diff --git a/preproc/cmd/wipe/main.go b/preproc/cmd/wipe/main.go
index e735a0a..ceff2e7 100644
--- a/preproc/cmd/wipe/main.go
+++ b/preproc/cmd/wipe/main.go
@@ -1,7 +1,5 @@
package main
-// TODO: add minimum size variable (default ~30%?)
-
import (
"flag"
"fmt"
@@ -17,12 +15,13 @@ import (
func main() {
flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: wipe [-t thresh] [-w winsize] inimg outimg\n")
+ fmt.Fprintf(os.Stderr, "Usage: wipe [-m minperc] [-t thresh] [-w winsize] inimg outimg\n")
fmt.Fprintf(os.Stderr, "Wipes the sections of an image which are outside the content area.\n")
flag.PrintDefaults()
}
- wsize := flag.Int("w", 5, "Window size for mask finding algorithm.")
+ min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge.")
+ wsize := flag.Int("w", 5, "Window size for mask finding algorithm.")
flag.Parse()
if flag.NArg() < 2 {
flag.Usage()
@@ -42,7 +41,7 @@ func main() {
gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
draw.Draw(gray, b, img, b.Min, draw.Src)
- clean := preproc.Wipe(gray, *wsize, *thresh)
+ clean := preproc.Wipe(gray, *wsize, *thresh, *min)
f, err = os.Create(flag.Arg(1))
if err != nil {
diff --git a/preproc/wipesides.go b/preproc/wipesides.go
index 04bfa11..9b8387a 100644
--- a/preproc/wipesides.go
+++ b/preproc/wipesides.go
@@ -90,10 +90,25 @@ func wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray {
return new
}
+// toonarrow reports whether the width between lowedge and highedge is
+// strictly less than min percent of the width of img's bounds.
+func toonarrow(img *image.Gray, lowedge int, highedge int, min int) bool {
+ b := img.Bounds()
+ imgw := b.Max.X - b.Min.X
+ wipew := highedge - lowedge
+ if float64(wipew) / float64(imgw) * 100 < float64(min) {
+ return true
+ }
+ return false
+}
+
// wipe fills the sections of image which fall outside the content
-// area with white
-func Wipe(img *image.Gray, wsize int, thresh float64) *image.Gray {
+// area with white, providing the content area is above min %
+func Wipe(img *image.Gray, wsize int, thresh float64, min int) *image.Gray {
integral := integralimg.ToIntegralImg(img)
lowedge, highedge := findedges(integral, wsize, thresh)
+ if toonarrow(img, lowedge, highedge, min) {
+ return img
+ }
return wipesides(img, lowedge, highedge)
}