diff options
| author | Nick White <git@njw.name> | 2019-06-03 10:54:02 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-06-03 10:54:02 +0100 | 
| commit | 20b694ee8510692804908f51fbbee3c39c859f37 (patch) | |
| tree | 106a1354022f1f4ad8983799a88a26dd0f665969 /preproc | |
| parent | d7f07893d08d9c29f46e50c4f779b0e701f411e4 (diff) | |
Add -m option to wipe to set minimum content area for wipe to proceed
If content is very light or sparse it may be better to not wipe at all
than wipe almost all of the content leaving a small strip. This is done
now by aborting the wipe if the detected content takes up less than the
minimum % of the page width (default is 30%).
Diffstat (limited to 'preproc')
| -rw-r--r-- | preproc/cmd/preproc/main.go | 6 | ||||
| -rw-r--r-- | preproc/cmd/preprocmulti/main.go | 6 | ||||
| -rw-r--r-- | preproc/cmd/wipe/main.go | 9 | ||||
| -rw-r--r-- | preproc/wipesides.go | 19 | 
4 files changed, 27 insertions, 13 deletions
| diff --git a/preproc/cmd/preproc/main.go b/preproc/cmd/preproc/main.go index 053de8f..d8d5b66 100644 --- a/preproc/cmd/preproc/main.go +++ b/preproc/cmd/preproc/main.go @@ -1,7 +1,6 @@  package main  // TODO: come up with a way to set a good ksize automatically -// TODO: add minimum size variable (default ~30%?) for wipe  import (  	"flag" @@ -23,13 +22,14 @@ func autowsize(bounds image.Rectangle) int {  func main() {  	flag.Usage = func() { -		fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-wt wipethresh] [-ws wipesize] inimg outimg\n") +		fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-m minperc] [-wt wipethresh] [-ws wipesize] inimg outimg\n")  		fmt.Fprintf(os.Stderr, "Binarize and preprocess an image\n")  		flag.PrintDefaults()  	}  	binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")  	ksize := flag.Float64("k", 0.5, "K for sauvola binarization algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")  	btype := flag.String("bt", "binary", "Type of binarization threshold. 
binary or zeroinv are currently implemented.") +	min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")  	wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")  	thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.")  	flag.Parse() @@ -71,7 +71,7 @@ func main() {  	}  	log.Print("Wiping sides") -	clean := preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh) +	clean := preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh, *min)  	f, err = os.Create(flag.Arg(1))  	if err != nil { diff --git a/preproc/cmd/preprocmulti/main.go b/preproc/cmd/preprocmulti/main.go index eea7ab4..947625e 100644 --- a/preproc/cmd/preprocmulti/main.go +++ b/preproc/cmd/preprocmulti/main.go @@ -1,7 +1,6 @@  package main  // TODO: come up with a way to set a good ksize automatically -// TODO: add minimum size variable (default ~30%?) for wipe  import (  	"flag" @@ -26,7 +25,7 @@ func main() {  	ksizes := []float64{0.2, 0.3, 0.4, 0.5, 0.6}  	flag.Usage = func() { -		fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-wt wipethresh] [-ws wipesize] inimg outbase\n") +		fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-m minperc] [-wt wipethresh] [-ws wipesize] inimg outbase\n")  		fmt.Fprintf(os.Stderr, "Binarize and preprocess an image, with multiple binarisation levels,\n")  		fmt.Fprintf(os.Stderr, "saving images to outbase_bin{k}.png.\n")  		fmt.Fprintf(os.Stderr, "Binarises with these levels for k: %v.\n", ksizes) @@ -34,6 +33,7 @@ func main() {  	}  	binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")  	btype := flag.String("bt", "binary", "Type of binarization threshold. 
binary or zeroinv are currently implemented.") +	min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")  	wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")  	thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.")  	flag.Parse() @@ -80,7 +80,7 @@ func main() {  		}  		log.Print("Wiping sides") -		clean := preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh) +		clean := preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh, *min)  		savefn := fmt.Sprintf("%s_bin%0.1f.png", flag.Arg(1), k)  		log.Printf("Saving %s\n", savefn) diff --git a/preproc/cmd/wipe/main.go b/preproc/cmd/wipe/main.go index e735a0a..ceff2e7 100644 --- a/preproc/cmd/wipe/main.go +++ b/preproc/cmd/wipe/main.go @@ -1,7 +1,5 @@  package main -// TODO: add minimum size variable (default ~30%?) -  import (  	"flag"  	"fmt" @@ -17,12 +15,13 @@ import (  func main() {  	flag.Usage = func() { -		fmt.Fprintf(os.Stderr, "Usage: wipe [-t thresh] [-w winsize] inimg outimg\n") +		fmt.Fprintf(os.Stderr, "Usage: wipe [-m minperc] [-t thresh] [-w winsize] inimg outimg\n")  		fmt.Fprintf(os.Stderr, "Wipes the sections of an image which are outside the content area.\n")  		flag.PrintDefaults()  	} -	wsize := flag.Int("w", 5, "Window size for mask finding algorithm.") +	min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")  	thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge.") +	wsize := flag.Int("w", 5, "Window size for mask finding algorithm.")  	flag.Parse()  	if flag.NArg() < 2 {  		flag.Usage() @@ -42,7 +41,7 @@ func main() {  	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))  	draw.Draw(gray, b, img, b.Min, draw.Src) -	clean := preproc.Wipe(gray, 
*wsize, *thresh) +	clean := preproc.Wipe(gray, *wsize, *thresh, *min)  	f, err = os.Create(flag.Arg(1))  	if err != nil { diff --git a/preproc/wipesides.go b/preproc/wipesides.go index 04bfa11..9b8387a 100644 --- a/preproc/wipesides.go +++ b/preproc/wipesides.go @@ -90,10 +90,25 @@ func wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray {  	return new  } +// toonarrow checks whether the area between lowedge and highedge is +// less than min % of the total image width +func toonarrow(img *image.Gray, lowedge int, highedge int, min int) bool { +	b := img.Bounds() +	imgw := b.Max.X - b.Min.X +	wipew := highedge - lowedge +	if float64(wipew) / float64(imgw) * 100 < float64(min) { +		return true +	} +	return false +} +  // wipe fills the sections of image which fall outside the content -// area with white -func Wipe(img *image.Gray, wsize int, thresh float64) *image.Gray { +// area with white, providing the content area is above min % +func Wipe(img *image.Gray, wsize int, thresh float64, min int) *image.Gray {  	integral := integralimg.ToIntegralImg(img)  	lowedge, highedge := findedges(integral, wsize, thresh) +	if toonarrow(img, lowedge, highedge, min) { +		return img +	}  	return wipesides(img, lowedge, highedge)  } | 
