// Source: preproc/wipesides.go (blob 24fb7bd7b1dfdeb88f34c1866d7295c441756d28)

package preproc

// TODO: add minimum size variable (default ~30%?)
// TODO: switch to an interface rather than integralimg.I

import (
	"image"
	"image/color"

	"rescribe.xyz/go.git/integralimg"
)

// proportion reports the proportion of black pixels in the vertical
// window of the given size that starts at x, as computed by the
// integral image's window.
func proportion(i integralimg.I, x int, size int) float64 {
	return i.GetVerticalWindow(x, size).Proportion()
}

// findbestedge scans every vertical line from x up to (but not
// including) x+w and returns the position of the one with the lowest
// proportion of black pixels. If several lines tie for the lowest
// proportion, the leftmost of them is returned. When w is 1 or less
// there is nothing to compare, so x is returned unchanged.
func findbestedge(img integralimg.I, x int, w int) int {
	if w <= 1 {
		return x
	}

	// Seed the search with the first line rather than with zero values,
	// so the result is always a position inside the window, even when
	// every line in it has the same proportion.
	bestx := x
	best := proportion(img, x, 1)

	for cur := x + 1; cur < x+w; cur++ {
		if prop := proportion(img, cur, 1); prop < best {
			best = prop
			bestx = cur
		}
	}

	return bestx
}

// findedges locates the left and right edges of the main content. A
// window of width wsize is slid outwards from near the middle of the
// image, once to the right and once to the left; each scan stops at the
// first position where the window's proportion of black pixels is at or
// below thresh, and findbestedge then picks the exact line within that
// window. If a scan never finds such a window, the corresponding edge
// stays at the image border (0 on the left, the last column on the
// right).
func findedges(img integralimg.I, wsize int, thresh float64) (int, int) {
	// index of the last column, taken from the length of the first row
	last := len(img[0]) - 1
	middle := last / 2

	// don't start exactly at the middle, as a 2 column layout has a
	// blank gutter there; start 10% of the width to either side instead
	offset := last / 10

	right := last
	for x := middle + offset; x < last-wsize; x++ {
		if p := proportion(img, x, wsize); p <= thresh {
			right = findbestedge(img, x, wsize)
			break
		}
	}

	left := 0
	for x := middle - offset; x > 0; x-- {
		if p := proportion(img, x, wsize); p <= thresh {
			left = findbestedge(img, x, wsize)
			break
		}
	}

	return left, right
}

// wipesides returns a new image with the same bounds as img in which
// every column from lowedge up to (but not including) highedge is copied
// from img, and every other column is filled with white. img itself is
// not modified.
func wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray {
	bounds := img.Bounds()
	out := image.NewGray(bounds)
	white := color.Gray{Y: 255}

	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
		for x := bounds.Min.X; x < bounds.Max.X; x++ {
			// columns outside [lowedge, highedge) are wiped
			if x < lowedge || x >= highedge {
				out.SetGray(x, y, white)
				continue
			}
			out.SetGray(x, y, img.GrayAt(x, y))
		}
	}

	return out
}

// toonarrow reports whether the area between lowedge and highedge covers
// less than min % of the total width of img.
func toonarrow(img *image.Gray, lowedge int, highedge int, min int) bool {
	total := img.Bounds().Dx()
	kept := highedge - lowedge
	return float64(kept)/float64(total)*100 < float64(min)
}

// wipe fills the sections of image which fall outside the content
// area with white, providing the content area is above min %
func Wipe(img *image.Gray, wsize int, thresh float64, min int) *image.Gray {
	integral := integralimg.ToIntegralImg(img)
	lowedge, highedge := findedges(integral, wsize, thresh)
	if toonarrow(img, lowedge, highedge, min) {
		return img
	}
	return wipesides(img, lowedge, highedge)
}