diff options
Diffstat (limited to 'cmd/splittable')
| -rw-r--r-- | cmd/splittable/main.go | 139 | 
1 files changed, 139 insertions, 0 deletions
| diff --git a/cmd/splittable/main.go b/cmd/splittable/main.go new file mode 100644 index 0000000..1029db1 --- /dev/null +++ b/cmd/splittable/main.go @@ -0,0 +1,139 @@ +package main + +import ( +	"flag" +	"fmt" +	"image" +	"image/draw" +	_ "image/jpeg" +	"image/png" +	"log" +	"os" + +	"rescribe.xyz/preproc/integralimg" +) + +const usage = `Usage: splittable [-t thresh] [-w winsize] inimg outbase + +splittable is an experimental program to split a table into individual +cells suitable for OCR. It does this by detecting lines. At present it +just detects vertical lines and outputs images for each section +between those lines. + +` + +// returns the proportion of the given window that is black pixels +func proportion(i integralimg.I, x int, size int) float64 { +	w := i.GetVerticalWindow(x, size) +	return w.Proportion() +} + +// findbestvline goes through every vertical line from x to x+w to +// find the one with the lowest proportion of black pixels. +func findbestvline(img integralimg.I, x int, w int) int { +	var bestx int +	var best float64 + +	if w == 1 { +		return x +	} + +	right := x + w +	for ; x < right; x++ { +		prop := proportion(img, x, 1) +		if prop > best { +			best = prop +			bestx = x +		} +	} + +	return bestx +} + +// findvlines finds vertical lines, returning an array of x coordinates +// for each line. It works by moving a window of wsize across the image, +// marking each place where there is a higher proportion of black pixels +// than thresh. +func findvlines(img integralimg.I, wsize int, thresh float64) []int { +	maxx := len(img[0]) - 1 +	var lines []int + +	for x := 0; x < maxx-wsize; x+=wsize { +		if proportion(img, x, wsize) >= thresh { +			l := findbestvline(img, x, wsize) +			lines = append(lines, l) +		} +	} + +	return lines +} + +func drawsection(img *image.Gray, x1 int, x2 int) *image.Gray { +	b := img.Bounds() +	width := x2-x1 +	new := image.NewGray(image.Rect(0, b.Min.Y, width, b.Max.Y)) + +	for x := 0; x < width; x++ { +		for y := b.Min.Y; y < b.Max.Y; y++ { +			new.SetGray(x, y, img.GrayAt(x1 + x, y)) +		} +	} + +	return new +} + +func main() { +	flag.Usage = func() { +		fmt.Fprintf(flag.CommandLine.Output(), usage) +		flag.PrintDefaults() +	} +	thresh := flag.Float64("t", 0.85, "Threshold for the proportion of black pixels below which a window is determined to be a line. Higher means fewer lines will be found.") +	wsize := flag.Int("w", 1, "Window size for mask finding algorithm.") +	flag.Parse() +	if flag.NArg() < 2 { +		flag.Usage() +		os.Exit(1) +	} + +	f, err := os.Open(flag.Arg(0)) +	defer f.Close() +	if err != nil { +		log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) +	} +	img, _, err := image.Decode(f) +	if err != nil { +		log.Fatalf("Could not decode image: %v\n", err) +	} +	b := img.Bounds() +	gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) +	draw.Draw(gray, b, img, b.Min, draw.Src) + +	integral := integralimg.ToIntegralImg(gray) +	vlines := findvlines(integral, *wsize, *thresh) + +	for i, v := range vlines { +		fmt.Printf("line detected at x=%d\n", v) + +		if i+1 >= len(vlines) { +			break +		} +		section := drawsection(gray, v, vlines[i+1]) + +		fn := fmt.Sprintf("%s-%d.png", flag.Arg(1), v) +		f, err = os.Create(fn) +		if err != nil { +			log.Fatalf("Could not create file %s: %v\n", fn, err) +		} +		defer f.Close() +		err := png.Encode(f, section) +		if err != nil { +			log.Fatalf("Could not encode image %s: %v\n", fn, err) +		} +	} + + +	// TODO: find horizontal lines too +	// TODO: do rotation +	// TODO: output table cells +	// TODO: potentially send cells straight to tesseract +} | 
