diff options
Diffstat (limited to 'cmd/splittable')
-rw-r--r-- | cmd/splittable/main.go | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/cmd/splittable/main.go b/cmd/splittable/main.go new file mode 100644 index 0000000..1029db1 --- /dev/null +++ b/cmd/splittable/main.go @@ -0,0 +1,139 @@ +package main + +import ( + "flag" + "fmt" + "image" + "image/draw" + _ "image/jpeg" + "image/png" + "log" + "os" + + "rescribe.xyz/preproc/integralimg" +) + +const usage = `Usage: splittable [-t thresh] [-w winsize] inimg outbase + +splittable is an experimental program to split a table into individual +cells suitable for OCR. It does this by detecting lines. At present it +just detects vertical lines and outputs images for each section +between those lines. + +` + +// returns the proportion of the given window that is black pixels +func proportion(i integralimg.I, x int, size int) float64 { + w := i.GetVerticalWindow(x, size) + return w.Proportion() +} + +// findbestvline goes through every vertical line from x to x+w to +// find the one with the lowest proportion of black pixels. +func findbestvline(img integralimg.I, x int, w int) int { + var bestx int + var best float64 + + if w == 1 { + return x + } + + right := x + w + for ; x < right; x++ { + prop := proportion(img, x, 1) + if prop > best { + best = prop + bestx = x + } + } + + return bestx +} + +// findvlines finds vertical lines, returning an array of x coordinates +// for each line. It works by moving a window of wsize across the image, +// marking each place where there is a higher proportion of black pixels +// than thresh. +func findvlines(img integralimg.I, wsize int, thresh float64) []int { + maxx := len(img[0]) - 1 + var lines []int + + for x := 0; x < maxx-wsize; x+=wsize { + if proportion(img, x, wsize) >= thresh { + l := findbestvline(img, x, wsize) + lines = append(lines, l) + } + } + + return lines +} + +func drawsection(img *image.Gray, x1 int, x2 int) *image.Gray { + b := img.Bounds() + width := x2-x1 + new := image.NewGray(image.Rect(0, b.Min.Y, width, b.Max.Y)) + + for x := 0; x < width; x++ { + for y := b.Min.Y; y < b.Max.Y; y++ { + new.SetGray(x, y, img.GrayAt(x1 + x, y)) + } + } + + return new +} + +func main() { + flag.Usage = func() { + fmt.Fprintf(flag.CommandLine.Output(), usage) + flag.PrintDefaults() + } + thresh := flag.Float64("t", 0.85, "Threshold for the proportion of black pixels below which a window is determined to be a line. Higher means fewer lines will be found.") + wsize := flag.Int("w", 1, "Window size for mask finding algorithm.") + flag.Parse() + if flag.NArg() < 2 { + flag.Usage() + os.Exit(1) + } + + f, err := os.Open(flag.Arg(0)) + defer f.Close() + if err != nil { + log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) + } + img, _, err := image.Decode(f) + if err != nil { + log.Fatalf("Could not decode image: %v\n", err) + } + b := img.Bounds() + gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) + draw.Draw(gray, b, img, b.Min, draw.Src) + + integral := integralimg.ToIntegralImg(gray) + vlines := findvlines(integral, *wsize, *thresh) + + for i, v := range vlines { + fmt.Printf("line detected at x=%d\n", v) + + if i+1 >= len(vlines) { + break + } + section := drawsection(gray, v, vlines[i+1]) + + fn := fmt.Sprintf("%s-%d.png", flag.Arg(1), v) + f, err = os.Create(fn) + if err != nil { + log.Fatalf("Could not create file %s: %v\n", fn, err) + } + defer f.Close() + err := png.Encode(f, section) + if err != nil { + log.Fatalf("Could not encode image %s: %v\n", fn, err) + } + } + + + // TODO: find horizontal lines too + // TODO: do rotation + // TODO: output table cells + // TODO: potentially send cells straight to tesseract +} |