diff options
Diffstat (limited to 'parse')
| -rw-r--r-- | parse/hocr/hocr.go | 5 | ||||
| -rw-r--r-- | parse/line.go | 18 | ||||
| -rw-r--r-- | parse/prob/prob.go | 72 | 
3 files changed, 92 insertions, 3 deletions
| diff --git a/parse/hocr/hocr.go b/parse/hocr/hocr.go index a281a7a..f7cac05 100644 --- a/parse/hocr/hocr.go +++ b/parse/hocr/hocr.go @@ -1,7 +1,8 @@  package hocr  // TODO: consider making GetLineDetails() a function of Hocr, so could do a -//       similar thing with prob format files too. +//       similar thing with prob format files too, and then fire them both +//       off a generic interface, potentially.  // TODO: Parse line name to zero pad line numbers, so they come out in the correct order  import ( @@ -137,7 +138,7 @@ func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, erro  		}  		line.Text = strings.TrimRight(linetext, " ")  		line.Text += "\n" -		line.Hocrname = name +		line.OcrName = name  		var imgd parse.ImgDirect  		imgd.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))  		line.Img = imgd diff --git a/parse/line.go b/parse/line.go index 3ddde76..9a2be8e 100644 --- a/parse/line.go +++ b/parse/line.go @@ -9,6 +9,7 @@ import (  	"image"  	"image/png"  	"io" +	"os"  )  type LineDetail struct { @@ -16,7 +17,7 @@ type LineDetail struct {  	Avgconf float64  	Img CopyableLine  	Text string -	Hocrname string +	OcrName string  }  type CopyableLine interface { @@ -37,6 +38,21 @@ func (i ImgDirect) CopyLineTo(w io.Writer) (error) {  	return nil  } +type ImgPath struct { +	Path string +} + +func (i ImgPath) CopyLineTo(w io.Writer) (error) { +	f, err := os.Open(i.Path) +	if err != nil { +		return err +	} +	defer f.Close() + +	_, err = io.Copy(w, f) +	return err +} +  type LineDetails []LineDetail  // Used by sort.Sort. diff --git a/parse/prob/prob.go b/parse/prob/prob.go new file mode 100644 index 0000000..5a84567 --- /dev/null +++ b/parse/prob/prob.go @@ -0,0 +1,72 @@ +package prob + +import ( +	"bufio" +	"io/ioutil" +	"path/filepath" +	"strconv" +	"strings" + +	"git.rescribe.xyz/testingtools/parse" +) + +// TODO: probably switch to just relying on io.Reader +func getLineAvg(r *bufio.Reader) (float64, error) { +	var err error + +	totalconf := float64(0) +	num := 0 + +	err = nil +	for err == nil { +		var line string +		line, err = r.ReadString('\n') +		fields := strings.Fields(line) + +		if len(fields) == 2 { +			conf, converr := strconv.ParseFloat(fields[1], 64) +			if converr != nil { +				continue +			} +			totalconf += conf +			num += 1 +		} +	} +	if num <= 0 { +		return 0, nil +	} +	avg := totalconf / float64(num) +	return avg, nil +} + +// TODO: probably switch to just relying on io.Reader +// Note this only processes one line at a time +func GetLineDetails(name string, r *bufio.Reader) (parse.LineDetails, error) { +	var line parse.LineDetail +	lines := make(parse.LineDetails, 0) + +	avg, err := getLineAvg(r) +	if err != nil { +		return lines, err +	} + +	filebase := strings.Replace(name, ".prob", "", 1) + +	txt, err := ioutil.ReadFile(filebase + ".txt") +	if err != nil { +		return lines, err +	} + +	line.Name = name +	line.Avgconf = avg +	line.Text = string(txt) +	line.OcrName = filepath.Dir(filebase) + +	var imgfn parse.ImgPath +	imgfn.Path = filebase + ".bin.png" +	line.Img = imgfn + +	lines = append(lines, line) + +	return lines, nil +} | 
