diff options
| author | Nick White <git@njw.name> | 2019-01-23 20:47:33 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-01-23 20:47:33 +0000 | 
| commit | d256f967a26ceeb7c3987a1fc447b126a35054f9 (patch) | |
| tree | f80c1e3e2c3757c59ad51dec98de5b6e82a426fa | |
| parent | c41aa16d8a3d35ce4185184ee50536bf2089a120 (diff) | |
Separate out hocr parts from line parts
| -rw-r--r-- | line-conf-buckets-tess/line-conf-buckets-tess.go | 5 | ||||
| -rw-r--r-- | parse/hocr/hocr.go (renamed from hocr/hocr.go) | 59 | ||||
| -rw-r--r-- | parse/line.go | 51 | 
3 files changed, 63 insertions, 52 deletions
| diff --git a/line-conf-buckets-tess/line-conf-buckets-tess.go b/line-conf-buckets-tess/line-conf-buckets-tess.go index facd108..b24bdec 100644 --- a/line-conf-buckets-tess/line-conf-buckets-tess.go +++ b/line-conf-buckets-tess/line-conf-buckets-tess.go @@ -17,7 +17,8 @@ import (  	"strconv"  	"strings" -	"git.rescribe.xyz/testingtools/hocr" +	"git.rescribe.xyz/testingtools/parse" +	"git.rescribe.xyz/testingtools/parse/hocr"  )  func main() { @@ -33,7 +34,7 @@ func main() {  		os.Exit(1)  	} -	lines := make(hocr.LineDetails, 0) +	lines := make(parse.LineDetails, 0)  	for _, f := range flag.Args() {  		file, err := ioutil.ReadFile(f) diff --git a/hocr/hocr.go b/parse/hocr/hocr.go index 0c1295c..a281a7a 100644 --- a/hocr/hocr.go +++ b/parse/hocr/hocr.go @@ -1,59 +1,18 @@  package hocr -// TODO: separate out linedetail to a general structure that can incorporate -//       line-conf-buckets too, in a different file (and rename package to -//       something more generic). Do this using the CopyableLine interface +// TODO: consider making GetLineDetails() a function of Hocr, so could do a +//       similar thing with prob format files too.  // TODO: Parse line name to zero pad line numbers, so they come out in the correct order  import (  	"encoding/xml"  	"image" -	"image/png" -	"io"  	"regexp"  	"strconv"  	"strings" -) - -// TODO: move the linedetail stuff out to a separate file, and create a new -//       CopyableLine implementing struct for ocropy, which will just store -//       a file location -type LineDetail struct { -	Name string -	Avgconf float64 -	Img CopyableLine -	Text string -	Hocrname string -} - -type CopyableLine interface { -	CopyLineTo(io.Writer) (error) -} - -type ImgDirect struct { -	img image.Image -} - -func (i ImgDirect) CopyLineTo(w io.Writer) (error) { -	err := png.Encode(w, i.img) -	if err != nil { -		return err -	} -	return nil -} - -type LineDetails []LineDetail -// Used by sort.Sort. -func (l LineDetails) Len() int { return len(l) } - -// Used by sort.Sort. -func (l LineDetails) Less(i, j int) bool { -	return l[i].Avgconf < l[j].Avgconf -} - -// Used by sort.Sort. -func (l LineDetails) Swap(i, j int) { l[i], l[j] = l[j], l[i] } +	"git.rescribe.xyz/testingtools/parse" +)  type Hocr struct {  	Lines []OcrLine `xml:"body>div>div>p>span"` @@ -126,8 +85,8 @@ func Parse(b []byte) (Hocr, error) {  	return hocr, nil  } -func GetLineDetails(h Hocr, i image.Image, name string) (LineDetails, error) { -	lines := make(LineDetails, 0) +func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) { +	lines := make(parse.LineDetails, 0)  	for _, l := range h.Lines {  		totalconf := float64(0) @@ -146,7 +105,7 @@ func GetLineDetails(h Hocr, i image.Image, name string) (LineDetails, error) {  			return lines, err  		} -		var line LineDetail +		var line parse.LineDetail  		line.Name = l.Id  		line.Avgconf = (totalconf/float64(num)) / 100  		linetext := "" @@ -179,8 +138,8 @@ func GetLineDetails(h Hocr, i image.Image, name string) (LineDetails, error) {  		line.Text = strings.TrimRight(linetext, " ")  		line.Text += "\n"  		line.Hocrname = name -		var imgd ImgDirect -		imgd.img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3])) +		var imgd parse.ImgDirect +		imgd.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))  		line.Img = imgd  		lines = append(lines, line)  	} diff --git a/parse/line.go b/parse/line.go new file mode 100644 index 0000000..3ddde76 --- /dev/null +++ b/parse/line.go @@ -0,0 +1,51 @@ +package parse + +// TODO: integrate in line-conf-buckets linedetail +// TODO: add BucketUp() function here that does what both line-conf-buckets-tess.go +//       and line-conf-buckets.go do +// TODO: consider naming this package line, and separating it from hocr and prob + +import ( +	"image" +	"image/png" +	"io" +) + +type LineDetail struct { +	Name string +	Avgconf float64 +	Img CopyableLine +	Text string +	Hocrname string +} + +type CopyableLine interface { +	CopyLineTo(io.Writer) (error) +} + +// This is an implementation of the CopyableLine interface that +// stores the image directly as an image.Image +type ImgDirect struct { +	Img image.Image +} + +func (i ImgDirect) CopyLineTo(w io.Writer) (error) { +	err := png.Encode(w, i.Img) +	if err != nil { +		return err +	} +	return nil +} + +type LineDetails []LineDetail + +// Used by sort.Sort. +func (l LineDetails) Len() int { return len(l) } + +// Used by sort.Sort. +func (l LineDetails) Less(i, j int) bool { +	return l[i].Avgconf < l[j].Avgconf +} + +// Used by sort.Sort. +func (l LineDetails) Swap(i, j int) { l[i], l[j] = l[j], l[i] } | 
