diff options
-rw-r--r-- | line-conf-buckets-tess/line-conf-buckets-tess.go | 5 | ||||
-rw-r--r-- | parse/hocr/hocr.go (renamed from hocr/hocr.go) | 59 | ||||
-rw-r--r-- | parse/line.go | 51 |
3 files changed, 63 insertions, 52 deletions
diff --git a/line-conf-buckets-tess/line-conf-buckets-tess.go b/line-conf-buckets-tess/line-conf-buckets-tess.go index facd108..b24bdec 100644 --- a/line-conf-buckets-tess/line-conf-buckets-tess.go +++ b/line-conf-buckets-tess/line-conf-buckets-tess.go @@ -17,7 +17,8 @@ import ( "strconv" "strings" - "git.rescribe.xyz/testingtools/hocr" + "git.rescribe.xyz/testingtools/parse" + "git.rescribe.xyz/testingtools/parse/hocr" ) func main() { @@ -33,7 +34,7 @@ func main() { os.Exit(1) } - lines := make(hocr.LineDetails, 0) + lines := make(parse.LineDetails, 0) for _, f := range flag.Args() { file, err := ioutil.ReadFile(f) diff --git a/hocr/hocr.go b/parse/hocr/hocr.go index 0c1295c..a281a7a 100644 --- a/hocr/hocr.go +++ b/parse/hocr/hocr.go @@ -1,59 +1,18 @@ package hocr -// TODO: separate out linedetail to a general structure that can incorporate -// line-conf-buckets too, in a different file (and rename package to -// something more generic). Do this using the CopyableLine interface +// TODO: consider making GetLineDetails() a function of Hocr, so could do a +// similar thing with prob format files too. // TODO: Parse line name to zero pad line numbers, so they come out in the correct order import ( "encoding/xml" "image" - "image/png" - "io" "regexp" "strconv" "strings" -) - -// TODO: move the linedetail stuff out to a separate file, and create a new -// CopyableLine implementing struct for ocropy, which will just store -// a file location -type LineDetail struct { - Name string - Avgconf float64 - Img CopyableLine - Text string - Hocrname string -} - -type CopyableLine interface { - CopyLineTo(io.Writer) (error) -} - -type ImgDirect struct { - img image.Image -} - -func (i ImgDirect) CopyLineTo(w io.Writer) (error) { - err := png.Encode(w, i.img) - if err != nil { - return err - } - return nil -} - -type LineDetails []LineDetail -// Used by sort.Sort. -func (l LineDetails) Len() int { return len(l) } - -// Used by sort.Sort. -func (l LineDetails) Less(i, j int) bool { - return l[i].Avgconf < l[j].Avgconf -} - -// Used by sort.Sort. -func (l LineDetails) Swap(i, j int) { l[i], l[j] = l[j], l[i] } + "git.rescribe.xyz/testingtools/parse" +) type Hocr struct { Lines []OcrLine `xml:"body>div>div>p>span"` @@ -126,8 +85,8 @@ func Parse(b []byte) (Hocr, error) { return hocr, nil } -func GetLineDetails(h Hocr, i image.Image, name string) (LineDetails, error) { - lines := make(LineDetails, 0) +func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) { + lines := make(parse.LineDetails, 0) for _, l := range h.Lines { totalconf := float64(0) @@ -146,7 +105,7 @@ func GetLineDetails(h Hocr, i image.Image, name string) (LineDetails, error) { return lines, err } - var line LineDetail + var line parse.LineDetail line.Name = l.Id line.Avgconf = (totalconf/float64(num)) / 100 linetext := "" @@ -179,8 +138,8 @@ func GetLineDetails(h Hocr, i image.Image, name string) (LineDetails, error) { line.Text = strings.TrimRight(linetext, " ") line.Text += "\n" line.Hocrname = name - var imgd ImgDirect - imgd.img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3])) + var imgd parse.ImgDirect + imgd.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3])) line.Img = imgd lines = append(lines, line) } diff --git a/parse/line.go b/parse/line.go new file mode 100644 index 0000000..3ddde76 --- /dev/null +++ b/parse/line.go @@ -0,0 +1,51 @@ +package parse + +// TODO: integrate in line-conf-buckets linedetail +// TODO: add BucketUp() function here that does what both line-conf-buckets-tess.go +// and line-conf-buckets.go do +// TODO: consider naming this package line, and separating it from hocr and prob + +import ( + "image" + "image/png" + "io" +) + +type LineDetail struct { + Name string + Avgconf float64 + Img CopyableLine + Text string + Hocrname string +} + +type CopyableLine interface { + CopyLineTo(io.Writer) (error) +} + +// This is an implementation of the CopyableLine interface that +// stores the image directly as an image.Image +type ImgDirect struct { + Img image.Image +} + +func (i ImgDirect) CopyLineTo(w io.Writer) (error) { + err := png.Encode(w, i.Img) + if err != nil { + return err + } + return nil +} + +type LineDetails []LineDetail + +// Used by sort.Sort. +func (l LineDetails) Len() int { return len(l) } + +// Used by sort.Sort. +func (l LineDetails) Less(i, j int) bool { + return l[i].Avgconf < l[j].Avgconf +} + +// Used by sort.Sort. +func (l LineDetails) Swap(i, j int) { l[i], l[j] = l[j], l[i] } |