From fb77852acbdbbcedcdb9771770cb6771da002851 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Wed, 23 Jan 2019 21:54:09 +0000
Subject: Update line-conf-buckets to mostly use package functions too.

Working now, but needs more consolidation to be worth it.
---
 parse/hocr/hocr.go |  5 ++--
 parse/line.go      | 18 +++++++++++++-
 parse/prob/prob.go | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 parse/prob/prob.go

(limited to 'parse')

diff --git a/parse/hocr/hocr.go b/parse/hocr/hocr.go
index a281a7a..f7cac05 100644
--- a/parse/hocr/hocr.go
+++ b/parse/hocr/hocr.go
@@ -1,7 +1,8 @@
 package hocr
 
 // TODO: consider making GetLineDetails() a function of Hocr, so could do a
-// similar thing with prob format files too.
+// similar thing with prob format files too, and then fire them both
+// off a generic interface, potentially.
 // TODO: Parse line name to zero pad line numbers, so they come out in the correct order
 
 import (
@@ -137,7 +138,7 @@ func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, erro
 		}
 		line.Text = strings.TrimRight(linetext, " ")
 		line.Text += "\n"
-		line.Hocrname = name
+		line.OcrName = name
 		var imgd parse.ImgDirect
 		imgd.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))
 		line.Img = imgd
diff --git a/parse/line.go b/parse/line.go
index 3ddde76..9a2be8e 100644
--- a/parse/line.go
+++ b/parse/line.go
@@ -9,6 +9,7 @@ import (
 	"image"
 	"image/png"
 	"io"
+	"os"
 )
 
 type LineDetail struct {
@@ -16,7 +17,7 @@ type LineDetail struct {
 	Avgconf float64
 	Img CopyableLine
 	Text string
-	Hocrname string
+	OcrName string
 }
 
 type CopyableLine interface {
@@ -37,6 +38,21 @@ func (i ImgDirect) CopyLineTo(w io.Writer) (error) {
 	return nil
 }
 
+type ImgPath struct {
+	Path string
+}
+
+func (i ImgPath) CopyLineTo(w io.Writer) (error) {
+	f, err := os.Open(i.Path)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	_, err = io.Copy(w, f)
+	return err
+}
+
 type LineDetails []LineDetail
 
 // Used by sort.Sort.
diff --git a/parse/prob/prob.go b/parse/prob/prob.go
new file mode 100644
index 0000000..5a84567
--- /dev/null
+++ b/parse/prob/prob.go
@@ -0,0 +1,72 @@
+package prob
+
+import (
+	"bufio"
+	"io/ioutil"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"git.rescribe.xyz/testingtools/parse"
+)
+
+// TODO: probably switch to just relying on io.Reader
+func getLineAvg(r *bufio.Reader) (float64, error) {
+	var err error
+
+	totalconf := float64(0)
+	num := 0
+
+	err = nil
+	for err == nil {
+		var line string
+		line, err = r.ReadString('\n')
+		fields := strings.Fields(line)
+
+		if len(fields) == 2 {
+			conf, converr := strconv.ParseFloat(fields[1], 64)
+			if converr != nil {
+				continue
+			}
+			totalconf += conf
+			num += 1
+		}
+	}
+	if num <= 0 {
+		return 0, nil
+	}
+	avg := totalconf / float64(num)
+	return avg, nil
+}
+
+// TODO: probably switch to just relying on io.Reader
+// Note this only processes one line at a time
+func GetLineDetails(name string, r *bufio.Reader) (parse.LineDetails, error) {
+	var line parse.LineDetail
+	lines := make(parse.LineDetails, 0)
+
+	avg, err := getLineAvg(r)
+	if err != nil {
+		return lines, err
+	}
+
+	filebase := strings.Replace(name, ".prob", "", 1)
+
+	txt, err := ioutil.ReadFile(filebase + ".txt")
+	if err != nil {
+		return lines, err
+	}
+
+	line.Name = name
+	line.Avgconf = avg
+	line.Text = string(txt)
+	line.OcrName = filepath.Dir(filebase)
+
+	var imgfn parse.ImgPath
+	imgfn.Path = filebase + ".bin.png"
+	line.Img = imgfn
+
+	lines = append(lines, line)
+
+	return lines, nil
+}
-- 
cgit v1.2.1-24-ge1ad