summaryrefslogtreecommitdiff
path: root/parse
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2019-01-23 21:54:09 +0000
committer: Nick White <git@njw.name> 2019-01-23 21:54:15 +0000
commit: fb77852acbdbbcedcdb9771770cb6771da002851 (patch)
tree: 233b6cedea313702c994919906d156d829ed378e /parse
parent: d256f967a26ceeb7c3987a1fc447b126a35054f9 (diff)
Update line-conf-buckets to mostly use package functions too.
Working now, but needs more consolidation to be worth it.
Diffstat (limited to 'parse')
-rw-r--r-- parse/hocr/hocr.go 5
-rw-r--r-- parse/line.go 18
-rw-r--r-- parse/prob/prob.go 72
3 files changed, 92 insertions, 3 deletions
diff --git a/parse/hocr/hocr.go b/parse/hocr/hocr.go
index a281a7a..f7cac05 100644
--- a/parse/hocr/hocr.go
+++ b/parse/hocr/hocr.go
@@ -1,7 +1,8 @@
package hocr
// TODO: consider making GetLineDetails() a function of Hocr, so could do a
-// similar thing with prob format files too.
+// similar thing with prob format files too, and then fire them both
+// off a generic interface, potentially.
// TODO: Parse line name to zero pad line numbers, so they come out in the correct order
import (
@@ -137,7 +138,7 @@ func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, erro
}
line.Text = strings.TrimRight(linetext, " ")
line.Text += "\n"
- line.Hocrname = name
+ line.OcrName = name
var imgd parse.ImgDirect
imgd.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))
line.Img = imgd
diff --git a/parse/line.go b/parse/line.go
index 3ddde76..9a2be8e 100644
--- a/parse/line.go
+++ b/parse/line.go
@@ -9,6 +9,7 @@ import (
"image"
"image/png"
"io"
+ "os"
)
type LineDetail struct {
@@ -16,7 +17,7 @@ type LineDetail struct {
Avgconf float64
Img CopyableLine
Text string
- Hocrname string
+ OcrName string
}
type CopyableLine interface {
@@ -37,6 +38,21 @@ func (i ImgDirect) CopyLineTo(w io.Writer) (error) {
return nil
}
// ImgPath refers to a line image by the path of a file on disk. The
// image data is not held in memory; it is read only when CopyLineTo is
// called.
type ImgPath struct {
	Path string
}

// CopyLineTo streams the raw contents of the file at i.Path to w.
// It returns the error from opening the file, or otherwise the error
// (if any) reported while copying.
func (i ImgPath) CopyLineTo(w io.Writer) error {
	src, openErr := os.Open(i.Path)
	if openErr != nil {
		return openErr
	}
	defer src.Close()

	if _, copyErr := io.Copy(w, src); copyErr != nil {
		return copyErr
	}
	return nil
}
+
type LineDetails []LineDetail
// Used by sort.Sort.
diff --git a/parse/prob/prob.go b/parse/prob/prob.go
new file mode 100644
index 0000000..5a84567
--- /dev/null
+++ b/parse/prob/prob.go
@@ -0,0 +1,72 @@
+package prob
+
import (
	"bufio"
	"io"
	"io/ioutil"
	"path/filepath"
	"strconv"
	"strings"

	"git.rescribe.xyz/testingtools/parse"
)
+
// getLineAvg returns the mean of the confidence values read from r.
//
// Each input line is split on whitespace. Only lines with exactly two
// fields whose second field parses as a float64 contribute to the
// mean; every other line is skipped. If no line contributes, the
// result is 0 with a nil error. A read error other than io.EOF is
// returned to the caller instead of being treated as end of input.
//
// TODO: probably switch to just relying on io.Reader
func getLineAvg(r *bufio.Reader) (float64, error) {
	var total float64
	var count int

	for {
		line, err := r.ReadString('\n')

		// ReadString can return data together with an error (for
		// example a final line with no trailing newline), so the line
		// is processed before the error is inspected.
		if fields := strings.Fields(line); len(fields) == 2 {
			if conf, perr := strconv.ParseFloat(fields[1], 64); perr == nil {
				total += conf
				count++
			}
		}

		if err == io.EOF {
			break
		}
		if err != nil {
			return 0, err
		}
	}

	if count == 0 {
		return 0, nil
	}
	return total / float64(count), nil
}
+
+// TODO: probably switch to just relying on io.Reader
+// Note this only processes one line at a time
+func GetLineDetails(name string, r *bufio.Reader) (parse.LineDetails, error) {
+ var line parse.LineDetail
+ lines := make(parse.LineDetails, 0)
+
+ avg, err := getLineAvg(r)
+ if err != nil {
+ return lines, err
+ }
+
+ filebase := strings.Replace(name, ".prob", "", 1)
+
+ txt, err := ioutil.ReadFile(filebase + ".txt")
+ if err != nil {
+ return lines, err
+ }
+
+ line.Name = name
+ line.Avgconf = avg
+ line.Text = string(txt)
+ line.OcrName = filepath.Dir(filebase)
+
+ var imgfn parse.ImgPath
+ imgfn.Path = filebase + ".bin.png"
+ line.Img = imgfn
+
+ lines = append(lines, line)
+
+ return lines, nil
+}