summaryrefslogtreecommitdiff
path: root/pkg/hocr/hocr.go
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2020-01-22 16:17:05 +0000
committer: Nick White <git@njw.name> 2020-01-22 16:17:05 +0000
commit: 632a149df0196f0f057fa4d552aa28d22901bcda (patch)
tree: 8149663e07e61556d2f439a6c35ff13e19f7e022 /pkg/hocr/hocr.go
parent: 3e137cf7a2e163b25b2109a4182a453a38f3a4de (diff)
Add GetWordConfs function to hocr pkg
Diffstat (limited to 'pkg/hocr/hocr.go')
-rw-r--r-- pkg/hocr/hocr.go 29
1 files changed, 29 insertions, 0 deletions
diff --git a/pkg/hocr/hocr.go b/pkg/hocr/hocr.go
index c3c88b3..fa8ae8e 100644
--- a/pkg/hocr/hocr.go
+++ b/pkg/hocr/hocr.go
@@ -127,3 +127,32 @@ func GetAvgConf(hocrfn string) (float64, error) {
}
return total / num, nil
}
+
+// GetWordConfs reads and parses the hocr file hocrfn and returns
+// the confidence of each word, visiting the parsed lines and the
+// words of each line in order. On any error it returns a nil
+// slice, so callers never see partially collected confidences.
+func GetWordConfs(hocrfn string) ([]float64, error) {
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return nil, err
+	}
+
+	h, err := Parse(file)
+	if err != nil {
+		return nil, err
+	}
+
+	var confs []float64
+	for _, l := range h.Lines {
+		for _, w := range l.Words {
+			c, err := wordConf(w.Title)
+			if err != nil {
+				return nil, err // discard the partial result
+			}
+			confs = append(confs, c)
+		}
+	}
+
+	return confs, nil
+}