diff options
author | Nick White <git@njw.name> | 2020-01-22 16:17:05 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2020-01-22 16:17:05 +0000 |
commit | 632a149df0196f0f057fa4d552aa28d22901bcda (patch) | |
tree | 8149663e07e61556d2f439a6c35ff13e19f7e022 /pkg/hocr/hocr.go | |
parent | 3e137cf7a2e163b25b2109a4182a453a38f3a4de (diff) |
Add GetWordConfs function to hocr pkg
Diffstat (limited to 'pkg/hocr/hocr.go')
-rw-r--r-- | pkg/hocr/hocr.go | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/pkg/hocr/hocr.go b/pkg/hocr/hocr.go index c3c88b3..fa8ae8e 100644 --- a/pkg/hocr/hocr.go +++ b/pkg/hocr/hocr.go @@ -127,3 +127,32 @@ func GetAvgConf(hocrfn string) (float64, error) { } return total / num, nil } + +// GetWordConfs is a utility function that parses a hocr +// file and returns an array containing the confidences +// of each word therein. +func GetWordConfs(hocrfn string) ([]float64, error) { + var confs []float64 + + file, err := ioutil.ReadFile(hocrfn) + if err != nil { + return confs, err + } + + h, err := Parse(file) + if err != nil { + return confs, err + } + + for _, l := range h.Lines { + for _, w := range l.Words { + c, err := wordConf(w.Title) + if err != nil { + return confs, err + } + confs = append(confs, c) + } + } + + return confs, nil +} |