summaryrefslogtreecommitdiff
path: root/lib/hocr
diff options
context:
space:
mode:
authorNick White <git@njw.name>2019-05-14 18:02:34 +0100
committerNick White <git@njw.name>2019-05-14 18:02:34 +0100
commitf49a8a74a8ef2c96cc2bbf34461a8387f7e324d8 (patch)
treeec3c67a1cd6c5cfb7a787b2936799af72bc18743 /lib/hocr
parent544c42ebae13c4f215a0722b4067d4adb715be65 (diff)
Rewrite pgconf to be more accurate by measuring average word confidence rather than average line confidence
Diffstat (limited to 'lib/hocr')
-rw-r--r--lib/hocr/hocr.go25
1 files changed, 25 insertions, 0 deletions
diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go
index f6316d8..bfc6a98 100644
--- a/lib/hocr/hocr.go
+++ b/lib/hocr/hocr.go
@@ -98,3 +98,28 @@ func GetText(hocrfn string) (string, error) {
}
return s, nil
}
+
+// GetAvgConf reads the hOCR file at hocrfn, parses it with Parse, and
+// returns the arithmetic mean of the per-word confidences obtained by
+// calling wordConf on the Title of every word in every line.
+//
+// If the document contains no words the result is 0 rather than the NaN
+// that dividing zero by zero would produce. Any error from reading the
+// file, parsing it, or extracting a word confidence is returned as-is
+// together with a zero value.
+func GetAvgConf(hocrfn string) (float64, error) {
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return 0, err
+	}
+
+	h, err := Parse(file)
+	if err != nil {
+		return 0, err
+	}
+
+	var total, num float64
+	for _, l := range h.Lines {
+		for _, w := range l.Words {
+			c, err := wordConf(w.Title)
+			if err != nil {
+				return 0, err
+			}
+			total += c
+			num++
+		}
+	}
+
+	// Guard the division: with no words, total/num would be 0/0 = NaN,
+	// which would silently poison any comparison or aggregate a caller
+	// performs on the result.
+	if num == 0 {
+		return 0, nil
+	}
+	return total / num, nil
+}