From f49a8a74a8ef2c96cc2bbf34461a8387f7e324d8 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 14 May 2019 18:02:34 +0100 Subject: Rewrite pgconf to be more accurate by measuring average word confidence rather than average line confidence --- lib/hocr/hocr.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'lib/hocr/hocr.go') diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go index f6316d8..bfc6a98 100644 --- a/lib/hocr/hocr.go +++ b/lib/hocr/hocr.go @@ -98,3 +98,28 @@ func GetText(hocrfn string) (string, error) { } return s, nil } + +func GetAvgConf(hocrfn string) (float64, error) { + file, err := ioutil.ReadFile(hocrfn) + if err != nil { + return 0, err + } + + h, err := Parse(file) + if err != nil { + return 0, err + } + + var total, num float64 + for _, l := range h.Lines { + for _, w := range l.Words { + c, err := wordConf(w.Title) + if err != nil { + return 0, err + } + total += c + num++ + } + } + return total / num, nil +} -- cgit v1.2.1-24-ge1ad