From f49a8a74a8ef2c96cc2bbf34461a8387f7e324d8 Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Tue, 14 May 2019 18:02:34 +0100
Subject: Rewrite pgconf to be more accurate by measuring average word
 confidence rather than average line confidence

---
 lib/hocr/hocr.go | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'lib/hocr')

diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go
index f6316d8..bfc6a98 100644
--- a/lib/hocr/hocr.go
+++ b/lib/hocr/hocr.go
@@ -98,3 +98,28 @@ func GetText(hocrfn string) (string, error) {
 	}
 	return s, nil
 }
+
+// GetAvgConf reads the hOCR file named hocrfn, parses it with Parse, and
+// returns the arithmetic mean of the confidence values that wordConf
+// reports for the Title of every word on every line.
+//
+// If the parsed document contains no words, GetAvgConf returns 0 and a
+// nil error rather than the NaN that dividing zero by zero would yield.
+// Any error from reading the file, from Parse, or from wordConf is
+// returned unchanged alongside a zero result.
+func GetAvgConf(hocrfn string) (float64, error) {
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return 0, err
+	}
+
+	h, err := Parse(file)
+	if err != nil {
+		return 0, err
+	}
+
+	// num is kept as a float64 so the final division needs no conversion.
+	var total, num float64
+	for _, l := range h.Lines {
+		for _, w := range l.Words {
+			c, err := wordConf(w.Title)
+			if err != nil {
+				return 0, err
+			}
+			total += c
+			num++
+		}
+	}
+
+	// A document without any words has no meaningful average; avoid 0/0.
+	if num == 0 {
+		return 0, nil
+	}
+	return total / num, nil
+}
-- 
cgit v1.2.1-24-ge1ad