diff options
| author | Nick White <git@njw.name> | 2019-05-14 18:02:34 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-05-14 18:02:34 +0100 | 
| commit | f49a8a74a8ef2c96cc2bbf34461a8387f7e324d8 (patch) | |
| tree | ec3c67a1cd6c5cfb7a787b2936799af72bc18743 /lib | |
| parent | 544c42ebae13c4f215a0722b4067d4adb715be65 (diff) | |
Rewrite pgconf to be more accurate by measuring average word confidence rather than average line confidence
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/hocr/hocr.go | 25 | 
1 files changed, 25 insertions, 0 deletions
| diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go index f6316d8..bfc6a98 100644 --- a/lib/hocr/hocr.go +++ b/lib/hocr/hocr.go @@ -98,3 +98,28 @@ func GetText(hocrfn string) (string, error) {  	}  	return s, nil  } + +func GetAvgConf(hocrfn string) (float64, error) { +	file, err := ioutil.ReadFile(hocrfn) +	if err != nil { +		return 0, err +	} + +	h, err := Parse(file) +	if err != nil { +		return 0, err +	} + +	var total, num float64 +	for _, l := range h.Lines { +		for _, w := range l.Words { +			c, err := wordConf(w.Title) +			if err != nil { +				return 0, err +			} +			total += c +			num++ +		} +	} +	return total / num, nil +} | 
