summary refs log tree commit diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-05-14 18:02:34 +0100
committer Nick White <git@njw.name> 2019-05-14 18:02:34 +0100
commit f49a8a74a8ef2c96cc2bbf34461a8387f7e324d8 (patch)
tree ec3c67a1cd6c5cfb7a787b2936799af72bc18743
parent 544c42ebae13c4f215a0722b4067d4adb715be65 (diff)
Rewrite pgconf to be more accurate by measuring average word confidence rather than average line confidence
-rw-r--r-- lib/hocr/hocr.go 25
-rw-r--r-- pgconf/main.go 30
2 files changed, 29 insertions, 26 deletions
diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go
index f6316d8..bfc6a98 100644
--- a/lib/hocr/hocr.go
+++ b/lib/hocr/hocr.go
@@ -98,3 +98,28 @@ func GetText(hocrfn string) (string, error) {
}
return s, nil
}
+
+// GetAvgConf returns the mean confidence of all words in the hOCR file
+// named by hocrfn.
+//
+// The file is read in full and handed to Parse; the confidence of each
+// word is then extracted from its Title field with wordConf. Any error
+// from reading the file, parsing it, or extracting a word confidence is
+// returned unchanged together with a zero average.
+//
+// If the document contains no words at all, the average is reported as 0
+// with a nil error, rather than the NaN that dividing 0 by 0 would give.
+func GetAvgConf(hocrfn string) (float64, error) {
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return 0, err
+	}
+
+	h, err := Parse(file)
+	if err != nil {
+		return 0, err
+	}
+
+	var total float64
+	var num int
+	for _, l := range h.Lines {
+		for _, w := range l.Words {
+			c, err := wordConf(w.Title)
+			if err != nil {
+				return 0, err
+			}
+			total += c
+			num++
+		}
+	}
+
+	// Guard the empty document: 0/0 in float64 arithmetic is NaN, which
+	// would otherwise be returned as if it were a valid average.
+	if num == 0 {
+		return 0, nil
+	}
+	return total / float64(num), nil
+}
diff --git a/pgconf/main.go b/pgconf/main.go
index d254f42..bc09c23 100644
--- a/pgconf/main.go
+++ b/pgconf/main.go
@@ -7,7 +7,6 @@ import (
"os"
"rescribe.xyz/go.git/lib/hocr"
- "rescribe.xyz/go.git/lib/line"
)
func main() {
@@ -22,31 +21,10 @@ func main() {
os.Exit(1)
}
- var err error
- lines := make(line.Details, 0)
-
- for _, f := range flag.Args() {
- var newlines line.Details
- newlines, err = hocr.GetLineBasics(f)
- if err != nil {
- log.Fatal(err)
- }
-
- for _, l := range newlines {
- lines = append(lines, l)
- }
- }
-
- if len(lines) == 0 {
- fmt.Printf("No lines found\n")
- os.Exit(0)
- }
-
- var total float64
- for _, l := range lines {
- total += l.Avgconf
+ avg, err := hocr.GetAvgConf(flag.Arg(0))
+ if err != nil {
+ log.Fatalf("Error retreiving confidence for %s: %v\n", flag.Arg(0), err)
}
- avg := total / float64(len(lines))
- fmt.Printf("%0.0f\n", avg * 100)
+ fmt.Printf("%0.0f\n", avg)
}