From f49a8a74a8ef2c96cc2bbf34461a8387f7e324d8 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Tue, 14 May 2019 18:02:34 +0100
Subject: Rewrite pgconf to be more accurate by measuring average word
 confidence rather than average line confidence

---
 lib/hocr/hocr.go | 32 ++++++++++++++++++++++++++++++++
 pgconf/main.go   | 30 ++++--------------------------
 2 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go
index f6316d8..bfc6a98 100644
--- a/lib/hocr/hocr.go
+++ b/lib/hocr/hocr.go
@@ -98,3 +98,35 @@ func GetText(hocrfn string) (string, error) {
 	}
 	return s, nil
 }
+
+// GetAvgConf returns the mean of the word confidences in the hOCR
+// file hocrfn, as reported by wordConf for each word's title. A file
+// containing no words yields 0 rather than NaN.
+func GetAvgConf(hocrfn string) (float64, error) {
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return 0, err
+	}
+
+	h, err := Parse(file)
+	if err != nil {
+		return 0, err
+	}
+
+	var total, num float64
+	for _, l := range h.Lines {
+		for _, w := range l.Words {
+			c, err := wordConf(w.Title)
+			if err != nil {
+				return 0, err
+			}
+			total += c
+			num++
+		}
+	}
+	// Guard the division: 0/0 would otherwise return NaN with a nil error.
+	if num == 0 {
+		return 0, nil
+	}
+	return total / num, nil
+}
diff --git a/pgconf/main.go b/pgconf/main.go
index d254f42..bc09c23 100644
--- a/pgconf/main.go
+++ b/pgconf/main.go
@@ -7,7 +7,6 @@ import (
 	"os"
 
 	"rescribe.xyz/go.git/lib/hocr"
-	"rescribe.xyz/go.git/lib/line"
 )
 
 func main() {
@@ -22,31 +21,10 @@ func main() {
 		os.Exit(1)
 	}
 
-	var err error
-	lines := make(line.Details, 0)
-
-	for _, f := range flag.Args() {
-		var newlines line.Details
-		newlines, err = hocr.GetLineBasics(f)
-		if err != nil {
-			log.Fatal(err)
-		}
-
-		for _, l := range newlines {
-			lines = append(lines, l)
-		}
-	}
-
-	if len(lines) == 0 {
-		fmt.Printf("No lines found\n")
-		os.Exit(0)
-	}
-
-	var total float64
-	for _, l := range lines {
-		total += l.Avgconf
+	avg, err := hocr.GetAvgConf(flag.Arg(0))
+	if err != nil {
+		log.Fatalf("Error retrieving confidence for %s: %v\n", flag.Arg(0), err)
 	}
-	avg := total / float64(len(lines))
 
-	fmt.Printf("%0.0f\n", avg * 100)
+	fmt.Printf("%0.0f\n", avg)
 }
-- 
cgit v1.2.1-24-ge1ad