diff options
author | Nick White <git@njw.name> | 2019-05-14 11:20:33 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-05-14 11:20:33 +0100 |
commit | 6b4e704befb7f82627d2c9a4e3f4e2971fdaf883 (patch) | |
tree | dcc721c2791377ceeb193f80941a9ad76483e380 | |
parent | 94601a82bd3db51b749a1e44ad116250ba545bee (diff) |
Add pgconf tool that prints the overall confidence for a whole page of hocr
-rw-r--r-- | lib/hocr/lines.go | 17 | ||||
-rw-r--r-- | pgconf/main.go | 47 |
2 files changed, 64 insertions, 0 deletions
diff --git a/lib/hocr/lines.go b/lib/hocr/lines.go
index 20c6550..74e8f9a 100644
--- a/lib/hocr/lines.go
+++ b/lib/hocr/lines.go
@@ -112,3 +112,20 @@ func GetLineDetails(hocrfn string) (line.Details, error) {
 	n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
 	return parseLineDetails(h, img, n)
 }
+
+func GetLineBasics(hocrfn string) (line.Details, error) {
+	var newlines line.Details
+
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return newlines, err
+	}
+
+	h, err := Parse(file)
+	if err != nil {
+		return newlines, err
+	}
+
+	n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
+	return parseLineDetails(h, nil, n)
+}
diff --git a/pgconf/main.go b/pgconf/main.go
new file mode 100644
index 0000000..1b70ecc
--- /dev/null
+++ b/pgconf/main.go
@@ -0,0 +1,47 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+
+	"rescribe.xyz/go.git/lib/hocr"
+	"rescribe.xyz/go.git/lib/line"
+)
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: pgconf hocr\n")
+		fmt.Fprintf(os.Stderr, "Prints the total confidence for a page, as an average of the confidence of each word.\n")
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+	if flag.NArg() != 1 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	var err error
+	lines := make(line.Details, 0)
+
+	for _, f := range flag.Args() {
+		var newlines line.Details
+		newlines, err = hocr.GetLineBasics(f)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		for _, l := range newlines {
+			lines = append(lines, l)
+		}
+	}
+
+	var total float64
+	for _, l := range lines {
+		total += l.Avgconf
+	}
+	avg := total / float64(len(lines))
+
+	fmt.Printf("%0.2f\n", avg)
+}