From 6b4e704befb7f82627d2c9a4e3f4e2971fdaf883 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Tue, 14 May 2019 11:20:33 +0100
Subject: Add pgconf tool that prints the overall confidence for a whole page of hocr

---
 lib/hocr/lines.go | 20 ++++++++++++++++++++
 pgconf/main.go    | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 pgconf/main.go

diff --git a/lib/hocr/lines.go b/lib/hocr/lines.go
index 20c6550..74e8f9a 100644
--- a/lib/hocr/lines.go
+++ b/lib/hocr/lines.go
@@ -112,3 +112,23 @@ func GetLineDetails(hocrfn string) (line.Details, error) {
 	n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
 	return parseLineDetails(h, img, n)
 }
+
+// GetLineBasics parses the hocr file hocrfn and returns the details
+// of its lines. Unlike GetLineDetails it passes nil rather than an
+// image to parseLineDetails.
+func GetLineBasics(hocrfn string) (line.Details, error) {
+	var newlines line.Details
+
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return newlines, err
+	}
+
+	h, err := Parse(file)
+	if err != nil {
+		return newlines, err
+	}
+
+	n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
+	return parseLineDetails(h, nil, n)
+}
diff --git a/pgconf/main.go b/pgconf/main.go
new file mode 100644
index 0000000..1b70ecc
--- /dev/null
+++ b/pgconf/main.go
@@ -0,0 +1,51 @@
+// pgconf prints the overall confidence for a page of hOCR, computed
+// as the mean of the Avgconf value of each line on the page.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+
+	"rescribe.xyz/go.git/lib/hocr"
+	"rescribe.xyz/go.git/lib/line"
+)
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: pgconf hocr\n")
+		fmt.Fprintf(os.Stderr, "Prints the total confidence for a page, as an average of the confidence of each word.\n")
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+	if flag.NArg() != 1 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	var lines line.Details
+	for _, f := range flag.Args() {
+		newlines, err := hocr.GetLineBasics(f)
+		if err != nil {
+			log.Fatal(err)
+		}
+		lines = append(lines, newlines...)
+	}
+
+	// Without any lines the average below would be 0/0 and print
+	// "NaN", so fail with a clear message instead.
+	if len(lines) == 0 {
+		log.Fatal("no lines found in hocr")
+	}
+
+	// NOTE(review): the usage text describes a per-word average, but
+	// this averages the Avgconf of each line; confirm which is intended.
+	var total float64
+	for _, l := range lines {
+		total += l.Avgconf
+	}
+	avg := total / float64(len(lines))
+
+	fmt.Printf("%0.2f\n", avg)
+}
-- 
cgit v1.2.1-24-ge1ad