diff options
Diffstat (limited to 'pkg/hocr')
-rw-r--r-- | pkg/hocr/hocr.go | 9 | ||||
-rw-r--r-- | pkg/hocr/lines.go | 5 |
2 files changed, 11 insertions, 3 deletions
diff --git a/pkg/hocr/hocr.go b/pkg/hocr/hocr.go
index 9dea49c..6b43558 100644
--- a/pkg/hocr/hocr.go
+++ b/pkg/hocr/hocr.go
@@ -2,6 +2,8 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// hocr contains structures and functions for parsing and analysing
+// hocr files
 package hocr
 
 import (
@@ -51,6 +53,7 @@ func wordConf(s string) (float64, error) {
 	return strconv.ParseFloat(conf[1], 64)
 }
 
+// BoxCoords parses bbox coordinate strings
 func BoxCoords(s string) ([4]int, error) {
 	var coords [4]int
 	re, err := regexp.Compile(`bbox ([0-9]+) ([0-9]+) ([0-9]+) ([0-9]+)`)
@@ -73,6 +76,7 @@ func noText(s string) bool {
 	return len(t) == 0
 }
 
+// Parse parses a hOCR file
 func Parse(b []byte) (Hocr, error) {
 	var hocr Hocr
 
@@ -84,6 +88,7 @@ func Parse(b []byte) (Hocr, error) {
 	return hocr, nil
 }
 
+// GetText parses a hOCR file and extracts the text from it
 func GetText(hocrfn string) (string, error) {
 	var s string
 
@@ -104,6 +109,8 @@ func GetText(hocrfn string) (string, error) {
 	return s, nil
 }
 
+// GetAvgConf calculates the average confidence of a hOCR file from
+// confidences embedded in each word
 func GetAvgConf(hocrfn string) (float64, error) {
 	file, err := ioutil.ReadFile(hocrfn)
 	if err != nil {
@@ -134,7 +141,7 @@ func GetAvgConf(hocrfn string) (float64, error) {
 
 // GetWordConfs is a utility function that parses a hocr
 // file and returns an array containing the confidences
-// of each word therein.
+// of each word therein
 func GetWordConfs(hocrfn string) ([]float64, error) {
 	var confs []float64
 
diff --git a/pkg/hocr/lines.go b/pkg/hocr/lines.go
index 1387574..942bd01 100644
--- a/pkg/hocr/lines.go
+++ b/pkg/hocr/lines.go
@@ -20,6 +20,7 @@ import (
 	"rescribe.xyz/utils/pkg/line"
 )
 
+// LineText extracts the text from an OcrLine
 func LineText(l OcrLine) (string) {
 	linetext := ""
 
@@ -88,7 +89,7 @@ func parseLineDetails(h Hocr, i *image.Gray, name string) (line.Details, error)
 }
 
 // GetLineDetails parses a hocr file and returns a corresponding
-// line.Details, including image extracts for each line.
+// line.Details, including image extracts for each line
 func GetLineDetails(hocrfn string) (line.Details, error) {
 	var newlines line.Details
 
@@ -121,7 +122,7 @@ func GetLineDetails(hocrfn string) (line.Details, error) {
 }
 
 // GetLineBasics parses a hocr file and returns a corresponding
-// line.Details, without any image extracts.
+// line.Details, without any image extracts
 func GetLineBasics(hocrfn string) (line.Details, error) {
 	var newlines line.Details
 