summary | refs | log | tree | commit | diff
path: root/parse/hocr
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-01-24 18:13:35 +0000
committer Nick White <git@njw.name> 2019-01-24 18:13:35 +0000
commit 0efed080dec35be85bd8f0388a062c79c5ac544a (patch)
tree ed6abe3ac729963a4e09bc573d80013f6ab03839 /parse/hocr
parent 6e08494a8883a8e304ef02ea352a11776ab2294c (diff)
Better separation between library and tool
Diffstat (limited to 'parse/hocr')
-rw-r--r-- parse/hocr/hocr.go 36
1 files changed, 34 insertions, 2 deletions
diff --git a/parse/hocr/hocr.go b/parse/hocr/hocr.go
index c03b73a..3599bef 100644
--- a/parse/hocr/hocr.go
+++ b/parse/hocr/hocr.go
@@ -9,6 +9,10 @@ package hocr
import (
"encoding/xml"
"image"
+ "image/png"
+ "io/ioutil"
+ "os"
+ "path/filepath"
"regexp"
"strconv"
"strings"
@@ -76,7 +80,7 @@ func noText(s string) bool {
return len(t) == 0
}
-func Parse(b []byte) (Hocr, error) {
+func parseIt(b []byte) (Hocr, error) {
var hocr Hocr
err := xml.Unmarshal(b, &hocr)
@@ -87,7 +91,7 @@ func Parse(b []byte) (Hocr, error) {
return hocr, nil
}
-func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) {
+func parseLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) {
lines := make(parse.LineDetails, 0)
for _, l := range h.Lines {
@@ -147,3 +151,31 @@ func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, erro
}
return lines, nil
}
+
+// GetLineDetails reads the hOCR file hocrfn and the PNG page image named
+// after it (the trailing ".hocr" replaced by ".png"), and returns whatever
+// parseLineDetails extracts from the pair.
+func GetLineDetails(hocrfn string) (parse.LineDetails, error) {
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return nil, err
+	}
+	h, err := parseIt(file)
+	if err != nil {
+		return nil, err
+	}
+
+	// Strip only the trailing extension: replacing the first ".hocr" found
+	// anywhere would mangle paths such as "scans.hocr/page1.hocr".
+	stem := strings.TrimSuffix(hocrfn, ".hocr")
+	pngf, err := os.Open(stem + ".png")
+	if err != nil {
+		return nil, err
+	}
+	defer pngf.Close()
+	img, err := png.Decode(pngf)
+	if err != nil {
+		return nil, err
+	}
+	return parseLineDetails(h, img, filepath.Base(stem))
+}