diff options
| author | Nick White <git@njw.name> | 2019-01-24 18:13:35 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-01-24 18:13:35 +0000 | 
| commit | 0efed080dec35be85bd8f0388a062c79c5ac544a (patch) | |
| tree | ed6abe3ac729963a4e09bc573d80013f6ab03839 /parse/hocr | |
| parent | 6e08494a8883a8e304ef02ea352a11776ab2294c (diff) | |
Better separation between library and tool
Diffstat (limited to 'parse/hocr')
| -rw-r--r-- | parse/hocr/hocr.go | 36 | 
1 file changed, 34 insertions, 2 deletions
```diff
diff --git a/parse/hocr/hocr.go b/parse/hocr/hocr.go
index c03b73a..3599bef 100644
--- a/parse/hocr/hocr.go
+++ b/parse/hocr/hocr.go
@@ -9,6 +9,10 @@ package hocr
 import (
 	"encoding/xml"
 	"image"
+	"image/png"
+	"io/ioutil"
+	"os"
+	"path/filepath"
 	"regexp"
 	"strconv"
 	"strings"
@@ -76,7 +80,7 @@ func noText(s string) bool {
 	return len(t) == 0
 }
 
-func Parse(b []byte) (Hocr, error) {
+func parseIt(b []byte) (Hocr, error) {
 	var hocr Hocr
 
 	err := xml.Unmarshal(b, &hocr)
@@ -87,7 +91,7 @@ func Parse(b []byte) (Hocr, error) {
 	return hocr, nil
 }
 
-func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) {
+func parseLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) {
 	lines := make(parse.LineDetails, 0)
 
 	for _, l := range h.Lines {
@@ -147,3 +151,31 @@ func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, erro
 	}
 	return lines, nil
 }
+
+func GetLineDetails(hocrfn string) (parse.LineDetails, error) {
+	var newlines parse.LineDetails
+
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return newlines, err
+	}
+
+	h, err := parseIt(file)
+	if err != nil {
+		return newlines, err
+	}
+
+	pngfn := strings.Replace(hocrfn, ".hocr", ".png", 1)
+	pngf, err := os.Open(pngfn)
+	if err != nil {
+		return newlines, err
+	}
+	defer pngf.Close()
+	img, err := png.Decode(pngf)
+	if err != nil {
+		return newlines, err
+	}
+
+	n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
+	return parseLineDetails(h, img, n)
+}
```
