From 0efed080dec35be85bd8f0388a062c79c5ac544a Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Thu, 24 Jan 2019 18:13:35 +0000
Subject: Better separation between library and tool

---
 parse/hocr/hocr.go | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

(limited to 'parse/hocr')

diff --git a/parse/hocr/hocr.go b/parse/hocr/hocr.go
index c03b73a..3599bef 100644
--- a/parse/hocr/hocr.go
+++ b/parse/hocr/hocr.go
@@ -9,6 +9,10 @@ package hocr
 import (
 	"encoding/xml"
 	"image"
+	"image/png"
+	"io/ioutil"
+	"os"
+	"path/filepath"
 	"regexp"
 	"strconv"
 	"strings"
@@ -76,7 +80,7 @@ func noText(s string) bool {
 	return len(t) == 0
 }
 
-func Parse(b []byte) (Hocr, error) {
+func parseIt(b []byte) (Hocr, error) {
 	var hocr Hocr
 
 	err := xml.Unmarshal(b, &hocr)
@@ -87,7 +91,7 @@ func Parse(b []byte) (Hocr, error) {
 	return hocr, nil
 }
 
-func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) {
+func parseLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) {
 	lines := make(parse.LineDetails, 0)
 
 	for _, l := range h.Lines {
@@ -147,3 +151,31 @@ func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, erro
 	}
 	return lines, nil
 }
+
+// GetLineDetails parses the hOCR file hocrfn and decodes the PNG beside it
+// (same path, trailing ".hocr" swapped for ".png"), returning the result of
+// parseLineDetails named after the file's base name minus the ".hocr" suffix.
+func GetLineDetails(hocrfn string) (parse.LineDetails, error) {
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return nil, err
+	}
+
+	h, err := parseIt(file)
+	if err != nil {
+		return nil, err
+	}
+
+	// Trim only a trailing ".hocr"; Replace also hit earlier matches in the path.
+	stem := strings.TrimSuffix(hocrfn, ".hocr")
+	pngf, err := os.Open(stem + ".png")
+	if err != nil {
+		return nil, err
+	}
+	defer pngf.Close()
+	img, err := png.Decode(pngf)
+	if err != nil {
+		return nil, err
+	}
+	return parseLineDetails(h, img, filepath.Base(stem))
+}
-- 
cgit v1.2.1-24-ge1ad