From 0efed080dec35be85bd8f0388a062c79c5ac544a Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Thu, 24 Jan 2019 18:13:35 +0000
Subject: Better separation between library and tool

---
 bucket-lines/bucket-lines.go | 39 +++------------------------------------
 parse/hocr/hocr.go           | 36 ++++++++++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/bucket-lines/bucket-lines.go b/bucket-lines/bucket-lines.go
index ad73fcd..186f568 100644
--- a/bucket-lines/bucket-lines.go
+++ b/bucket-lines/bucket-lines.go
@@ -3,51 +3,18 @@ package main
 import (
 	"flag"
 	"fmt"
-	"image/png"
-	"io/ioutil"
 	"log"
 	"os"
 	"path/filepath"
-	"strings"
 
 	"git.rescribe.xyz/testingtools/parse"
 	"git.rescribe.xyz/testingtools/parse/hocr"
 	"git.rescribe.xyz/testingtools/parse/prob"
 )
 
-// TODO: maybe move this into hocr.go
-func detailsFromHocr(f string) (parse.LineDetails, error) {
-	var newlines parse.LineDetails
-
-	file, err := ioutil.ReadFile(f)
-	if err != nil {
-		return newlines, err
-	}
-
-	h, err := hocr.Parse(file)
-	if err != nil {
-		return newlines, err
-	}
-
-	pngfn := strings.Replace(f, ".hocr", ".png", 1)
-	pngf, err := os.Open(pngfn)
-	if err != nil {
-		return newlines, err
-	}
-	defer pngf.Close()
-	img, err := png.Decode(pngf)
-	if err != nil {
-		return newlines, err
-	}
-
-	n := strings.Replace(filepath.Base(f), ".hocr", "", 1)
-	return hocr.GetLineDetails(h, img, n)
-}
-
 func main() {
-	// TODO: Allow different specs to be used for .prob vs .hocr. Do this
-	//       by adding a field to LineDetails that is linked to a named
-	//       BucketSpecs.
+	// TODO: Allow bucket specs to be determined by a json file passed
+	//       as an argument.
 	b := parse.BucketSpecs{
 		// minimum confidence, name
 		{ 0, "bad" },
@@ -83,7 +50,7 @@ func main() {
 			case ".prob":
 				newlines, err = prob.GetLineDetails(f)
 			case ".hocr":
-				newlines, err = detailsFromHocr(f)
+				newlines, err = hocr.GetLineDetails(f)
 			default:
 				log.Printf("Skipping file '%s' as it isn't a .prob or .hocr\n", f)
 		}
diff --git a/parse/hocr/hocr.go b/parse/hocr/hocr.go
index c03b73a..3599bef 100644
--- a/parse/hocr/hocr.go
+++ b/parse/hocr/hocr.go
@@ -9,6 +9,10 @@ package hocr
 import (
 	"encoding/xml"
 	"image"
+	"image/png"
+	"io/ioutil"
+	"os"
+	"path/filepath"
 	"regexp"
 	"strconv"
 	"strings"
@@ -76,7 +80,7 @@ func noText(s string) bool {
 	return len(t) == 0
 }
 
-func Parse(b []byte) (Hocr, error) {
+func parseIt(b []byte) (Hocr, error) {
 	var hocr Hocr
 
 	err := xml.Unmarshal(b, &hocr)
@@ -87,7 +91,7 @@ func Parse(b []byte) (Hocr, error) {
 	return hocr, nil
 }
 
-func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) {
+func parseLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) {
 	lines := make(parse.LineDetails, 0)
 
 	for _, l := range h.Lines {
@@ -147,3 +151,31 @@ func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, erro
 	}
 	return lines, nil
 }
+
+// GetLineDetails reads the hOCR file hocrfn and the PNG image beside it (same
+// path with the ".hocr" suffix swapped for ".png") and returns the result of
+// parseLineDetails, named by the file's base name without the ".hocr" suffix.
+func GetLineDetails(hocrfn string) (parse.LineDetails, error) {
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return nil, err
+	}
+	h, err := parseIt(file)
+	if err != nil {
+		return nil, err
+	}
+
+	// Swap only the trailing extension: replacing the first ".hocr" anywhere
+	// in the path would break paths such as "scan.hocr.d/page.hocr".
+	pngf, err := os.Open(strings.TrimSuffix(hocrfn, ".hocr") + ".png")
+	if err != nil {
+		return nil, err
+	}
+	defer pngf.Close()
+	img, err := png.Decode(pngf)
+	if err != nil {
+		return nil, err
+	}
+
+	return parseLineDetails(h, img, strings.TrimSuffix(filepath.Base(hocrfn), ".hocr"))
+}
-- 
cgit v1.2.1-24-ge1ad