diff options
-rw-r--r-- | bucket-lines/bucket-lines.go | 39 | ||||
-rw-r--r-- | parse/hocr/hocr.go | 36 |
2 files changed, 37 insertions, 38 deletions
diff --git a/bucket-lines/bucket-lines.go b/bucket-lines/bucket-lines.go
index ad73fcd..186f568 100644
--- a/bucket-lines/bucket-lines.go
+++ b/bucket-lines/bucket-lines.go
@@ -3,51 +3,18 @@ package main
 import (
 	"flag"
 	"fmt"
-	"image/png"
-	"io/ioutil"
 	"log"
 	"os"
 	"path/filepath"
-	"strings"
 
 	"git.rescribe.xyz/testingtools/parse"
 	"git.rescribe.xyz/testingtools/parse/hocr"
 	"git.rescribe.xyz/testingtools/parse/prob"
 )
 
-// TODO: maybe move this into hocr.go
-func detailsFromHocr(f string) (parse.LineDetails, error) {
-	var newlines parse.LineDetails
-
-	file, err := ioutil.ReadFile(f)
-	if err != nil {
-		return newlines, err
-	}
-
-	h, err := hocr.Parse(file)
-	if err != nil {
-		return newlines, err
-	}
-
-	pngfn := strings.Replace(f, ".hocr", ".png", 1)
-	pngf, err := os.Open(pngfn)
-	if err != nil {
-		return newlines, err
-	}
-	defer pngf.Close()
-	img, err := png.Decode(pngf)
-	if err != nil {
-		return newlines, err
-	}
-
-	n := strings.Replace(filepath.Base(f), ".hocr", "", 1)
-	return hocr.GetLineDetails(h, img, n)
-}
-
 func main() {
-	// TODO: Allow different specs to be used for .prob vs .hocr. Do this
-	// by adding a field to LineDetails that is linked to a named
-	// BucketSpecs.
+	// TODO: Allow bucket specs to be determined by a json file passed
+	// as an argument.
 	b := parse.BucketSpecs{
 		// minimum confidence, name
 		{ 0, "bad" },
@@ -83,7 +50,7 @@ func main() {
 		case ".prob":
 			newlines, err = prob.GetLineDetails(f)
 		case ".hocr":
-			newlines, err = detailsFromHocr(f)
+			newlines, err = hocr.GetLineDetails(f)
 		default:
 			log.Printf("Skipping file '%s' as it isn't a .prob or .hocr\n", f)
 		}
diff --git a/parse/hocr/hocr.go b/parse/hocr/hocr.go
index c03b73a..3599bef 100644
--- a/parse/hocr/hocr.go
+++ b/parse/hocr/hocr.go
@@ -9,6 +9,10 @@ package hocr
 import (
 	"encoding/xml"
 	"image"
+	"image/png"
+	"io/ioutil"
+	"os"
+	"path/filepath"
 	"regexp"
 	"strconv"
 	"strings"
@@ -76,7 +80,7 @@ func noText(s string) bool {
 	return len(t) == 0
 }
 
-func Parse(b []byte) (Hocr, error) {
+func parseIt(b []byte) (Hocr, error) {
 	var hocr Hocr
 
 	err := xml.Unmarshal(b, &hocr)
@@ -87,7 +91,7 @@ func Parse(b []byte) (Hocr, error) {
 	return hocr, nil
 }
 
-func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) {
+func parseLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, error) {
 	lines := make(parse.LineDetails, 0)
 
 	for _, l := range h.Lines {
@@ -147,3 +151,31 @@ func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, erro
 	}
 	return lines, nil
 }
+
+func GetLineDetails(hocrfn string) (parse.LineDetails, error) {
+	var newlines parse.LineDetails
+
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return newlines, err
+	}
+
+	h, err := parseIt(file)
+	if err != nil {
+		return newlines, err
+	}
+
+	pngfn := strings.Replace(hocrfn, ".hocr", ".png", 1)
+	pngf, err := os.Open(pngfn)
+	if err != nil {
+		return newlines, err
+	}
+	defer pngf.Close()
+	img, err := png.Decode(pngf)
+	if err != nil {
+		return newlines, err
+	}
+
+	n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
+	return parseLineDetails(h, img, n)
+}