From cd1fb1c9f6e1384ac0add8904425e6f92b17a704 Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 25 Feb 2019 13:01:28 +0000 Subject: Generalise get text from hocr lines --- lib/hocr/hocr.go | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) (limited to 'lib/hocr/hocr.go') diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go index fbf1523..f6316d8 100644 --- a/lib/hocr/hocr.go +++ b/lib/hocr/hocr.go @@ -92,34 +92,9 @@ func GetText(hocrfn string) (string, error) { return s, err } + for _, l := range h.Lines { - linetext := l.Text - if noText(linetext) { - linetext = "" - for _, w := range l.Words { - if w.Class != "ocrx_word" { - continue - } - linetext += w.Text + " " - } - } - if noText(linetext) { - linetext = "" - for _, w := range l.Words { - if w.Class != "ocrx_word" { - continue - } - for _, c := range w.Chars { - if c.Class != "ocrx_cinfo" { - continue - } - linetext += c.Text - } - linetext += " " - } - } - linetext = strings.TrimRight(linetext, " ") + "\n" - s += linetext + s += getLineText(l) } return s, nil } -- cgit v1.2.1-24-ge1ad