diff options
author | Nick White <git@njw.name> | 2019-02-25 13:01:28 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-02-25 13:01:28 +0000 |
commit | cd1fb1c9f6e1384ac0add8904425e6f92b17a704 (patch) | |
tree | 4b634aca131fa95ecb761904d312322386a38420 /lib/hocr/hocr.go | |
parent | 3c4c5f7c202b7c54ca8d23e7bd7bff4a4bb696cc (diff) |
Generalise get text from hocr lines
Diffstat (limited to 'lib/hocr/hocr.go')
-rw-r--r-- | lib/hocr/hocr.go | 29 |
1 file changed, 2 insertions, 27 deletions
diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go index fbf1523..f6316d8 100644 --- a/lib/hocr/hocr.go +++ b/lib/hocr/hocr.go @@ -92,34 +92,9 @@ func GetText(hocrfn string) (string, error) { return s, err } + for _, l := range h.Lines { - linetext := l.Text - if noText(linetext) { - linetext = "" - for _, w := range l.Words { - if w.Class != "ocrx_word" { - continue - } - linetext += w.Text + " " - } - } - if noText(linetext) { - linetext = "" - for _, w := range l.Words { - if w.Class != "ocrx_word" { - continue - } - for _, c := range w.Chars { - if c.Class != "ocrx_cinfo" { - continue - } - linetext += c.Text - } - linetext += " " - } - } - linetext = strings.TrimRight(linetext, " ") + "\n" - s += linetext + s += getLineText(l) } return s, nil } |