summary | refs | log | tree | commit | diff
path: root/lib/hocr/hocr.go
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-02-25 13:01:28 +0000
committer Nick White <git@njw.name> 2019-02-25 13:01:28 +0000
commit cd1fb1c9f6e1384ac0add8904425e6f92b17a704 (patch)
tree 4b634aca131fa95ecb761904d312322386a38420 /lib/hocr/hocr.go
parent 3c4c5f7c202b7c54ca8d23e7bd7bff4a4bb696cc (diff)
Generalise get text from hocr lines
Diffstat (limited to 'lib/hocr/hocr.go')
-rw-r--r-- lib/hocr/hocr.go 29
1 files changed, 2 insertions, 27 deletions
diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go
index fbf1523..f6316d8 100644
--- a/lib/hocr/hocr.go
+++ b/lib/hocr/hocr.go
@@ -92,34 +92,9 @@ func GetText(hocrfn string) (string, error) {
return s, err
}
+
for _, l := range h.Lines {
- linetext := l.Text
- if noText(linetext) {
- linetext = ""
- for _, w := range l.Words {
- if w.Class != "ocrx_word" {
- continue
- }
- linetext += w.Text + " "
- }
- }
- if noText(linetext) {
- linetext = ""
- for _, w := range l.Words {
- if w.Class != "ocrx_word" {
- continue
- }
- for _, c := range w.Chars {
- if c.Class != "ocrx_cinfo" {
- continue
- }
- linetext += c.Text
- }
- linetext += " "
- }
- }
- linetext = strings.TrimRight(linetext, " ") + "\n"
- s += linetext
+ s += getLineText(l)
}
return s, nil
}