summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2021-07-23 16:25:38 +0100
committer Nick White <git@njw.name> 2021-07-23 16:25:38 +0100
commit 10eb175a5f651748a57297b35d86a3c9c0987e80 (patch)
tree ce5c7a6bd635d157a0f94da0a21b5f42e20ae308
parent 9dc9796fa637829b8be72d0721dc2da707c84451 (diff)
dehyphenate: Update to reflect multiple page support in hocr package
-rw-r--r-- cmd/dehyphenate/main.go 18
1 files changed, 10 insertions, 8 deletions
diff --git a/cmd/dehyphenate/main.go b/cmd/dehyphenate/main.go
index 284fc1a..4d48e08 100644
--- a/cmd/dehyphenate/main.go
+++ b/cmd/dehyphenate/main.go
@@ -77,15 +77,17 @@ func main() {
log.Fatal(err)
}
- for i, l := range h.Lines {
- w := l.Words[len(l.Words)-1]
- if len(w.Chars) == 0 {
- if len(w.Text) > 0 && w.Text[len(w.Text)-1] == '-' {
- h.Lines[i].Words[len(l.Words)-1].Text = w.Text[0:len(w.Text)-1] + h.Lines[i+1].Words[0].Text
- h.Lines[i+1].Words[0].Text = ""
+ for _, p := range h.Pages {
+ for i, l := range p.Lines {
+ w := l.Words[len(l.Words)-1]
+ if len(w.Chars) == 0 {
+ if len(w.Text) > 0 && w.Text[len(w.Text)-1] == '-' {
+ p.Lines[i].Words[len(l.Words)-1].Text = w.Text[0:len(w.Text)-1] + p.Lines[i+1].Words[0].Text
+ p.Lines[i+1].Words[0].Text = ""
+ }
+ } else {
+ log.Printf("TODO: handle OcrChar")
}
- } else {
- log.Printf("TODO: handle OcrChar")
}
}
} else {