diff options
| author | Nick White <git@njw.name> | 2021-07-23 16:25:38 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2021-07-23 16:25:38 +0100 | 
| commit | 10eb175a5f651748a57297b35d86a3c9c0987e80 (patch) | |
| tree | ce5c7a6bd635d157a0f94da0a21b5f42e20ae308 | |
| parent | 9dc9796fa637829b8be72d0721dc2da707c84451 (diff) | |
dehyphenate: Update to reflect multiple page support in hocr package
| -rw-r--r-- | cmd/dehyphenate/main.go | 18 | 
1 file changed, 10 insertions, 8 deletions
```diff
diff --git a/cmd/dehyphenate/main.go b/cmd/dehyphenate/main.go
index 284fc1a..4d48e08 100644
--- a/cmd/dehyphenate/main.go
+++ b/cmd/dehyphenate/main.go
@@ -77,15 +77,17 @@ func main() {
 			log.Fatal(err)
 		}
 
-		for i, l := range h.Lines {
-			w := l.Words[len(l.Words)-1]
-			if len(w.Chars) == 0 {
-				if len(w.Text) > 0 && w.Text[len(w.Text)-1] == '-' {
-					h.Lines[i].Words[len(l.Words)-1].Text = w.Text[0:len(w.Text)-1] + h.Lines[i+1].Words[0].Text
-					h.Lines[i+1].Words[0].Text = ""
+		for _, p := range h.Pages {
+			for i, l := range p.Lines {
+				w := l.Words[len(l.Words)-1]
+				if len(w.Chars) == 0 {
+					if len(w.Text) > 0 && w.Text[len(w.Text)-1] == '-' {
+						p.Lines[i].Words[len(l.Words)-1].Text = w.Text[0:len(w.Text)-1] + p.Lines[i+1].Words[0].Text
+						p.Lines[i+1].Words[0].Text = ""
+					}
+				} else {
+					log.Printf("TODO: handle OcrChar")
 				}
-			} else {
-				log.Printf("TODO: handle OcrChar")
 			}
 		}
 	} else {
```
