diff options
author | Nick White <git@njw.name> | 2019-10-31 12:58:45 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-10-31 12:58:45 +0000 |
commit | 425e2146190ecb0c083817093bd4819c517edd86 (patch) | |
tree | 62a5cc26e352f104ac0d0351b2bc9bf4024c7a23 | |
parent | 2c65294498ce09771b88fd0ee027019fe2678d5a (diff) |
PDF: lay out every word with coordinates separately
I presumed this would mean that multiple words next to
each other couldn't be reliably searched for, but this
seems not to be the case.
-rw-r--r-- | pdf.go | 12 |
1 file changed, 7 insertions, 5 deletions
@@ -74,12 +74,14 @@ func (p *Fpdf) AddPage(imgpath, hocrpath string) error {
 	p.fpdf.SetTextRenderingMode(gofpdf.TextRenderingModeInvisible)
 
 	for _, l := range h.Lines {
-		coords, err := hocr.BoxCoords(l.Title)
-		if err != nil {
-			continue
+		for _, w := range l.Words {
+			coords, err := hocr.BoxCoords(w.Title)
+			if err != nil {
+				continue
+			}
+			p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1]))
+			p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(w.Text), "", 0, "T", false, 0, "")
 		}
-		p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1]))
-		p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(hocr.LineText(l)), "", 0, "T", false, 0, "")
 	}
 	return p.fpdf.Error()
 }