From 425e2146190ecb0c083817093bd4819c517edd86 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Thu, 31 Oct 2019 12:58:45 +0000
Subject: PDF: lay out every word with coordinates separately

I presumed this would mean that multiple words next to each other
couldn't be reliably searched for, but this seems not to be the case.
---
 pdf.go | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'pdf.go')

diff --git a/pdf.go b/pdf.go
index e7480fe..bfd2d71 100644
--- a/pdf.go
+++ b/pdf.go
@@ -74,12 +74,14 @@ func (p *Fpdf) AddPage(imgpath, hocrpath string) error {
 	p.fpdf.SetTextRenderingMode(gofpdf.TextRenderingModeInvisible)
 
 	for _, l := range h.Lines {
-		coords, err := hocr.BoxCoords(l.Title)
-		if err != nil {
-			continue
+		for _, w := range l.Words {
+			coords, err := hocr.BoxCoords(w.Title)
+			if err != nil {
+				continue
+			}
+			p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1]))
+			p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(w.Text), "", 0, "T", false, 0, "")
 		}
-		p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1]))
-		p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(hocr.LineText(l)), "", 0, "T", false, 0, "")
 	}
 	return p.fpdf.Error()
 }
-- 
cgit v1.2.1-24-ge1ad