summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2019-10-31 12:58:45 +0000
committer: Nick White <git@njw.name> 2019-10-31 12:58:45 +0000
commit: 425e2146190ecb0c083817093bd4819c517edd86 (patch)
tree: 62a5cc26e352f104ac0d0351b2bc9bf4024c7a23
parent: 2c65294498ce09771b88fd0ee027019fe2678d5a (diff)
PDF: lay out every word with coordinates separately
I presumed that laying out each word separately would mean that multiple words next to each other couldn't be reliably searched for, but that seems not to be the case.
-rw-r--r-- pdf.go 12
1 files changed, 7 insertions, 5 deletions
diff --git a/pdf.go b/pdf.go
index e7480fe..bfd2d71 100644
--- a/pdf.go
+++ b/pdf.go
@@ -74,12 +74,14 @@ func (p *Fpdf) AddPage(imgpath, hocrpath string) error {
p.fpdf.SetTextRenderingMode(gofpdf.TextRenderingModeInvisible)
for _, l := range h.Lines {
- coords, err := hocr.BoxCoords(l.Title)
- if err != nil {
- continue
+ for _, w := range l.Words {
+ coords, err := hocr.BoxCoords(w.Title)
+ if err != nil {
+ continue
+ }
+ p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1]))
+ p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(w.Text), "", 0, "T", false, 0, "")
}
- p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1]))
- p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(hocr.LineText(l)), "", 0, "T", false, 0, "")
}
return p.fpdf.Error()
}