diff options
| author | Nick White <git@njw.name> | 2019-10-31 12:58:45 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-10-31 12:58:45 +0000 | 
| commit | 425e2146190ecb0c083817093bd4819c517edd86 (patch) | |
| tree | 62a5cc26e352f104ac0d0351b2bc9bf4024c7a23 | |
| parent | 2c65294498ce09771b88fd0ee027019fe2678d5a (diff) | |
PDF: lay out every word with coordinates separately
I had presumed that laying out each word separately would mean
that several words next to each other couldn't be reliably
searched for as a phrase, but this seems not to be the case.
| -rw-r--r-- | pdf.go | 12 | 
1 file changed, 7 insertions, 5 deletions
@@ -74,12 +74,14 @@ func (p *Fpdf) AddPage(imgpath, hocrpath string) error {  	p.fpdf.SetTextRenderingMode(gofpdf.TextRenderingModeInvisible)  	for _, l := range h.Lines { -		coords, err := hocr.BoxCoords(l.Title) -		if err != nil { -			continue +		for _, w := range l.Words { +			coords, err := hocr.BoxCoords(w.Title) +			if err != nil { +				continue +			} +			p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1])) +			p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(w.Text), "", 0, "T", false, 0, "")  		} -		p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1])) -		p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(hocr.LineText(l)), "", 0, "T", false, 0, "")  	}  	return p.fpdf.Error()  }  | 
