diff options
| author | Nick White <git@njw.name> | 2021-08-09 15:51:50 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2021-08-09 15:51:50 +0100 | 
| commit | 69eeb41a33f6a764fc6baf1a95e629a6482b67ea (patch) | |
| tree | 0e28eef52d0eef3405c16d9ee853e43977e33c4f | |
| parent | 1f2a05e466c195dde83effd82c96d4329259d249 (diff) | |
pdf: significantly improve character coordinates
A few good changes to make word coordinate lookups significantly
more accurate:
- Set font size dynamically based on the line height (previously it was
  fixed as size 10)
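- Correct height and width of word boxes (previously the right and bottom
  coordinates were passed as the width and height, so the boxes were way
  too large; that probably didn't make a difference in the general case,
  but now they're computed as right - left and bottom - top)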
- Set word box margin to zero
Also change PDF size to A5 paper, as that's closer to an average book page size.
| -rw-r--r-- | pdf.go | 17 | 
1 file changed, 12 insertions, 5 deletions
| @@ -1,4 +1,4 @@ -// Copyright 2019 Nick White. +// Copyright 2021 Nick White.  // Use of this source code is governed by the GPLv3  // license that can be found in the LICENSE file. @@ -22,7 +22,7 @@ import (  )  // TODO: maybe set this in Fpdf struct -const pageWidth = 5    // pageWidth in inches +const pageWidth = 5.8 // pageWidth in inches - 5.8" is A5  const scaleSmaller = 3 // amount the width and height are divided by  // pxToPt converts a pixel value into a pt value (72 pts per inch) @@ -38,7 +38,7 @@ type Fpdf struct {  // Setup creates a new PDF with appropriate settings and fonts  func (p *Fpdf) Setup() error { -	p.fpdf = gofpdf.New("P", "pt", "A4", "") +	p.fpdf = gofpdf.New("P", "pt", "A5", "")  	// Even though it's invisible, we need to add a font which can do  	// UTF-8 so that text renders correctly. @@ -108,13 +108,20 @@ func (p *Fpdf) AddPage(imgpath, hocrpath string, smaller bool) error {  	p.fpdf.SetTextRenderingMode(3)  	for _, l := range h.Lines { +		coords, err := hocr.BoxCoords(l.Title) +		if err != nil { +			continue +		} +		lineheight := coords[3] - coords[1]  		for _, w := range l.Words { -			coords, err := hocr.BoxCoords(w.Title) +			coords, err = hocr.BoxCoords(w.Title)  			if err != nil {  				continue  			}  			p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1])) -			p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(w.Text)+" ", "", 0, "T", false, 0, "") +			p.fpdf.SetCellMargin(0) +			p.fpdf.SetFontSize(pxToPt(lineheight)) +			p.fpdf.CellFormat(pxToPt(coords[2] - coords[0]), pxToPt(coords[3] - coords[1]), html.UnescapeString(w.Text)+" ", "", 0, "T", false, 0, "")  		}  	}  	return p.fpdf.Error() | 
