diff options
| -rw-r--r-- | cmd/pdfbook/main.go | 85 | ||||
| -rw-r--r-- | pdf.go | 90 | 
2 files changed, 102 insertions, 73 deletions
| diff --git a/cmd/pdfbook/main.go b/cmd/pdfbook/main.go index 45bbc4f..3146865 100644 --- a/cmd/pdfbook/main.go +++ b/cmd/pdfbook/main.go @@ -1,24 +1,23 @@  package main  import ( -	"errors"  	"flag"  	"fmt" -	"html" -	"image" -	_ "image/jpeg" -	_ "image/png" -	"io/ioutil"  	"log"  	"os"  	"path"  	"path/filepath"  	"strings" -	"rescribe.xyz/gofpdf" -	"rescribe.xyz/utils/pkg/hocr" +	"rescribe.xyz/bookpipeline"  ) +type Pdfer interface { +	Setup() error +	AddPage(imgpath, hocrpath string) error +	Save(path string) error +} +  const pageWidth = 5 // pageWidth in inches  // pxToPt converts a pixel value into a pt value (72 pts per inch) @@ -27,68 +26,7 @@ func pxToPt(i int) float64 {  	return float64(i) / pageWidth  } -// setupPdf creates a new PDF with appropriate settings and fonts -// TODO: this will go in pdf.go in due course -// TODO: find a font that's closer to the average dimensions of the -//       text we're dealing with, and put it somewhere sensible -func setupPdf() *gofpdf.Fpdf { -	pdf := gofpdf.New("P", "pt", "A4", "") -	// Even though it's invisible, we need to add a font which can do -	// UTF-8 so that text renders correctly. -	pdf.AddUTF8Font("dejavu", "", "DejaVuSansCondensed.ttf") -	pdf.SetFont("dejavu", "", 10) -	pdf.SetAutoPageBreak(false, float64(0)) -	return pdf -} - -// addPage adds a page to the pdf with an image and (invisible) -// text from an hocr file -func addPage(pdf *gofpdf.Fpdf, imgpath string, hocrpath string) error { -	file, err := ioutil.ReadFile(hocrpath) -	if err != nil { -		return errors.New(fmt.Sprintf("Could not read file %s: %v", hocrpath, err)) -	} -	// TODO: change hocr.Parse to take a Reader rather than []byte -	h, err := hocr.Parse(file) -	if err != nil { -		return errors.New(fmt.Sprintf("Could not parse hocr in file %s: %v", hocrpath, err)) -	} - -	f, err := os.Open(imgpath) -	defer f.Close() -	if err != nil { -		return errors.New(fmt.Sprintf("Could not open file %s: %v", imgpath, err)) -	} -	img, _, err := image.Decode(f) -	if err != nil { -		return errors.New(fmt.Sprintf("Could not decode image: %v", err)) -	} -	b := img.Bounds() -	pdf.AddPageFormat("P", gofpdf.SizeType{Wd: pxToPt(b.Dx()), Ht: pxToPt(b.Dy())}) - -	// TODO: check for errors in pdf as going through - -	_ = pdf.RegisterImageOptions(imgpath, gofpdf.ImageOptions{}) -	pdf.ImageOptions(imgpath, 0, 0, pxToPt(b.Dx()), pxToPt(b.Dy()), false, gofpdf.ImageOptions{}, 0, "") - -	pdf.SetTextRenderingMode(gofpdf.TextRenderingModeInvisible) - -	for _, l := range h.Lines { -		coords, err := hocr.BoxCoords(l.Title) -		if err != nil { -			continue -		} -		pdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1])) -		pdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(hocr.LineText(l)), "", 0, "T", false, 0, "") -	} -	return nil -} - -func savePdf(pdf *gofpdf.Fpdf, p string) error { -	return pdf.OutputFileAndClose(p) -} - -func walker(pdf *gofpdf.Fpdf) filepath.WalkFunc { +func walker(pdf Pdfer) filepath.WalkFunc {  	return func(fpath string, info os.FileInfo, err error) error {  		if info.IsDir() {  			return nil @@ -104,7 +42,7 @@ func walker(pdf *gofpdf.Fpdf) filepath.WalkFunc {  		} else {  			imgpath = strings.TrimSuffix(fpath, ".hocr") + ".png"  		} -		return addPage(pdf, imgpath, fpath) +		return pdf.AddPage(imgpath, fpath)  	}  } @@ -123,14 +61,15 @@ func main() {  		return  	} -	pdf := setupPdf() +	pdf := new(bookpipeline.Fpdf) +	pdf.Setup()  	err := filepath.Walk(flag.Arg(0), walker(pdf))  	if err != nil {  		log.Fatalln("Failed to walk", flag.Arg(0), err)  	} -	err = savePdf(pdf, flag.Arg(1)) +	err = pdf.Save(flag.Arg(1))  	if err != nil {  		log.Fatalln("Failed to save", flag.Arg(1), err)  	} @@ -0,0 +1,90 @@ +package bookpipeline + +import ( +	"errors" +	"fmt" +	"html" +	"image" +	_ "image/jpeg" +	_ "image/png" +	"io/ioutil" +	"os" + +	//"rescribe.xyz/gofpdf@addtextrenderingmode" +	"rescribe.xyz/gofpdf" +	"rescribe.xyz/utils/pkg/hocr" +) + +const pageWidth = 5 // pageWidth in inches + +// pxToPt converts a pixel value into a pt value (72 pts per inch) +// This uses pageWidth to determine the appropriate value +func pxToPt(i int) float64 { +	return float64(i) / pageWidth +} + +type Fpdf struct { +	fpdf *gofpdf.Fpdf +} + +// Setup creates a new PDF with appropriate settings and fonts +// TODO: find a font that's closer to the average dimensions of the +//       text we're dealing with +// TODO: once we have a good font, embed it in the binary as bytes +func (p *Fpdf) Setup() error { +	p.fpdf = gofpdf.New("P", "pt", "A4", "") +	// Even though it's invisible, we need to add a font which can do +	// UTF-8 so that text renders correctly. +	p.fpdf.AddUTF8Font("dejavu", "", "DejaVuSansCondensed.ttf") +	p.fpdf.SetFont("dejavu", "", 10) +	p.fpdf.SetAutoPageBreak(false, float64(0)) +	return p.fpdf.Error() +} + +// AddPage adds a page to the pdf with an image and (invisible) +// text from an hocr file +func (p *Fpdf) AddPage(imgpath, hocrpath string) error { +	file, err := ioutil.ReadFile(hocrpath) +	if err != nil { +		return errors.New(fmt.Sprintf("Could not read file %s: %v", hocrpath, err)) +	} +	// TODO: change hocr.Parse to take a Reader rather than []byte +	h, err := hocr.Parse(file) +	if err != nil { +		return errors.New(fmt.Sprintf("Could not parse hocr in file %s: %v", hocrpath, err)) +	} + +	f, err := os.Open(imgpath) +	defer f.Close() +	if err != nil { +		return errors.New(fmt.Sprintf("Could not open file %s: %v", imgpath, err)) +	} +	img, _, err := image.Decode(f) +	if err != nil { +		return errors.New(fmt.Sprintf("Could not decode image: %v", err)) +	} +	b := img.Bounds() +	p.fpdf.AddPageFormat("P", gofpdf.SizeType{Wd: pxToPt(b.Dx()), Ht: pxToPt(b.Dy())}) + +	// TODO: check for errors in pdf as going through + +	_ = p.fpdf.RegisterImageOptions(imgpath, gofpdf.ImageOptions{}) +	p.fpdf.ImageOptions(imgpath, 0, 0, pxToPt(b.Dx()), pxToPt(b.Dy()), false, gofpdf.ImageOptions{}, 0, "") + +	p.fpdf.SetTextRenderingMode(gofpdf.TextRenderingModeInvisible) + +	for _, l := range h.Lines { +		coords, err := hocr.BoxCoords(l.Title) +		if err != nil { +			continue +		} +		p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1])) +		p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(hocr.LineText(l)), "", 0, "T", false, 0, "") +	} +	return p.fpdf.Error() +} + +// Save saves the PDF to the file at path +func (p *Fpdf) Save(path string) error { +	return p.fpdf.OutputFileAndClose(path) +} | 
