summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-10-31 12:30:56 +0000
committer Nick White <git@njw.name> 2019-10-31 12:30:56 +0000
commit a243b1f54156657b1f985a25142ebf9d2da20aa9 (patch)
tree d92360ab59246fcf72fee1e52157f3532f85ecb2
parent 99ebbef45d19d14636049ec8d863abb38305c6c5 (diff)
Move PDF handling code to a separate file
-rw-r--r-- cmd/pdfbook/main.go 85
-rw-r--r-- pdf.go 90
2 files changed, 102 insertions, 73 deletions
diff --git a/cmd/pdfbook/main.go b/cmd/pdfbook/main.go
index 45bbc4f..3146865 100644
--- a/cmd/pdfbook/main.go
+++ b/cmd/pdfbook/main.go
@@ -1,24 +1,23 @@
package main
import (
- "errors"
"flag"
"fmt"
- "html"
- "image"
- _ "image/jpeg"
- _ "image/png"
- "io/ioutil"
"log"
"os"
"path"
"path/filepath"
"strings"
- "rescribe.xyz/gofpdf"
- "rescribe.xyz/utils/pkg/hocr"
+ "rescribe.xyz/bookpipeline"
)
+type Pdfer interface { // Pdfer is the set of PDF-building methods that walker and main call
+ Setup() error // called once by main before the directory walk starts
+ AddPage(imgpath, hocrpath string) error // called by walker with an image path and the walked file path
+ Save(path string) error // called by main with the output path after the walk succeeds
+}
+
const pageWidth = 5 // pageWidth in inches
// pxToPt converts a pixel value into a pt value (72 pts per inch)
@@ -27,68 +26,7 @@ func pxToPt(i int) float64 {
return float64(i) / pageWidth
}
-// setupPdf creates a new PDF with appropriate settings and fonts
-// TODO: this will go in pdf.go in due course
-// TODO: find a font that's closer to the average dimensions of the
-// text we're dealing with, and put it somewhere sensible
-func setupPdf() *gofpdf.Fpdf {
- pdf := gofpdf.New("P", "pt", "A4", "")
- // Even though it's invisible, we need to add a font which can do
- // UTF-8 so that text renders correctly.
- pdf.AddUTF8Font("dejavu", "", "DejaVuSansCondensed.ttf")
- pdf.SetFont("dejavu", "", 10)
- pdf.SetAutoPageBreak(false, float64(0))
- return pdf
-}
-
-// addPage adds a page to the pdf with an image and (invisible)
-// text from an hocr file
-func addPage(pdf *gofpdf.Fpdf, imgpath string, hocrpath string) error {
- file, err := ioutil.ReadFile(hocrpath)
- if err != nil {
- return errors.New(fmt.Sprintf("Could not read file %s: %v", hocrpath, err))
- }
- // TODO: change hocr.Parse to take a Reader rather than []byte
- h, err := hocr.Parse(file)
- if err != nil {
- return errors.New(fmt.Sprintf("Could not parse hocr in file %s: %v", hocrpath, err))
- }
-
- f, err := os.Open(imgpath)
- defer f.Close()
- if err != nil {
- return errors.New(fmt.Sprintf("Could not open file %s: %v", imgpath, err))
- }
- img, _, err := image.Decode(f)
- if err != nil {
- return errors.New(fmt.Sprintf("Could not decode image: %v", err))
- }
- b := img.Bounds()
- pdf.AddPageFormat("P", gofpdf.SizeType{Wd: pxToPt(b.Dx()), Ht: pxToPt(b.Dy())})
-
- // TODO: check for errors in pdf as going through
-
- _ = pdf.RegisterImageOptions(imgpath, gofpdf.ImageOptions{})
- pdf.ImageOptions(imgpath, 0, 0, pxToPt(b.Dx()), pxToPt(b.Dy()), false, gofpdf.ImageOptions{}, 0, "")
-
- pdf.SetTextRenderingMode(gofpdf.TextRenderingModeInvisible)
-
- for _, l := range h.Lines {
- coords, err := hocr.BoxCoords(l.Title)
- if err != nil {
- continue
- }
- pdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1]))
- pdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(hocr.LineText(l)), "", 0, "T", false, 0, "")
- }
- return nil
-}
-
-func savePdf(pdf *gofpdf.Fpdf, p string) error {
- return pdf.OutputFileAndClose(p)
-}
-
-func walker(pdf *gofpdf.Fpdf) filepath.WalkFunc {
+func walker(pdf Pdfer) filepath.WalkFunc {
return func(fpath string, info os.FileInfo, err error) error {
if info.IsDir() {
return nil
@@ -104,7 +42,7 @@ func walker(pdf *gofpdf.Fpdf) filepath.WalkFunc {
} else {
imgpath = strings.TrimSuffix(fpath, ".hocr") + ".png"
}
- return addPage(pdf, imgpath, fpath)
+ return pdf.AddPage(imgpath, fpath)
}
}
@@ -123,14 +61,15 @@ func main() {
return
}
- pdf := setupPdf()
+ pdf := new(bookpipeline.Fpdf)
+ pdf.Setup()
err := filepath.Walk(flag.Arg(0), walker(pdf))
if err != nil {
log.Fatalln("Failed to walk", flag.Arg(0), err)
}
- err = savePdf(pdf, flag.Arg(1))
+ err = pdf.Save(flag.Arg(1))
if err != nil {
log.Fatalln("Failed to save", flag.Arg(1), err)
}
diff --git a/pdf.go b/pdf.go
new file mode 100644
index 0000000..e7480fe
--- /dev/null
+++ b/pdf.go
@@ -0,0 +1,90 @@
+package bookpipeline
+
+import (
+ "errors"
+ "fmt"
+ "html"
+ "image"
+ _ "image/jpeg"
+ _ "image/png"
+ "io/ioutil"
+ "os"
+
+ //"rescribe.xyz/gofpdf@addtextrenderingmode"
+ "rescribe.xyz/gofpdf"
+ "rescribe.xyz/utils/pkg/hocr"
+)
+
+const pageWidth = 5 // pageWidth in inches
+
+// pxToPt converts a pixel value into a pt value by dividing it by pageWidth
+// NOTE(review): how this division relates to 72 pts per inch is not evident from this code - confirm
+func pxToPt(i int) float64 {
+ return float64(i) / pageWidth
+}
+
+type Fpdf struct { // Fpdf wraps a gofpdf document that is built up through Setup, AddPage and Save
+ fpdf *gofpdf.Fpdf // assigned by Setup; nil until Setup has been called
+}
+
+// Setup creates a new gofpdf document, registers and selects the dejavu font, and turns off automatic page breaks
+// TODO: find a font that's closer to the average dimensions of the
+// text we're dealing with
+// TODO: once we have a good font, embed it in the binary as bytes
+func (p *Fpdf) Setup() error {
+ p.fpdf = gofpdf.New("P", "pt", "A4", "")
+ // Even though it's invisible, we need to add a font which can do
+ // UTF-8 so that text renders correctly.
+ p.fpdf.AddUTF8Font("dejavu", "", "DejaVuSansCondensed.ttf") // NOTE(review): bare filename; where gofpdf looks for it is not shown here
+ p.fpdf.SetFont("dejavu", "", 10)
+ p.fpdf.SetAutoPageBreak(false, float64(0))
+ return p.fpdf.Error() // hand back whatever error state the document reports after setup
+}
+
+// AddPage adds a page to the pdf, sized from the image at imgpath, holding that
+// image plus (invisible) text from the hocr file at hocrpath
+func (p *Fpdf) AddPage(imgpath, hocrpath string) error {
+	file, err := ioutil.ReadFile(hocrpath)
+	if err != nil {
+		return errors.New(fmt.Sprintf("Could not read file %s: %v", hocrpath, err))
+	}
+	// TODO: change hocr.Parse to take a Reader rather than []byte
+	h, err := hocr.Parse(file)
+	if err != nil {
+		return errors.New(fmt.Sprintf("Could not parse hocr in file %s: %v", hocrpath, err))
+	}
+
+	f, err := os.Open(imgpath)
+	if err != nil {
+		return errors.New(fmt.Sprintf("Could not open file %s: %v", imgpath, err))
+	}
+	defer f.Close() // deferred only once Open has succeeded, so f is a valid file
+	img, _, err := image.Decode(f)
+	if err != nil {
+		return errors.New(fmt.Sprintf("Could not decode image: %v", err))
+	}
+	b := img.Bounds()
+	p.fpdf.AddPageFormat("P", gofpdf.SizeType{Wd: pxToPt(b.Dx()), Ht: pxToPt(b.Dy())})
+
+	// TODO: check for errors in pdf as going through
+	// The value returned by RegisterImageOptions is deliberately discarded.
+	_ = p.fpdf.RegisterImageOptions(imgpath, gofpdf.ImageOptions{})
+	p.fpdf.ImageOptions(imgpath, 0, 0, pxToPt(b.Dx()), pxToPt(b.Dy()), false, gofpdf.ImageOptions{}, 0, "")
+
+	p.fpdf.SetTextRenderingMode(gofpdf.TextRenderingModeInvisible)
+	// NOTE(review): coords[2] and coords[3] are passed on as the cell's width and height - confirm BoxCoords returns sizes, not far-corner positions
+	for _, l := range h.Lines {
+		coords, err := hocr.BoxCoords(l.Title)
+		if err != nil {
+			continue // skip lines whose box coordinates cannot be read
+		}
+		p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1]))
+		p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(hocr.LineText(l)), "", 0, "T", false, 0, "")
+	}
+	return p.fpdf.Error()
+}
+
+// Save saves the PDF to the file at path, returning the error from OutputFileAndClose
+func (p *Fpdf) Save(path string) error {
+ return p.fpdf.OutputFileAndClose(path)
+}