diff options
Diffstat (limited to 'cmd/pdfbook')
-rw-r--r-- | cmd/pdfbook/main.go | 85 |
1 files changed, 12 insertions, 73 deletions
diff --git a/cmd/pdfbook/main.go b/cmd/pdfbook/main.go index 45bbc4f..3146865 100644 --- a/cmd/pdfbook/main.go +++ b/cmd/pdfbook/main.go @@ -1,24 +1,23 @@ package main import ( - "errors" "flag" "fmt" - "html" - "image" - _ "image/jpeg" - _ "image/png" - "io/ioutil" "log" "os" "path" "path/filepath" "strings" - "rescribe.xyz/gofpdf" - "rescribe.xyz/utils/pkg/hocr" + "rescribe.xyz/bookpipeline" ) +type Pdfer interface { + Setup() error + AddPage(imgpath, hocrpath string) error + Save(path string) error +} + const pageWidth = 5 // pageWidth in inches // pxToPt converts a pixel value into a pt value (72 pts per inch) @@ -27,68 +26,7 @@ func pxToPt(i int) float64 { return float64(i) / pageWidth } -// setupPdf creates a new PDF with appropriate settings and fonts -// TODO: this will go in pdf.go in due course -// TODO: find a font that's closer to the average dimensions of the -// text we're dealing with, and put it somewhere sensible -func setupPdf() *gofpdf.Fpdf { - pdf := gofpdf.New("P", "pt", "A4", "") - // Even though it's invisible, we need to add a font which can do - // UTF-8 so that text renders correctly. - pdf.AddUTF8Font("dejavu", "", "DejaVuSansCondensed.ttf") - pdf.SetFont("dejavu", "", 10) - pdf.SetAutoPageBreak(false, float64(0)) - return pdf -} - -// addPage adds a page to the pdf with an image and (invisible) -// text from an hocr file -func addPage(pdf *gofpdf.Fpdf, imgpath string, hocrpath string) error { - file, err := ioutil.ReadFile(hocrpath) - if err != nil { - return errors.New(fmt.Sprintf("Could not read file %s: %v", hocrpath, err)) - } - // TODO: change hocr.Parse to take a Reader rather than []byte - h, err := hocr.Parse(file) - if err != nil { - return errors.New(fmt.Sprintf("Could not parse hocr in file %s: %v", hocrpath, err)) - } - - f, err := os.Open(imgpath) - defer f.Close() - if err != nil { - return errors.New(fmt.Sprintf("Could not open file %s: %v", imgpath, err)) - } - img, _, err := image.Decode(f) - if err != nil { - return errors.New(fmt.Sprintf("Could not decode image: %v", err)) - } - b := img.Bounds() - pdf.AddPageFormat("P", gofpdf.SizeType{Wd: pxToPt(b.Dx()), Ht: pxToPt(b.Dy())}) - - // TODO: check for errors in pdf as going through - - _ = pdf.RegisterImageOptions(imgpath, gofpdf.ImageOptions{}) - pdf.ImageOptions(imgpath, 0, 0, pxToPt(b.Dx()), pxToPt(b.Dy()), false, gofpdf.ImageOptions{}, 0, "") - - pdf.SetTextRenderingMode(gofpdf.TextRenderingModeInvisible) - - for _, l := range h.Lines { - coords, err := hocr.BoxCoords(l.Title) - if err != nil { - continue - } - pdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1])) - pdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(hocr.LineText(l)), "", 0, "T", false, 0, "") - } - return nil -} - -func savePdf(pdf *gofpdf.Fpdf, p string) error { - return pdf.OutputFileAndClose(p) -} - -func walker(pdf *gofpdf.Fpdf) filepath.WalkFunc { +func walker(pdf Pdfer) filepath.WalkFunc { return func(fpath string, info os.FileInfo, err error) error { if info.IsDir() { return nil @@ -104,7 +42,7 @@ func walker(pdf *gofpdf.Fpdf) filepath.WalkFunc { } else { imgpath = strings.TrimSuffix(fpath, ".hocr") + ".png" } - return addPage(pdf, imgpath, fpath) + return pdf.AddPage(imgpath, fpath) } } @@ -123,14 +61,15 @@ func main() { return } - pdf := setupPdf() + pdf := new(bookpipeline.Fpdf) + pdf.Setup() err := filepath.Walk(flag.Arg(0), walker(pdf)) if err != nil { log.Fatalln("Failed to walk", flag.Arg(0), err) } - err = savePdf(pdf, flag.Arg(1)) + err = pdf.Save(flag.Arg(1)) if err != nil { log.Fatalln("Failed to save", flag.Arg(1), err) } |