From 6a5d6072b1558c43f15393754b7deaa1e3ecad7a Mon Sep 17 00:00:00 2001 From: Nick White Date: Wed, 20 Nov 2019 17:41:35 +0000 Subject: Implement image resizing option into PDF generation, so that smaller PDFs can be generated --- cmd/pdfbook/main.go | 30 +++++++++++++++--------------- pdf.go | 33 ++++++++++++++++++++++++++------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/cmd/pdfbook/main.go b/cmd/pdfbook/main.go index 79db353..3fdcc67 100644 --- a/cmd/pdfbook/main.go +++ b/cmd/pdfbook/main.go @@ -16,7 +16,7 @@ import ( type Pdfer interface { Setup() error - AddPage(imgpath, hocrpath string) error + AddPage(imgpath, hocrpath string, smaller bool) error Save(path string) error } @@ -49,7 +49,7 @@ func imgPath(hocrpath string, colour bool) string { } // addBest adds the pages in dir/best to a PDF -func addBest(dir string, pdf Pdfer, colour bool) error { +func addBest(dir string, pdf Pdfer, colour, smaller bool) error { f, err := os.Open(path.Join(dir, "best")) if err != nil { log.Fatalln("Failed to open best file", err) @@ -70,7 +70,7 @@ func addBest(dir string, pdf Pdfer, colour bool) error { for _, f := range files { hocrpath := path.Join(dir, f) img := imgPath(hocrpath, colour) - err := pdf.AddPage(img, hocrpath) + err := pdf.AddPage(img, hocrpath, smaller) if err != nil { return err } @@ -80,7 +80,7 @@ func addBest(dir string, pdf Pdfer, colour bool) error { // walker walks each hocr file in a directory and adds a page to // the pdf for each one. 
-func walker(pdf Pdfer, colour bool) filepath.WalkFunc { +func walker(pdf Pdfer, colour, smaller bool) filepath.WalkFunc { return func(fpath string, info os.FileInfo, err error) error { if info.IsDir() { return nil @@ -88,15 +88,15 @@ func walker(pdf Pdfer, colour bool) filepath.WalkFunc { if path.Ext(fpath) != ".hocr" { return nil } - return pdf.AddPage(imgPath(fpath, colour), fpath) + return pdf.AddPage(imgPath(fpath, colour), fpath, smaller) } } func main() { - // TODO: probably take flags to resize / change quality in due course colour := flag.Bool("c", false, "colour") + smaller := flag.Bool("s", false, "smaller") flag.Usage = func() { - fmt.Fprintln(flag.CommandLine.Output(), "Usage: pdfbook [-c] hocrdir out.pdf") + fmt.Fprintln(flag.CommandLine.Output(), "Usage: pdfbook [-c] [-s] hocrdir out.pdf") flag.PrintDefaults() } flag.Parse() @@ -106,24 +106,24 @@ func main() { return } - _, err := os.Stat(path.Join(flag.Arg(0), "best")) - if err != nil && !os.IsNotExist(err) { - log.Fatalln("Failed to stat best", err) - } - pdf := new(bookpipeline.Fpdf) - err = pdf.Setup() + err := pdf.Setup() if err != nil { log.Fatalln("Failed to set up PDF", err) } + _, err = os.Stat(path.Join(flag.Arg(0), "best")) + if err != nil && !os.IsNotExist(err) { + log.Fatalln("Failed to stat best", err) + } + if os.IsNotExist(err) { - err = filepath.Walk(flag.Arg(0), walker(pdf, *colour)) + err = filepath.Walk(flag.Arg(0), walker(pdf, *colour, *smaller)) if err != nil { log.Fatalln("Failed to walk", flag.Arg(0), err) } } else { - err = addBest(flag.Arg(0), pdf, *colour) + err = addBest(flag.Arg(0), pdf, *colour, *smaller) if err != nil { log.Fatalln("Failed to add best pages", err) } diff --git a/pdf.go b/pdf.go index 9fa52f3..79d2b8c 100644 --- a/pdf.go +++ b/pdf.go @@ -7,17 +7,19 @@ import ( "fmt" "html" "image" - _ "image/jpeg" - _ "image/png" + "image/jpeg" + "image/png" "io/ioutil" "os" "github.com/jung-kurt/gofpdf" + "golang.org/x/image/draw" "rescribe.xyz/utils/pkg/hocr" ) // 
TODO: maybe set this in Fpdf struct const pageWidth = 5 // pageWidth in inches +const scaleSmaller = 3 // amount the width and height are divided by // pxToPt converts a pixel value into a pt value (72 pts per inch) // This uses pageWidth to determine the appropriate value @@ -56,7 +58,7 @@ func (p *Fpdf) Setup() error { // AddPage adds a page to the pdf with an image and (invisible) // text from an hocr file -func (p *Fpdf) AddPage(imgpath, hocrpath string) error { +func (p *Fpdf) AddPage(imgpath, hocrpath string, smaller bool) error { file, err := ioutil.ReadFile(hocrpath) if err != nil { return errors.New(fmt.Sprintf("Could not read file %s: %v", hocrpath, err)) @@ -66,19 +68,36 @@ func (p *Fpdf) AddPage(imgpath, hocrpath string) error { return errors.New(fmt.Sprintf("Could not parse hocr in file %s: %v", hocrpath, err)) } - f, err := os.Open(imgpath) - defer f.Close() + imgf, err := os.Open(imgpath) + defer imgf.Close() if err != nil { return errors.New(fmt.Sprintf("Could not open file %s: %v", imgpath, err)) } - img, _, err := image.Decode(f) + img, imgtype, err := image.Decode(imgf) if err != nil { return errors.New(fmt.Sprintf("Could not decode image: %v", err)) } b := img.Bounds() + if smaller { + r := image.Rect(0, 0, b.Max.X/scaleSmaller, b.Max.Y/scaleSmaller) + smimg := image.NewRGBA(r) + draw.ApproxBiLinear.Scale(smimg, r, img, b, draw.Over, nil) + img = smimg + } + + var buf bytes.Buffer + if imgtype == "jpeg" { + err = jpeg.Encode(&buf, img, &jpeg.Options{Quality: jpeg.DefaultQuality}) + } else { + err = png.Encode(&buf, img) + } + if err != nil { + return err + } + p.fpdf.AddPageFormat("P", gofpdf.SizeType{Wd: pxToPt(b.Dx()), Ht: pxToPt(b.Dy())}) - _ = p.fpdf.RegisterImageOptions(imgpath, gofpdf.ImageOptions{}) + _ = p.fpdf.RegisterImageOptionsReader(imgpath, gofpdf.ImageOptions{ImageType: imgtype}, &buf) p.fpdf.ImageOptions(imgpath, 0, 0, pxToPt(b.Dx()), pxToPt(b.Dy()), false, gofpdf.ImageOptions{}, 0, "") p.fpdf.SetTextRenderingMode(3) -- cgit 
v1.2.1-24-ge1ad