From b9aeada4e573643985d6df03f672f2c2fec169d8 Mon Sep 17 00:00:00 2001 From: Nick White Date: Thu, 31 Oct 2019 15:06:50 +0000 Subject: PDF: add functionality to use "best" file if it exists --- cmd/pdfbook/main.go | 88 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 71 insertions(+), 17 deletions(-) (limited to 'cmd') diff --git a/cmd/pdfbook/main.go b/cmd/pdfbook/main.go index 4dc2c93..bdb486d 100644 --- a/cmd/pdfbook/main.go +++ b/cmd/pdfbook/main.go @@ -1,12 +1,14 @@ package main import ( + "bufio" "flag" "fmt" "log" "os" "path" "path/filepath" + "sort" "strings" "rescribe.xyz/bookpipeline" @@ -26,31 +28,71 @@ func pxToPt(i int) float64 { return float64(i) / pageWidth } +// imgPath returns an appropriate path for the image that +// corresponds with the hocrpath +func imgPath(hocrpath string, colour bool) string { + d := path.Dir(hocrpath) + name := path.Base(hocrpath) + nosuffix := strings.TrimSuffix(name, ".hocr") + imgname := "" + if colour { + p := strings.SplitN(name, "_bin", 2) + if len(p) > 1 { + imgname = p[0] + ".jpg" + } else { + imgname = nosuffix + ".jpg" + } + } else { + imgname = nosuffix + ".png" + } + return path.Join(d, imgname) +} + +// addBest adds the pages in dir/best to a PDF +func addBest(dir string, pdf Pdfer, colour bool) error { + f, err := os.Open(path.Join(dir, "best")) + if err != nil { + log.Fatalln("Failed to open best file", err) + } + defer f.Close() + + s := bufio.NewScanner(f) + var files []string + for s.Scan() { + fn := s.Text() + if path.Ext(fn) != ".hocr" { + continue + } + files = append(files, fn) + } + sort.Strings(files) + + for _, f := range files { + hocrpath := path.Join(dir, f) + img := imgPath(hocrpath, colour) + err := pdf.AddPage(img, hocrpath) + if err != nil { + return err + } + } + return nil +} + +// walker walks each hocr file in a directory and adds a page to +// the pdf for each one. func walker(pdf Pdfer, colour bool) filepath.WalkFunc { return func(fpath string, info os.FileInfo, err error) error { if info.IsDir() { return nil } - if !strings.HasSuffix(fpath, ".hocr") { + if path.Ext(fpath) != ".hocr" { return nil } - imgpath := "" - if colour { - p := strings.SplitN(path.Base(fpath), "_bin", 2) - if len(p) > 1 { - imgpath = path.Join(path.Dir(fpath), p[0] + ".jpg") - } else { - imgpath = strings.TrimSuffix(fpath, ".hocr") + ".jpg" - } - } else { - imgpath = strings.TrimSuffix(fpath, ".hocr") + ".png" - } - return pdf.AddPage(imgpath, fpath) + return pdf.AddPage(imgPath(fpath, colour), fpath) } } func main() { - // TODO: handle best // TODO: probably take flags to resize / change quality in due course colour := flag.Bool("c", false, "colour") flag.Usage = func() { @@ -64,12 +106,24 @@ func main() { return } + _, err := os.Stat(path.Join(flag.Arg(0), "best")) + if err != nil && !os.IsNotExist(err) { + log.Fatalln("Failed to stat best", err) + } + pdf := new(bookpipeline.Fpdf) pdf.Setup() - err := filepath.Walk(flag.Arg(0), walker(pdf, *colour)) - if err != nil { - log.Fatalln("Failed to walk", flag.Arg(0), err) + if os.IsNotExist(err) { + err = filepath.Walk(flag.Arg(0), walker(pdf, *colour)) + if err != nil { + log.Fatalln("Failed to walk", flag.Arg(0), err) + } + } else { + err = addBest(flag.Arg(0), pdf, *colour) + if err != nil { + log.Fatalln("Failed to add best pages", err) + } } err = pdf.Save(flag.Arg(1)) -- cgit v1.2.1-24-ge1ad