summaryrefslogtreecommitdiff
path: root/cmd/pdfbook/main.go
blob: bdb486d299e32f619903d0a8f2e25deae82d86c5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
package main

import (
	"bufio"
	"flag"
	"fmt"
	"log"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strings"

	"rescribe.xyz/bookpipeline"
)

// Pdfer is the set of operations this command needs from a PDF
// builder. main passes a *bookpipeline.Fpdf wherever a Pdfer is
// required.
type Pdfer interface {
	// Setup is called once by main, before any pages are added.
	Setup() error
	// AddPage is called once per hocr file, with the path of the
	// page image and the path of the hocr file itself.
	AddPage(imgpath, hocrpath string) error
	// Save is called last by main, with the output path given on
	// the command line.
	Save(path string) error
}

// pageWidth is the page width, in inches, used by pxToPt.
const pageWidth = 5

// pxToPt scales a pixel value down by pageWidth and returns the
// result as a float64.
// NOTE(review): this was previously described as a px to pt
// conversion at 72 pts per inch, but no factor of 72 is applied
// here - confirm the intended formula.
func pxToPt(i int) float64 {
	px := float64(i)
	return px / pageWidth
}

// imgPath returns the path of the page image that corresponds to
// hocrpath. The image is always placed in the same directory as the
// hocr file, and its name is derived from the hocr file name:
//
//   - colour false: the ".hocr" suffix is replaced with ".png"
//   - colour true: everything from the first "_bin" onwards is
//     replaced with ".jpg"; if the name contains no "_bin", the
//     ".hocr" suffix is replaced with ".jpg" instead
//
// hocrpath is treated as an OS-native path, so path/filepath is
// used rather than path; paths produced by filepath.Walk use the
// platform's separator.
func imgPath(hocrpath string, colour bool) string {
	dir := filepath.Dir(hocrpath)
	name := filepath.Base(hocrpath)

	stem := strings.TrimSuffix(name, ".hocr")
	ext := ".png"
	if colour {
		ext = ".jpg"
		// Colour images are named after the part of the hocr name
		// that precedes the "_bin" marker, when there is one.
		if i := strings.Index(name, "_bin"); i >= 0 {
			stem = name[:i]
		}
	}
	return filepath.Join(dir, stem+ext)
}

// addBest adds the pages listed in the file dir/best to pdf.
//
// The best file is read line by line; each line whose extension is
// ".hocr" is taken as the name of a hocr file relative to dir, and
// all other lines are ignored. The names are sorted before the
// pages are added, so the page order does not depend on the order
// of lines in the best file. The image for each page is located
// with imgPath, using colour to choose between the colour and
// non-colour image.
//
// An error is returned if the best file cannot be opened or read,
// or as soon as pdf.AddPage fails for any page.
func addBest(dir string, pdf Pdfer, colour bool) error {
	f, err := os.Open(path.Join(dir, "best"))
	if err != nil {
		return fmt.Errorf("opening best file: %w", err)
	}
	defer f.Close()

	var files []string
	s := bufio.NewScanner(f)
	for s.Scan() {
		if fn := s.Text(); path.Ext(fn) == ".hocr" {
			files = append(files, fn)
		}
	}
	// Scan returns false on a read error as well as at EOF; without
	// this check a failed read would silently drop pages.
	if err := s.Err(); err != nil {
		return fmt.Errorf("reading best file: %w", err)
	}
	sort.Strings(files)

	for _, fn := range files {
		hocrpath := path.Join(dir, fn)
		if err := pdf.AddPage(imgPath(hocrpath, colour), hocrpath); err != nil {
			return err
		}
	}
	return nil
}

// walker returns a filepath.WalkFunc that adds a page to pdf for
// each file with a ".hocr" extension that it visits. Directories
// and files with any other extension are skipped. The image for
// each page is located with imgPath, using colour to choose between
// the colour and non-colour image.
//
// Any error reported by the walk itself is returned unchanged,
// which stops the walk. This check has to come first: when the walk
// reports an error, info may be nil.
func walker(pdf Pdfer, colour bool) filepath.WalkFunc {
	return func(fpath string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() || filepath.Ext(fpath) != ".hocr" {
			return nil
		}
		return pdf.AddPage(imgPath(fpath, colour), fpath)
	}
}

// main builds a PDF from the hocr files in a directory.
//
// Usage: pdfbook [-c] hocrdir out.pdf
//
// If hocrdir contains a file named "best", only the pages listed in
// it are added (see addBest); otherwise hocrdir is walked and every
// hocr file found is added (see walker). The -c flag selects the
// colour images rather than the default ones. Any failure is logged
// and terminates the program.
func main() {
	// TODO: probably take flags to resize / change quality in due course
	colour := flag.Bool("c", false, "colour")
	flag.Usage = func() {
		fmt.Fprintln(flag.CommandLine.Output(), "Usage: pdfbook [-c] hocrdir out.pdf")
		flag.PrintDefaults()
	}
	flag.Parse()

	if flag.NArg() != 2 {
		flag.Usage()
		return
	}
	hocrdir, outpath := flag.Arg(0), flag.Arg(1)

	// A missing best file is not an error, it just selects the
	// directory walk below; any other stat failure is fatal.
	_, statErr := os.Stat(path.Join(hocrdir, "best"))
	if statErr != nil && !os.IsNotExist(statErr) {
		log.Fatalln("Failed to stat best", statErr)
	}

	pdf := new(bookpipeline.Fpdf)
	if err := pdf.Setup(); err != nil {
		log.Fatalln("Failed to set up pdf", err)
	}

	// After the check above, statErr is either nil (best exists) or
	// a not-exist error (no best file).
	if statErr != nil {
		if err := filepath.Walk(hocrdir, walker(pdf, *colour)); err != nil {
			log.Fatalln("Failed to walk", hocrdir, err)
		}
	} else {
		if err := addBest(hocrdir, pdf, *colour); err != nil {
			log.Fatalln("Failed to add best pages", err)
		}
	}

	if err := pdf.Save(outpath); err != nil {
		log.Fatalln("Failed to save", outpath, err)
	}
}