summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- cmd/rescribe/gui.go 9
-rw-r--r-- cmd/rescribe/main.go 14
-rw-r--r-- internal/pipeline/get.go 2
-rw-r--r-- internal/pipeline/pipeline.go 19
4 files changed, 38 insertions, 6 deletions
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index 1c55462..36b4f16 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -340,6 +340,9 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
wipe := widget.NewCheck("Automatically clean image sides", func(bool) {})
+ smallpdf := widget.NewCheck("Reduce size of searchable PDF", func(bool) {})
+ smallpdf.Checked = true
+
trainingLabel := widget.NewLabel("Language / Script")
trainingOpts := mkTrainingSelect([]string{training}, myWindow)
@@ -529,7 +532,7 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
training = training[start:end]
}
- err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, !wipe.Checked)
+ err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, !wipe.Checked, !smallpdf.Checked)
if err != nil && strings.HasSuffix(err.Error(), "context canceled") {
progressBar.SetValue(0.0)
return
@@ -566,8 +569,8 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
trainingBits := container.New(layout.NewBorderLayout(nil, nil, trainingLabel, nil), trainingLabel, trainingOpts)
- fullContent = container.NewVBox(choices, chosen, trainingBits, wipe, gobtn, abortbtn, progressBar, detail)
- startContent := container.NewVBox(choices, trainingBits, wipe, gobtn, abortbtn, progressBar, detail)
+ fullContent = container.NewVBox(choices, chosen, trainingBits, wipe, smallpdf, gobtn, abortbtn, progressBar, detail)
+ startContent := container.NewVBox(choices, trainingBits, wipe, smallpdf, gobtn, abortbtn, progressBar, detail)
myWindow.SetContent(startContent)
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 83153c6..96f6162 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -157,6 +157,7 @@ These training files are included in rescribe, and are always available:
`)
tesscmd := flag.String("tesscmd", deftesscmd, "The Tesseract executable to run. You may need to set this to the full path of Tesseract.exe if you're on Windows.")
wipe := flag.Bool("wipe", false, "Use wiper tool to remove noise like gutters from page before processing.")
+ fullpdf := flag.Bool("fullpdf", false, "Create a full-size searchable PDF (rather than a reduced size one).")
flag.Usage = func() {
fmt.Fprintf(flag.CommandLine.Output(), usage)
@@ -306,7 +307,7 @@ These training files are included in rescribe, and are always available:
ispdf = true
}
- err = startProcess(ctx, *verboselog, tessCommand, bookdir, bookname, trainingName, savedir, tessdir, !*wipe)
+ err = startProcess(ctx, *verboselog, tessCommand, bookdir, bookname, trainingName, savedir, tessdir, !*wipe, *fullpdf)
if err != nil {
log.Fatalln(err)
}
@@ -446,7 +447,7 @@ func rmIfNotImage(f string) error {
return nil
}
-func startProcess(ctx context.Context, logger log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, savedir string, tessdir string, nowipe bool) error {
+func startProcess(ctx context.Context, logger log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, savedir string, tessdir string, nowipe bool, fullpdf bool) error {
cmd := exec.Command(tessCommand, "--help")
pipeline.HideCmd(cmd)
_, err := cmd.Output()
@@ -544,8 +545,17 @@ func startProcess(ctx context.Context, logger log.Logger, tessCommand string, bo
// to .pdf.
binpath := filepath.Join(savedir, bookname+".binarised.pdf")
colourpath := filepath.Join(savedir, bookname+".colour.pdf")
+ fullsizepath := filepath.Join(savedir, bookname+".original.pdf")
pdfpath := filepath.Join(savedir, bookname+" searchable.pdf")
+ // If full size pdf is requested, replace colour.pdf with it,
+ // otherwise just remove it
+ if fullpdf {
+ _ = os.Rename(fullsizepath, colourpath)
+ } else {
+ _ = os.Remove(fullsizepath)
+ }
+
_, err = os.Stat(binpath)
binexists := err == nil || os.IsExist(err)
_, err = os.Stat(colourpath)
diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go
index de4ac3b..8fac060 100644
--- a/internal/pipeline/get.go
+++ b/internal/pipeline/get.go
@@ -68,7 +68,7 @@ func DownloadBestPngs(dir string, name string, conn Downloader) error {
func DownloadPdfs(dir string, name string, conn Downloader) error {
anydone := false
errmsg := ""
- for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} {
+ for _, suffix := range []string{".colour.pdf", ".binarised.pdf", ".original.pdf"} {
key := filepath.Join(name, name+suffix)
fn := filepath.Join(dir, name+suffix)
err := conn.Download(conn.WIPStorageId(), key, fn)
diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go
index 40ed02c..cbd179b 100644
--- a/internal/pipeline/pipeline.go
+++ b/internal/pipeline/pipeline.go
@@ -455,6 +455,12 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch
errc <- fmt.Errorf("Failed to set up PDF: %s", err)
return
}
+ fullsizepdf := new(bookpipeline.Fpdf)
+ err = fullsizepdf.Setup()
+ if err != nil {
+ errc <- fmt.Errorf("Failed to set up PDF: %s", err)
+ return
+ }
binhascontent, colourhascontent := false, false
select {
@@ -551,6 +557,11 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch
errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err)
return
}
+ err = fullsizepdf.AddPage(filepath.Join(savedir, colourfn), filepath.Join(savedir, pg.hocr), false)
+ if err != nil {
+ errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err)
+ return
+ }
colourhascontent = true
err = os.Remove(filepath.Join(savedir, colourfn))
if err != nil {
@@ -575,6 +586,14 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch
return
}
up <- fn
+
+ fn = filepath.Join(savedir, bookname+".original.pdf")
+ err = fullsizepdf.Save(fn)
+ if err != nil {
+ errc <- fmt.Errorf("Failed to save full size pdf: %s", err)
+ return
+ }
+ up <- fn
}
select {