diff options
-rw-r--r-- | cmd/rescribe/gui.go | 9 | ||||
-rw-r--r-- | cmd/rescribe/main.go | 14 | ||||
-rw-r--r-- | internal/pipeline/get.go | 2 | ||||
-rw-r--r-- | internal/pipeline/pipeline.go | 19 |
4 files changed, 38 insertions, 6 deletions
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go index 1c55462..36b4f16 100644 --- a/cmd/rescribe/gui.go +++ b/cmd/rescribe/gui.go @@ -340,6 +340,9 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess wipe := widget.NewCheck("Automatically clean image sides", func(bool) {}) + smallpdf := widget.NewCheck("Reduce size of searchable PDF", func(bool) {}) + smallpdf.Checked = true + trainingLabel := widget.NewLabel("Language / Script") trainingOpts := mkTrainingSelect([]string{training}, myWindow) @@ -529,7 +532,7 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess training = training[start:end] } - err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, !wipe.Checked) + err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, !wipe.Checked, !smallpdf.Checked) if err != nil && strings.HasSuffix(err.Error(), "context canceled") { progressBar.SetValue(0.0) return @@ -566,8 +569,8 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess trainingBits := container.New(layout.NewBorderLayout(nil, nil, trainingLabel, nil), trainingLabel, trainingOpts) - fullContent = container.NewVBox(choices, chosen, trainingBits, wipe, gobtn, abortbtn, progressBar, detail) - startContent := container.NewVBox(choices, trainingBits, wipe, gobtn, abortbtn, progressBar, detail) + fullContent = container.NewVBox(choices, chosen, trainingBits, wipe, smallpdf, gobtn, abortbtn, progressBar, detail) + startContent := container.NewVBox(choices, trainingBits, wipe, smallpdf, gobtn, abortbtn, progressBar, detail) myWindow.SetContent(startContent) diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index 83153c6..96f6162 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -157,6 +157,7 @@ These training files are included in rescribe, and are always available: `) tesscmd := flag.String("tesscmd", deftesscmd, "The Tesseract executable to run. You may need to set this to the full path of Tesseract.exe if you're on Windows.") wipe := flag.Bool("wipe", false, "Use wiper tool to remove noise like gutters from page before processing.") + fullpdf := flag.Bool("fullpdf", false, "Create a full-size searchable PDF (rather than a reduced size one).") flag.Usage = func() { fmt.Fprintf(flag.CommandLine.Output(), usage) @@ -306,7 +307,7 @@ These training files are included in rescribe, and are always available: ispdf = true } - err = startProcess(ctx, *verboselog, tessCommand, bookdir, bookname, trainingName, savedir, tessdir, !*wipe) + err = startProcess(ctx, *verboselog, tessCommand, bookdir, bookname, trainingName, savedir, tessdir, !*wipe, *fullpdf) if err != nil { log.Fatalln(err) } @@ -446,7 +447,7 @@ func rmIfNotImage(f string) error { return nil } -func startProcess(ctx context.Context, logger log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, savedir string, tessdir string, nowipe bool) error { +func startProcess(ctx context.Context, logger log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, savedir string, tessdir string, nowipe bool, fullpdf bool) error { cmd := exec.Command(tessCommand, "--help") pipeline.HideCmd(cmd) _, err := cmd.Output() @@ -544,8 +545,17 @@ func startProcess(ctx context.Context, logger log.Logger, tessCommand string, bo // to .pdf. binpath := filepath.Join(savedir, bookname+".binarised.pdf") colourpath := filepath.Join(savedir, bookname+".colour.pdf") + fullsizepath := filepath.Join(savedir, bookname+".original.pdf") pdfpath := filepath.Join(savedir, bookname+" searchable.pdf") + // If full size pdf is requested, replace colour.pdf with it, + // otherwise just remove it + if fullpdf { + _ = os.Rename(fullsizepath, colourpath) + } else { + _ = os.Remove(fullsizepath) + } + _, err = os.Stat(binpath) binexists := err == nil || os.IsExist(err) _, err = os.Stat(colourpath) diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go index de4ac3b..8fac060 100644 --- a/internal/pipeline/get.go +++ b/internal/pipeline/get.go @@ -68,7 +68,7 @@ func DownloadBestPngs(dir string, name string, conn Downloader) error { func DownloadPdfs(dir string, name string, conn Downloader) error { anydone := false errmsg := "" - for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} { + for _, suffix := range []string{".colour.pdf", ".binarised.pdf", ".original.pdf"} { key := filepath.Join(name, name+suffix) fn := filepath.Join(dir, name+suffix) err := conn.Download(conn.WIPStorageId(), key, fn) diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go index 40ed02c..cbd179b 100644 --- a/internal/pipeline/pipeline.go +++ b/internal/pipeline/pipeline.go @@ -455,6 +455,12 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch errc <- fmt.Errorf("Failed to set up PDF: %s", err) return } + fullsizepdf := new(bookpipeline.Fpdf) + err = fullsizepdf.Setup() + if err != nil { + errc <- fmt.Errorf("Failed to set up PDF: %s", err) + return + } binhascontent, colourhascontent := false, false select { @@ -551,6 +557,11 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err) return } + err = fullsizepdf.AddPage(filepath.Join(savedir, colourfn), filepath.Join(savedir, pg.hocr), false) + if err != nil { + errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err) + return + } colourhascontent = true err = os.Remove(filepath.Join(savedir, colourfn)) if err != nil { @@ -575,6 +586,14 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch return } up <- fn + + fn = filepath.Join(savedir, bookname+".original.pdf") + err = fullsizepdf.Save(fn) + if err != nil { + errc <- fmt.Errorf("Failed to save full size pdf: %s", err) + return + } + up <- fn } select { |