From 76d91ea8f65c6ad52efb24ac2c94b22c2908bc5c Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 21 Mar 2022 13:51:51 +0000 Subject: Only generate full-size PDF if requested This avoids the issue that large PDFs require a lot of RAM, so there are chances of running out of memory. Plus it's a waste of space and time. --- cmd/bookpipeline/main.go | 2 +- cmd/rescribe/gui.go | 10 +++++----- cmd/rescribe/main.go | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'cmd') diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go index 2a9f54b..076df32 100644 --- a/cmd/bookpipeline/main.go +++ b/cmd/bookpipeline/main.go @@ -271,7 +271,7 @@ func main() { } stopTimer(stopIfQuiet) conn.Log("Message received on analyse queue, processing", msg.Body) - err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "") + err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn, false), ocredPattern, conn.AnalyseQueueId(), "") resetTimer(stopIfQuiet, quietTime) if err != nil { conn.Log("Error during analysis", err) diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go index 36b4f16..7c5a727 100644 --- a/cmd/rescribe/gui.go +++ b/cmd/rescribe/gui.go @@ -340,8 +340,8 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess wipe := widget.NewCheck("Automatically clean image sides", func(bool) {}) - smallpdf := widget.NewCheck("Reduce size of searchable PDF", func(bool) {}) - smallpdf.Checked = true + bigpdf := widget.NewCheck("Use highest image quality for searchable PDF (requires lots of RAM)", func(bool) {}) + bigpdf.Checked = false trainingLabel := widget.NewLabel("Language / Script") @@ -532,7 +532,7 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess training = training[start:end] } - err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, !wipe.Checked, !smallpdf.Checked) + err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, !wipe.Checked, bigpdf.Checked) if err != nil && strings.HasSuffix(err.Error(), "context canceled") { progressBar.SetValue(0.0) return @@ -569,8 +569,8 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess trainingBits := container.New(layout.NewBorderLayout(nil, nil, trainingLabel, nil), trainingLabel, trainingOpts) - fullContent = container.NewVBox(choices, chosen, trainingBits, wipe, smallpdf, gobtn, abortbtn, progressBar, detail) - startContent := container.NewVBox(choices, trainingBits, wipe, smallpdf, gobtn, abortbtn, progressBar, detail) + fullContent = container.NewVBox(choices, chosen, trainingBits, wipe, bigpdf, gobtn, abortbtn, progressBar, detail) + startContent := container.NewVBox(choices, trainingBits, wipe, bigpdf, gobtn, abortbtn, progressBar, detail) myWindow.SetContent(startContent) diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index 96f6162..eba8e84 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -484,7 +484,7 @@ func startProcess(ctx context.Context, logger log.Logger, tessCommand string, bo } fmt.Printf("Processing book\n") - err = processbook(ctx, trainingName, tessCommand, conn) + err = processbook(ctx, trainingName, tessCommand, conn, fullpdf) if err != nil { _ = os.RemoveAll(tempdir) return fmt.Errorf("Error processing book: %v", err) @@ -648,7 +648,7 @@ func downloadbook(dir string, name string, conn Pipeliner) error { return nil } -func processbook(ctx context.Context, training string, tesscmd string, conn Pipeliner) error { +func processbook(ctx context.Context, training string, tesscmd string, conn Pipeliner, fullpdf bool) error { origPattern := regexp.MustCompile(`[0-9]{4}.(jpg|png)$`) wipePattern := regexp.MustCompile(`[0-9]{4,6}(.bin)?.(jpg|png)$`) ocredPattern := regexp.MustCompile(`.hocr$`) @@ -764,7 +764,7 @@ func processbook(ctx context.Context, training string, tesscmd string, conn Pipe stopTimer(stopIfQuiet) conn.Log("Message received on analyse queue, processing", msg.Body) fmt.Printf("\n Analysing OCR and compiling PDFs\n") - err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "") + err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn, fullpdf), ocredPattern, conn.AnalyseQueueId(), "") resetTimer(stopIfQuiet, quietTime) if err != nil { return fmt.Errorf("Error during analysis: %v", err) -- cgit v1.2.1-24-ge1ad