summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cmd/bookpipeline/main.go 2
-rw-r--r-- cmd/rescribe/gui.go 10
-rw-r--r-- cmd/rescribe/main.go 6
-rw-r--r-- internal/pipeline/pipeline.go 90
4 files changed, 55 insertions, 53 deletions
diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 2a9f54b..076df32 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -271,7 +271,7 @@ func main() {
}
stopTimer(stopIfQuiet)
conn.Log("Message received on analyse queue, processing", msg.Body)
- err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "")
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn, false), ocredPattern, conn.AnalyseQueueId(), "")
resetTimer(stopIfQuiet, quietTime)
if err != nil {
conn.Log("Error during analysis", err)
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index 36b4f16..7c5a727 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -340,8 +340,8 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
wipe := widget.NewCheck("Automatically clean image sides", func(bool) {})
- smallpdf := widget.NewCheck("Reduce size of searchable PDF", func(bool) {})
- smallpdf.Checked = true
+ bigpdf := widget.NewCheck("Use highest image quality for searchable PDF (requires lots of RAM)", func(bool) {})
+ bigpdf.Checked = false
trainingLabel := widget.NewLabel("Language / Script")
@@ -532,7 +532,7 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
training = training[start:end]
}
- err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, !wipe.Checked, !smallpdf.Checked)
+ err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, !wipe.Checked, bigpdf.Checked)
if err != nil && strings.HasSuffix(err.Error(), "context canceled") {
progressBar.SetValue(0.0)
return
@@ -569,8 +569,8 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
trainingBits := container.New(layout.NewBorderLayout(nil, nil, trainingLabel, nil), trainingLabel, trainingOpts)
- fullContent = container.NewVBox(choices, chosen, trainingBits, wipe, smallpdf, gobtn, abortbtn, progressBar, detail)
- startContent := container.NewVBox(choices, trainingBits, wipe, smallpdf, gobtn, abortbtn, progressBar, detail)
+ fullContent = container.NewVBox(choices, chosen, trainingBits, wipe, bigpdf, gobtn, abortbtn, progressBar, detail)
+ startContent := container.NewVBox(choices, trainingBits, wipe, bigpdf, gobtn, abortbtn, progressBar, detail)
myWindow.SetContent(startContent)
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 96f6162..eba8e84 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -484,7 +484,7 @@ func startProcess(ctx context.Context, logger log.Logger, tessCommand string, bo
}
fmt.Printf("Processing book\n")
- err = processbook(ctx, trainingName, tessCommand, conn)
+ err = processbook(ctx, trainingName, tessCommand, conn, fullpdf)
if err != nil {
_ = os.RemoveAll(tempdir)
return fmt.Errorf("Error processing book: %v", err)
@@ -648,7 +648,7 @@ func downloadbook(dir string, name string, conn Pipeliner) error {
return nil
}
-func processbook(ctx context.Context, training string, tesscmd string, conn Pipeliner) error {
+func processbook(ctx context.Context, training string, tesscmd string, conn Pipeliner, fullpdf bool) error {
origPattern := regexp.MustCompile(`[0-9]{4}.(jpg|png)$`)
wipePattern := regexp.MustCompile(`[0-9]{4,6}(.bin)?.(jpg|png)$`)
ocredPattern := regexp.MustCompile(`.hocr$`)
@@ -764,7 +764,7 @@ func processbook(ctx context.Context, training string, tesscmd string, conn Pipe
stopTimer(stopIfQuiet)
conn.Log("Message received on analyse queue, processing", msg.Body)
fmt.Printf("\n Analysing OCR and compiling PDFs\n")
- err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "")
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn, fullpdf), ocredPattern, conn.AnalyseQueueId(), "")
resetTimer(stopIfQuiet, quietTime)
if err != nil {
return fmt.Errorf("Error during analysis: %v", err)
diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go
index a09a414..d8beeb9 100644
--- a/internal/pipeline/pipeline.go
+++ b/internal/pipeline/pipeline.go
@@ -330,7 +330,7 @@ func Ocr(training string, tesscmd string) func(context.Context, chan string, cha
}
}
-func Analyse(conn Downloader) func(context.Context, chan string, chan string, chan error, *log.Logger) {
+func Analyse(conn Downloader, mkfullpdf bool) func(context.Context, chan string, chan string, chan error, *log.Logger) {
return func(ctx context.Context, toanalyse chan string, up chan string, errc chan error, logger *log.Logger) {
confs := make(map[string][]*bookpipeline.Conf)
bestconfs := make(map[string]*bookpipeline.Conf)
@@ -455,12 +455,6 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch
errc <- fmt.Errorf("Failed to set up PDF: %s", err)
return
}
- fullsizepdf := new(bookpipeline.Fpdf)
- err = fullsizepdf.Setup()
- if err != nil {
- errc <- fmt.Errorf("Failed to set up PDF: %s", err)
- return
- }
binhascontent, colourhascontent := false, false
select {
@@ -583,54 +577,62 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch
up <- fn
}
- for _, pg := range colourimgs {
- select {
- case <-ctx.Done():
- errc <- ctx.Err()
+ if mkfullpdf {
+ fullsizepdf := new(bookpipeline.Fpdf)
+ err = fullsizepdf.Setup()
+ if err != nil {
+ errc <- fmt.Errorf("Failed to set up PDF: %s", err)
return
- default:
}
+ for _, pg := range colourimgs {
+ select {
+ case <-ctx.Done():
+ errc <- ctx.Err()
+ return
+ default:
+ }
- logger.Println("Downloading colour page to add to PDF", pg.img)
- colourfn := pg.img
- err = conn.Download(conn.WIPStorageId(), bookname+"/"+colourfn, filepath.Join(savedir, colourfn))
- if err != nil {
- colourfn = strings.Replace(pg.img, ".jpg", ".png", 1)
- logger.Println("Download failed; trying", colourfn)
+ logger.Println("Downloading colour page to add to PDF", pg.img)
+ colourfn := pg.img
err = conn.Download(conn.WIPStorageId(), bookname+"/"+colourfn, filepath.Join(savedir, colourfn))
if err != nil {
- logger.Println("Download failed; skipping page", pg.img)
+ colourfn = strings.Replace(pg.img, ".jpg", ".png", 1)
+ logger.Println("Download failed; trying", colourfn)
+ err = conn.Download(conn.WIPStorageId(), bookname+"/"+colourfn, filepath.Join(savedir, colourfn))
+ if err != nil {
+ logger.Println("Download failed; skipping page", pg.img)
+ }
}
- }
- if err == nil {
- err = fullsizepdf.AddPage(filepath.Join(savedir, colourfn), filepath.Join(savedir, pg.hocr), false)
- if err != nil {
- errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err)
- return
- }
- err = os.Remove(filepath.Join(savedir, colourfn))
- if err != nil {
- errc <- err
- return
+ if err == nil {
+ err = fullsizepdf.AddPage(filepath.Join(savedir, colourfn), filepath.Join(savedir, pg.hocr), false)
+ if err != nil {
+ errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err)
+ return
+ }
+ err = os.Remove(filepath.Join(savedir, colourfn))
+ if err != nil {
+ errc <- err
+ return
+ }
}
}
- }
-
- select {
- case <-ctx.Done():
- errc <- ctx.Err()
- return
- default:
- }
- if colourhascontent {
- fn = filepath.Join(savedir, bookname+".original.pdf")
- err = fullsizepdf.Save(fn)
- if err != nil {
- errc <- fmt.Errorf("Failed to save full size pdf: %s", err)
+ select {
+ case <-ctx.Done():
+ errc <- ctx.Err()
return
+ default:
+ }
+
+ if colourhascontent {
+ fn = filepath.Join(savedir, bookname+".original.pdf")
+ err = fullsizepdf.Save(fn)
+ if err != nil {
+ errc <- fmt.Errorf("Failed to save full size pdf: %s", err)
+ return
+ }
+ up <- fn
}
- up <- fn
}
select {