From af8650c074bc111200b132b0918d44cacd423b6e Mon Sep 17 00:00:00 2001 From: Nick White Date: Fri, 11 Mar 2022 17:34:48 +0000 Subject: Separate out fullsize pdf creation from colour pdf creation, so less memory is needed --- internal/pipeline/pipeline.go | 47 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) (limited to 'internal') diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go index cbd179b..a09a414 100644 --- a/internal/pipeline/pipeline.go +++ b/internal/pipeline/pipeline.go @@ -557,11 +557,6 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err) return } - err = fullsizepdf.AddPage(filepath.Join(savedir, colourfn), filepath.Join(savedir, pg.hocr), false) - if err != nil { - errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err) - return - } colourhascontent = true err = os.Remove(filepath.Join(savedir, colourfn)) if err != nil { @@ -586,7 +581,49 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch return } up <- fn + } + + for _, pg := range colourimgs { + select { + case <-ctx.Done(): + errc <- ctx.Err() + return + default: + } + logger.Println("Downloading colour page to add to PDF", pg.img) + colourfn := pg.img + err = conn.Download(conn.WIPStorageId(), bookname+"/"+colourfn, filepath.Join(savedir, colourfn)) + if err != nil { + colourfn = strings.Replace(pg.img, ".jpg", ".png", 1) + logger.Println("Download failed; trying", colourfn) + err = conn.Download(conn.WIPStorageId(), bookname+"/"+colourfn, filepath.Join(savedir, colourfn)) + if err != nil { + logger.Println("Download failed; skipping page", pg.img) + } + } + if err == nil { + err = fullsizepdf.AddPage(filepath.Join(savedir, colourfn), filepath.Join(savedir, pg.hocr), false) + if err != nil { + errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err) + return + } + err = os.Remove(filepath.Join(savedir, colourfn)) + if err != nil { + errc <- err + return + } + } + } + + select { + case <-ctx.Done(): + errc <- ctx.Err() + return + default: + } + + if colourhascontent { fn = filepath.Join(savedir, bookname+".original.pdf") err = fullsizepdf.Save(fn) if err != nil { -- cgit v1.2.1-24-ge1ad