diff options
author | Nick White <git@njw.name> | 2022-03-11 13:36:59 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2022-03-11 13:36:59 +0000 |
commit | 9d1382b69700129a66541d786ba3b784eda56e36 (patch) | |
tree | a1e607d55dbd08b586f528eea07ea15079ed1684 /internal/pipeline | |
parent | d6e1cb61da7a9155023ce9dece96da79c7246790 (diff) |
Add initial support for full-size PDF generation
Some issues:
1) The PDF generation stores every page in memory while it constructs the PDF. That means that
there's a higher chance of failure due to running out of memory with full-size PDFs. There's no
getting around this except by improving the PDF generation library, which is not easy.
2) Currently I've just changed the pipeline to always generate these full size PDFs, and
then the rescribe tool will just delete them if they weren't requested. This is bad in
particular because of point 1, and would probably cause failures in the server
pipeline as a result.
Therefore the plan is to add a tag to queue messages so that full size generation can be
selectively enabled.
Also, full-size PDF generation should be split from the loop with colour PDF generation, as holding
both PDFs in RAM at the same time is unnecessary.
Diffstat (limited to 'internal/pipeline')
-rw-r--r-- | internal/pipeline/get.go | 2 | ||||
-rw-r--r-- | internal/pipeline/pipeline.go | 19 |
2 files changed, 20 insertions, 1 deletions
diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go index de4ac3b..8fac060 100644 --- a/internal/pipeline/get.go +++ b/internal/pipeline/get.go @@ -68,7 +68,7 @@ func DownloadBestPngs(dir string, name string, conn Downloader) error { func DownloadPdfs(dir string, name string, conn Downloader) error { anydone := false errmsg := "" - for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} { + for _, suffix := range []string{".colour.pdf", ".binarised.pdf", ".original.pdf"} { key := filepath.Join(name, name+suffix) fn := filepath.Join(dir, name+suffix) err := conn.Download(conn.WIPStorageId(), key, fn) diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go index 40ed02c..cbd179b 100644 --- a/internal/pipeline/pipeline.go +++ b/internal/pipeline/pipeline.go @@ -455,6 +455,12 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch errc <- fmt.Errorf("Failed to set up PDF: %s", err) return } + fullsizepdf := new(bookpipeline.Fpdf) + err = fullsizepdf.Setup() + if err != nil { + errc <- fmt.Errorf("Failed to set up PDF: %s", err) + return + } binhascontent, colourhascontent := false, false select { @@ -551,6 +557,11 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err) return } + err = fullsizepdf.AddPage(filepath.Join(savedir, colourfn), filepath.Join(savedir, pg.hocr), false) + if err != nil { + errc <- fmt.Errorf("Failed to add page %s to PDF: %s", pg.img, err) + return + } colourhascontent = true err = os.Remove(filepath.Join(savedir, colourfn)) if err != nil { @@ -575,6 +586,14 @@ func Analyse(conn Downloader) func(context.Context, chan string, chan string, ch return } up <- fn + + fn = filepath.Join(savedir, bookname+".original.pdf") + err = fullsizepdf.Save(fn) + if err != nil { + errc <- fmt.Errorf("Failed to save full size pdf: %s", err) + return + } + up <- fn } select { |