From 844888519265ca6cf088f48640abbe55d734acc5 Mon Sep 17 00:00:00 2001 From: Nick White Date: Fri, 6 Dec 2019 09:46:06 +0000 Subject: Don't abort PDF generation if pages aren't found, just do the best that can be done and move on; not all books will have all page types (such as wipeonly books) --- cmd/bookpipeline/main.go | 87 ++++++++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 36 deletions(-) (limited to 'cmd') diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go index 7283967..321db94 100644 --- a/cmd/bookpipeline/main.go +++ b/cmd/bookpipeline/main.go @@ -18,6 +18,8 @@ import ( "rescribe.xyz/utils/pkg/hocr" ) +// TODO: stop using filepath.Join for keys; just use '/' delimeter + const usage = `Usage: bookpipeline [-v] [-np] [-nw] [-no] [-nop] [-na] [-t training] Watches the preprocess, ocr and analyse queues for book names. When @@ -284,61 +286,74 @@ func analyse(conn Pipeliner) func(chan string, chan string, chan error, *log.Log errc <- errors.New(fmt.Sprintf("Failed to set up PDF: %s", err)) return } + binhascontent, colourhascontent := false, false for _, pg := range pgs { - var imgfns []string + var colourfn, binfn string base := filepath.Base(pg) nosuffix := strings.TrimSuffix(base, ".hocr") - imgfns = append(imgfns, nosuffix + ".png") p := strings.SplitN(base, "_bin", 2) + + binfn = nosuffix + ".png" if len(p) > 1 { - imgfns = append(imgfns, p[0] + ".jpg") + colourfn = p[0] + ".jpg" } else { - imgfns = append(imgfns, nosuffix + ".jpg") + colourfn = nosuffix + ".jpg" } - for n, i := range imgfns { - logger.Println("Downloading", i) - err := conn.Download(conn.WIPStorageId(), filepath.Join(bookname, i), filepath.Join(savedir, i)) + + logger.Println("Downloading binarised page to add to PDF", binfn) + err := conn.Download(conn.WIPStorageId(), filepath.Join(bookname, binfn), filepath.Join(savedir, binfn)) + if err != nil { + logger.Println("Download failed; skipping page", binfn) + } else { + err = binarisedpdf.AddPage(filepath.Join(savedir, binfn), pg, true) if err != nil { - imgfns[n] = strings.Replace(i, ".jpg", ".png", 1) - i = imgfns[n] - logger.Println("Download failed; trying", i) - err := conn.Download(conn.WIPStorageId(), filepath.Join(bookname, i), filepath.Join(savedir, i)) - if err != nil { - close(up) - errc <- errors.New(fmt.Sprintf("Failed to download book page %s: %s", filepath.Join(bookname, i), err)) - return - } + close(up) + errc <- errors.New(fmt.Sprintf("Failed to add page %s to PDF: %s", binfn, err)) + return + } + binhascontent = true + } + + logger.Println("Downloading colour page to add to PDF", colourfn) + err = conn.Download(conn.WIPStorageId(), filepath.Join(bookname, colourfn), filepath.Join(savedir, colourfn)) + if err != nil { + colourfn = strings.Replace(colourfn, ".jpg", ".png", 1) + logger.Println("Download failed; trying", colourfn) + err = conn.Download(conn.WIPStorageId(), filepath.Join(bookname, colourfn), filepath.Join(savedir, colourfn)) + if err != nil { + logger.Println("Download failed; skipping page", colourfn) + } + } + if err == nil { + err = binarisedpdf.AddPage(filepath.Join(savedir, colourfn), pg, true) + if err != nil { + close(up) + errc <- errors.New(fmt.Sprintf("Failed to add page %s to PDF: %s", colourfn, err)) + return } + colourhascontent = true } - err = colourpdf.AddPage(filepath.Join(savedir, imgfns[1]), pg, true) + } + if colourhascontent { + fn = filepath.Join(savedir, bookname + ".colour.pdf") + err = colourpdf.Save(fn) if err != nil { close(up) - errc <- errors.New(fmt.Sprintf("Failed to add page %s to PDF: %s", imgfns[1], err)) + errc <- errors.New(fmt.Sprintf("Failed to save colour pdf: %s", err)) return } - err = binarisedpdf.AddPage(filepath.Join(savedir, imgfns[0]), pg, true) + up <- fn + } + if binhascontent { + fn = filepath.Join(savedir, bookname + ".binarised.pdf") + err = binarisedpdf.Save(fn) if err != nil { close(up) - errc <- errors.New(fmt.Sprintf("Failed to add page %s to PDF: %s", imgfns[0], err)) + errc <- errors.New(fmt.Sprintf("Failed to save binarised pdf: %s", err)) return } + up <- fn } - fn = filepath.Join(savedir, bookname + ".colour.pdf") - err = colourpdf.Save(fn) - if err != nil { - close(up) - errc <- errors.New(fmt.Sprintf("Failed to save colour pdf: %s", err)) - return - } - up <- fn - fn = filepath.Join(savedir, bookname + ".binarised.pdf") - err = binarisedpdf.Save(fn) - if err != nil { - close(up) - errc <- errors.New(fmt.Sprintf("Failed to save binarised pdf: %s", err)) - return - } - up <- fn logger.Println("Creating graph") fn = filepath.Join(savedir, "graph.png") -- cgit v1.2.1-24-ge1ad