summaryrefslogtreecommitdiff
path: root/cmd/bookpipeline
diff options
context:
space:
mode:
authorNick White <git@njw.name>2019-12-06 09:46:06 +0000
committerNick White <git@njw.name>2019-12-06 09:46:06 +0000
commit844888519265ca6cf088f48640abbe55d734acc5 (patch)
treebb60a622038ec2b91476a7f839bcd273cdd8592e /cmd/bookpipeline
parentb545cdbb19a2f6a1d3970c26b600eab27ffedfa3 (diff)
Don't abort PDF generation if pages aren't found, just do the best that can be done and move on; not all books will have all page types (such as wipeonly books)
Diffstat (limited to 'cmd/bookpipeline')
-rw-r--r--cmd/bookpipeline/main.go87
1 files changed, 51 insertions, 36 deletions
diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 7283967..321db94 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -18,6 +18,8 @@ import (
"rescribe.xyz/utils/pkg/hocr"
)
+// TODO: stop using filepath.Join for keys; just use '/' delimeter
+
const usage = `Usage: bookpipeline [-v] [-np] [-nw] [-no] [-nop] [-na] [-t training]
Watches the preprocess, ocr and analyse queues for book names. When
@@ -284,61 +286,74 @@ func analyse(conn Pipeliner) func(chan string, chan string, chan error, *log.Log
errc <- errors.New(fmt.Sprintf("Failed to set up PDF: %s", err))
return
}
+ binhascontent, colourhascontent := false, false
for _, pg := range pgs {
- var imgfns []string
+ var colourfn, binfn string
base := filepath.Base(pg)
nosuffix := strings.TrimSuffix(base, ".hocr")
- imgfns = append(imgfns, nosuffix + ".png")
p := strings.SplitN(base, "_bin", 2)
+
+ binfn = nosuffix + ".png"
if len(p) > 1 {
- imgfns = append(imgfns, p[0] + ".jpg")
+ colourfn = p[0] + ".jpg"
} else {
- imgfns = append(imgfns, nosuffix + ".jpg")
+ colourfn = nosuffix + ".jpg"
}
- for n, i := range imgfns {
- logger.Println("Downloading", i)
- err := conn.Download(conn.WIPStorageId(), filepath.Join(bookname, i), filepath.Join(savedir, i))
+
+ logger.Println("Downloading binarised page to add to PDF", binfn)
+ err := conn.Download(conn.WIPStorageId(), filepath.Join(bookname, binfn), filepath.Join(savedir, binfn))
+ if err != nil {
+ logger.Println("Download failed; skipping page", binfn)
+ } else {
+ err = binarisedpdf.AddPage(filepath.Join(savedir, binfn), pg, true)
if err != nil {
- imgfns[n] = strings.Replace(i, ".jpg", ".png", 1)
- i = imgfns[n]
- logger.Println("Download failed; trying", i)
- err := conn.Download(conn.WIPStorageId(), filepath.Join(bookname, i), filepath.Join(savedir, i))
- if err != nil {
- close(up)
- errc <- errors.New(fmt.Sprintf("Failed to download book page %s: %s", filepath.Join(bookname, i), err))
- return
- }
+ close(up)
+ errc <- errors.New(fmt.Sprintf("Failed to add page %s to PDF: %s", binfn, err))
+ return
+ }
+ binhascontent = true
+ }
+
+ logger.Println("Downloading colour page to add to PDF", colourfn)
+ err = conn.Download(conn.WIPStorageId(), filepath.Join(bookname, colourfn), filepath.Join(savedir, colourfn))
+ if err != nil {
+ colourfn = strings.Replace(colourfn, ".jpg", ".png", 1)
+ logger.Println("Download failed; trying", colourfn)
+ err = conn.Download(conn.WIPStorageId(), filepath.Join(bookname, colourfn), filepath.Join(savedir, colourfn))
+ if err != nil {
+ logger.Println("Download failed; skipping page", colourfn)
+ }
+ }
+ if err == nil {
+ err = binarisedpdf.AddPage(filepath.Join(savedir, colourfn), pg, true)
+ if err != nil {
+ close(up)
+ errc <- errors.New(fmt.Sprintf("Failed to add page %s to PDF: %s", colourfn, err))
+ return
}
+ colourhascontent = true
}
- err = colourpdf.AddPage(filepath.Join(savedir, imgfns[1]), pg, true)
+ }
+ if colourhascontent {
+ fn = filepath.Join(savedir, bookname + ".colour.pdf")
+ err = colourpdf.Save(fn)
if err != nil {
close(up)
- errc <- errors.New(fmt.Sprintf("Failed to add page %s to PDF: %s", imgfns[1], err))
+ errc <- errors.New(fmt.Sprintf("Failed to save colour pdf: %s", err))
return
}
- err = binarisedpdf.AddPage(filepath.Join(savedir, imgfns[0]), pg, true)
+ up <- fn
+ }
+ if binhascontent {
+ fn = filepath.Join(savedir, bookname + ".binarised.pdf")
+ err = binarisedpdf.Save(fn)
if err != nil {
close(up)
- errc <- errors.New(fmt.Sprintf("Failed to add page %s to PDF: %s", imgfns[0], err))
+ errc <- errors.New(fmt.Sprintf("Failed to save binarised pdf: %s", err))
return
}
+ up <- fn
}
- fn = filepath.Join(savedir, bookname + ".colour.pdf")
- err = colourpdf.Save(fn)
- if err != nil {
- close(up)
- errc <- errors.New(fmt.Sprintf("Failed to save colour pdf: %s", err))
- return
- }
- up <- fn
- fn = filepath.Join(savedir, bookname + ".binarised.pdf")
- err = binarisedpdf.Save(fn)
- if err != nil {
- close(up)
- errc <- errors.New(fmt.Sprintf("Failed to save binarised pdf: %s", err))
- return
- }
- up <- fn
logger.Println("Creating graph")
fn = filepath.Join(savedir, "graph.png")