summaryrefslogtreecommitdiff
path: root/cmd/getpipelinebook
diff options
context:
space:
mode:
authorNick White <git@njw.name>2021-02-05 17:15:51 +0000
committerNick White <git@njw.name>2021-02-05 17:15:51 +0000
commit11470933e4fd379b4aefa4e2bab33662a72791c2 (patch)
tree8607e7739989ff63032b9ce10a8bf8553ecc6eb6 /cmd/getpipelinebook
parent3e7da751b3ca917adb79674eac4ef2a3267e3984 (diff)
parenta8c7481f0dc02bbda3b3a07091e9d61f6eb728b2 (diff)
Merge branch 'master' of ssh://ssh.phx.nearlyfreespeech.net/home/public/bookpipeline
Diffstat (limited to 'cmd/getpipelinebook')
-rw-r--r--cmd/getpipelinebook/main.go98
1 files changed, 18 insertions, 80 deletions
diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go
index 03e709b..ccedd72 100644
--- a/cmd/getpipelinebook/main.go
+++ b/cmd/getpipelinebook/main.go
@@ -6,15 +6,15 @@
package main
import (
- "bufio"
"flag"
"fmt"
"log"
"os"
"path/filepath"
- "strings"
"rescribe.xyz/bookpipeline"
+
+ "rescribe.xyz/bookpipeline/internal/pipeline"
)
const usage = `Usage: getpipelinebook [-c conn] [-a] [-graph] [-pdf] [-png] [-v] bookname
@@ -33,28 +33,6 @@ func (w NullWriter) Write(p []byte) (n int, err error) {
return len(p), nil
}
-type Pipeliner interface {
- MinimalInit() error
- ListObjects(bucket string, prefix string) ([]string, error)
- Download(bucket string, key string, fn string) error
- Upload(bucket string, key string, path string) error
- CheckQueue(url string, timeout int64) (bookpipeline.Qmsg, error)
- AddToQueue(url string, msg string) error
- DelFromQueue(url string, handle string) error
- WIPStorageId() string
-}
-
-func getpdfs(conn Pipeliner, l *log.Logger, bookname string) {
- for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} {
- fn := filepath.Join(bookname, bookname+suffix)
- l.Println("Downloading PDF", fn)
- err := conn.Download(conn.WIPStorageId(), fn, fn)
- if err != nil {
- log.Printf("Failed to download %s: %s\n", fn, err)
- }
- }
-}
-
func main() {
all := flag.Bool("a", false, "Get all files for book")
conntype := flag.String("c", "aws", "connection type ('aws' or 'local')")
@@ -83,7 +61,7 @@ func main() {
verboselog = log.New(n, "", log.LstdFlags)
}
- var conn Pipeliner
+ var conn pipeline.MinPipeliner
switch *conntype {
case "aws":
conn = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog}
@@ -109,18 +87,10 @@ func main() {
if *all {
verboselog.Println("Downloading all files for", bookname)
- objs, err := conn.ListObjects(conn.WIPStorageId(), bookname)
+ err = pipeline.DownloadAll(bookname, bookname, conn)
if err != nil {
- log.Fatalln("Failed to get list of files for book", bookname, err)
- }
- for _, i := range objs {
- verboselog.Println("Downloading", i)
- err = conn.Download(conn.WIPStorageId(), i, i)
- if err != nil {
- log.Fatalln("Failed to download file", i, err)
- }
+ log.Fatalln(err)
}
- return
}
if *binarisedpdf {
@@ -151,61 +121,29 @@ func main() {
}
if *pdf {
- getpdfs(conn, verboselog, bookname)
+ verboselog.Println("Downloading PDFs")
+ pipeline.DownloadPdfs(bookname, bookname, conn)
}
if *binarisedpdf || *colourpdf || *graph || *pdf {
return
}
- verboselog.Println("Downloading best file")
- fn := filepath.Join(bookname, "best")
- err = conn.Download(conn.WIPStorageId(), fn, fn)
+ verboselog.Println("Downloading best pages")
+ err = pipeline.DownloadBestPages(bookname, bookname, conn, *png)
if err != nil {
- log.Fatalln("Failed to download 'best' file", err)
- }
- f, err := os.Open(fn)
- if err != nil {
- log.Fatalln("Failed to open best file", err)
- }
- defer f.Close()
-
- if *png {
- verboselog.Println("Downloading png files")
- s := bufio.NewScanner(f)
- for s.Scan() {
- txtfn := filepath.Join(bookname, s.Text())
- fn = strings.Replace(txtfn, ".hocr", ".png", 1)
- verboselog.Println("Downloading file", fn)
- err = conn.Download(conn.WIPStorageId(), fn, fn)
- if err != nil {
- log.Fatalln("Failed to download file", fn, err)
- }
- }
- return
+ log.Fatalln(err)
}
- verboselog.Println("Downloading HOCR files")
- s := bufio.NewScanner(f)
- for s.Scan() {
- fn = filepath.Join(bookname, s.Text())
- verboselog.Println("Downloading file", fn)
- err = conn.Download(conn.WIPStorageId(), fn, fn)
- if err != nil {
- log.Fatalln("Failed to download file", fn, err)
- }
+ verboselog.Println("Downloading PDFs")
+ pipeline.DownloadPdfs(bookname, bookname, conn)
+ if err != nil {
+ log.Fatalln(err)
}
- verboselog.Println("Downloading PDF files")
- getpdfs(conn, verboselog, bookname)
-
- verboselog.Println("Downloading analysis files")
- for _, a := range []string{"conf", "graph.png"} {
- fn = filepath.Join(bookname, a)
- verboselog.Println("Downloading file", fn)
- err = conn.Download(conn.WIPStorageId(), fn, fn)
- if err != nil {
- log.Fatalln("Failed to download file", fn, err)
- }
+ verboselog.Println("Downloading analyses")
+ err = pipeline.DownloadAnalyses(bookname, bookname, conn)
+ if err != nil {
+ log.Fatalln(err)
}
}