diff options
| -rw-r--r-- | cmd/getpipelinebook/main.go | 100 | ||||
| -rw-r--r-- | cmd/rescribe/main.go | 2 | ||||
| -rw-r--r-- | internal/pipeline/get.go | 33 | ||||
| -rw-r--r-- | internal/pipeline/pipeline.go | 5 | 
4 files changed, 56 insertions, 84 deletions
| diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go index ef13db5..5116414 100644 --- a/cmd/getpipelinebook/main.go +++ b/cmd/getpipelinebook/main.go @@ -6,18 +6,16 @@  package main  import ( -	"bufio"  	"flag"  	"fmt"  	"log"  	"os"  	"path/filepath" -	"strings"  	"rescribe.xyz/bookpipeline" -) -// TODO: use internal/pipeline/get.go functions +	"rescribe.xyz/bookpipeline/internal/pipeline" +)  const usage = `Usage: getpipelinebook [-c conn] [-a] [-graph] [-pdf] [-png] [-v] bookname @@ -35,28 +33,6 @@ func (w NullWriter) Write(p []byte) (n int, err error) {  	return len(p), nil  } -type Pipeliner interface { -	MinimalInit() error -	ListObjects(bucket string, prefix string) ([]string, error) -	Download(bucket string, key string, fn string) error -	Upload(bucket string, key string, path string) error -	CheckQueue(url string, timeout int64) (bookpipeline.Qmsg, error) -	AddToQueue(url string, msg string) error -	DelFromQueue(url string, handle string) error -	WIPStorageId() string -} - -func getpdfs(conn Pipeliner, l *log.Logger, bookname string) { -	for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} { -		fn := filepath.Join(bookname, bookname+suffix) -		l.Println("Downloading PDF", fn) -		err := conn.Download(conn.WIPStorageId(), fn, fn) -		if err != nil { -			log.Printf("Failed to download %s: %s\n", fn, err) -		} -	} -} -  func main() {  	all := flag.Bool("a", false, "Get all files for book")  	conntype := flag.String("c", "aws", "connection type ('aws' or 'local')") @@ -85,7 +61,7 @@ func main() {  		verboselog = log.New(n, "", log.LstdFlags)  	} -	var conn Pipeliner +	var conn pipeline.MinPipeliner  	switch *conntype {  	case "aws":  		conn = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog} @@ -111,18 +87,10 @@ func main() {  	if *all {  		verboselog.Println("Downloading all files for", bookname) -		objs, err := conn.ListObjects(conn.WIPStorageId(), bookname) +		err = pipeline.DownloadAll(bookname, conn)  		if err != nil { -			log.Fatalln("Failed to get list of files for book", bookname, err) -		} -		for _, i := range objs { -			verboselog.Println("Downloading", i) -			err = conn.Download(conn.WIPStorageId(), i, i) -			if err != nil { -				log.Fatalln("Failed to download file", i, err) -			} +			log.Fatalln(err)  		} -		return  	}  	if *binarisedpdf { @@ -153,61 +121,29 @@ func main() {  	}  	if *pdf { -		getpdfs(conn, verboselog, bookname) +		verboselog.Println("Downloading PDFs") +		pipeline.DownloadPdfs(bookname, conn)  	}  	if *binarisedpdf || *colourpdf || *graph || *pdf {  		return  	} -	verboselog.Println("Downloading best file") -	fn := filepath.Join(bookname, "best") -	err = conn.Download(conn.WIPStorageId(), fn, fn) +	verboselog.Println("Downloading best pages") +	err = pipeline.DownloadBestPages(bookname, conn, *png)  	if err != nil { -		log.Fatalln("Failed to download 'best' file", err) -	} -	f, err := os.Open(fn) -	if err != nil { -		log.Fatalln("Failed to open best file", err) -	} -	defer f.Close() - -	if *png { -		verboselog.Println("Downloading png files") -		s := bufio.NewScanner(f) -		for s.Scan() { -			txtfn := filepath.Join(bookname, s.Text()) -			fn = strings.Replace(txtfn, ".hocr", ".png", 1) -			verboselog.Println("Downloading file", fn) -			err = conn.Download(conn.WIPStorageId(), fn, fn) -			if err != nil { -				log.Fatalln("Failed to download file", fn, err) -			} -		} -		return +		log.Fatalln(err)  	} -	verboselog.Println("Downloading HOCR files") -	s := bufio.NewScanner(f) -	for s.Scan() { -		fn = filepath.Join(bookname, s.Text()) -		verboselog.Println("Downloading file", fn) -		err = conn.Download(conn.WIPStorageId(), fn, fn) -		if err != nil { -			log.Fatalln("Failed to download file", fn, err) -		} +	verboselog.Println("Downloading PDFs") +	pipeline.DownloadPdfs(bookname, conn) +	if err != nil { +		log.Fatalln(err)  	} -	verboselog.Println("Downloading PDF files") -	getpdfs(conn, verboselog, bookname) - -	verboselog.Println("Downloading analysis files") -	for _, a := range []string{"conf", "graph.png"} { -		fn = filepath.Join(bookname, a) -		verboselog.Println("Downloading file", fn) -		err = conn.Download(conn.WIPStorageId(), fn, fn) -		if err != nil { -			log.Fatalln("Failed to download file", fn, err) -		} +	verboselog.Println("Downloading analyses") +	err = pipeline.DownloadAnalyses(bookname, conn) +	if err != nil { +		log.Fatalln(err)  	}  } diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index 1a3dcff..8e2fe69 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -168,7 +168,7 @@ func downloadbook(name string, conn Pipeliner) error {  		log.Fatalln("Failed to create directory", name, err)  	} -	err = pipeline.DownloadBestPages(name, conn) +	err = pipeline.DownloadBestPages(name, conn, false)  	if err != nil {  		return fmt.Errorf("Error downloading best pages: %v", err)  	} diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go index 8492d99..6949062 100644 --- a/internal/pipeline/get.go +++ b/internal/pipeline/get.go @@ -9,9 +9,10 @@ import (  	"fmt"  	"os"  	"path/filepath" +	"strings"  ) -func DownloadBestPages(name string, conn Pipeliner) error { +func DownloadBestPages(name string, conn Pipeliner, pluspngs bool) error {  	fn := filepath.Join(name, "best")  	err := conn.Download(conn.WIPStorageId(), fn, fn)  	if err != nil { @@ -26,12 +27,27 @@ func DownloadBestPages(name string, conn Pipeliner) error {  	s := bufio.NewScanner(f)  	for s.Scan() {  		fn = filepath.Join(name, s.Text()) +		conn.Log("Downloading file", fn)  		err = conn.Download(conn.WIPStorageId(), fn, fn)  		if err != nil {  			return fmt.Errorf("Failed to download file %s: %v", fn, err)  		}  	} +	if !pluspngs { +		return nil +	} + +	s = bufio.NewScanner(f) +	for s.Scan() { +		txtfn := filepath.Join(name, s.Text()) +		fn = strings.Replace(txtfn, ".hocr", ".png", 1) +		conn.Log("Downloading file", fn) +		err = conn.Download(conn.WIPStorageId(), fn, fn) +		if err != nil { +			return fmt.Errorf("Failed to download file", fn, err) +		} +	}  	return nil  } @@ -56,3 +72,18 @@ func DownloadAnalyses(name string, conn Pipeliner) error {  	}  	return nil  } + +func DownloadAll(name string, conn Pipeliner) error { +	objs, err := conn.ListObjects(conn.WIPStorageId(), name) +	if err != nil { +		return fmt.Errorf("Failed to get list of files for book", name, err) +	} +	for _, i := range objs { +		conn.Log("Downloading", i) +		err = conn.Download(conn.WIPStorageId(), i, i) +		if err != nil { +			return fmt.Errorf("Failed to download file", i, err) +		} +	} +	return nil +} diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go index cce5c19..c0accdb 100644 --- a/internal/pipeline/pipeline.go +++ b/internal/pipeline/pipeline.go @@ -52,6 +52,11 @@ type Pipeliner interface {  	Log(v ...interface{})  } +type MinPipeliner interface { +	Pipeliner +	MinimalInit() error +} +  type pageimg struct {  	hocr, img string  } | 
