From 312dcbe96e45330e933f7d542e3b2ef2bf76ec08 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 13 Aug 2019 18:33:32 +0100 Subject: Various improvements to pipelinepreprocess - Ensure temporary directory already being present isn't an issue - Remove temporary directory when done with it - Ensure any already preprocessed files aren't preprocessed themselves (this could happen in the case of a run stopping half way through) --- pipelinepreprocess/main.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'pipelinepreprocess/main.go') diff --git a/pipelinepreprocess/main.go b/pipelinepreprocess/main.go index fd73725..6c58d98 100644 --- a/pipelinepreprocess/main.go +++ b/pipelinepreprocess/main.go @@ -9,6 +9,7 @@ import ( "log" "os" "path/filepath" + "regexp" "time" "github.com/aws/aws-sdk-go/aws" @@ -31,6 +32,7 @@ var verboselog *log.Logger const HeartbeatTime = 60 const PauseBetweenChecks = 60 * time.Second +const PreprocPattern = `_bin[0-9].[0-9].png` // TODO: could restructure like so: // have the goroutine functions run outside of the main loop in the program, @@ -127,6 +129,8 @@ func main() { verboselog = log.New(n, "", log.LstdFlags) } + alreadydone := regexp.MustCompile(PreprocPattern) + verboselog.Println("Setting up AWS session") sess, err := session.NewSession(&aws.Config{ Region: aws.String("eu-west-2"), @@ -187,7 +191,7 @@ func main() { d := filepath.Join(os.TempDir(), bookname) - err = os.Mkdir(d, 0755) + err = os.MkdirAll(d, 0755) if err != nil { log.Fatalln("Failed to create directory", d, err) } @@ -203,12 +207,16 @@ func main() { go up(upc, done, uploader, bookname) - verboselog.Println("Getting list of appropriate objects to download") + verboselog.Println("Getting list of objects to download") err = s3svc.ListObjectsV2Pages(&s3.ListObjectsV2Input{ Bucket: aws.String("rescribeinprogress"), Prefix: aws.String(bookname), }, func(page *s3.ListObjectsV2Output, last bool) bool { for _, r := range page.Contents { + if alreadydone.MatchString(*r.Key) { + verboselog.Println("Skipping item that looks like it has already been processed", *r.Key) + continue + } dl <- *r.Key } return true @@ -238,5 +246,10 @@ func main() { if err != nil { log.Fatalln("Error deleting message from queue", preqname, ":", err) } + + err = os.RemoveAll(d) + if err != nil { + log.Fatalln("Failed to remove directory", d, err) + } } } -- cgit v1.2.1-24-ge1ad