diff options
author | Nick White <git@njw.name> | 2019-08-20 16:26:02 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-08-20 16:26:02 +0100 |
commit | 522e1481f5544362027b006d5fe34609f3d366bc (patch) | |
tree | 761c8edd1d0440e3f88db9f2c0b6c3d205726fef /pipelinepreprocess/aws.go | |
parent | 9f588a71e9a2d7ad179890d0fc19372fae047b04 (diff) |
Substantially improve problematic object listing part of API
Switch to regular non-concurrent code; concurrency is better handled
by the main program anyway. Now we handle errors properly, and things
are way simpler.
Diffstat (limited to 'pipelinepreprocess/aws.go')
-rw-r--r-- | pipelinepreprocess/aws.go | 46 |
1 files changed, 22 insertions, 24 deletions
diff --git a/pipelinepreprocess/aws.go b/pipelinepreprocess/aws.go index 75bf81c..1ac06de 100644 --- a/pipelinepreprocess/aws.go +++ b/pipelinepreprocess/aws.go @@ -142,56 +142,54 @@ func (a *awsConn) OCRQueueHeartbeat(t *time.Ticker, msgHandle string) error { return a.QueueHeartbeat(t, msgHandle, a.ocrqurl) } -func (a *awsConn) ListObjects(bucket string, prefix string, names chan string) { +func (a *awsConn) ListObjects(bucket string, prefix string) ([]string, error) { + var names []string err := a.s3svc.ListObjectsV2Pages(&s3.ListObjectsV2Input{ Bucket: aws.String(bucket), Prefix: aws.String(prefix), }, func(page *s3.ListObjectsV2Output, last bool) bool { for _, r := range page.Contents { - names <- *r.Key + names = append(names, *r.Key) } return true }) - close(names) - if err != nil { - // TODO: handle error properly - log.Println("Error getting objects") - } + return names, err } -func (a *awsConn) ListToPreprocess(bookname string, names chan string) error { - objs := make(chan string) +func (a *awsConn) ListToPreprocess(bookname string) ([]string, error) { + var names []string preprocessed := regexp.MustCompile(PreprocPattern) - go a.ListObjects("rescribeinprogress", bookname, objs) + objs, err := a.ListObjects("rescribeinprogress", bookname) + if err != nil { + return names, err + } // Filter out any object that looks like it's already been preprocessed - for n := range objs { + for _, n := range objs { if preprocessed.MatchString(n) { a.logger.Println("Skipping item that looks like it has already been processed", n) continue } - names <- n + names = append(names, n) } - close(names) - // TODO: handle errors from ListObjects - return nil + return names, nil } -func (a *awsConn) ListToOCR(bookname string, names chan string) error { - objs := make(chan string) +func (a *awsConn) ListToOCR(bookname string) ([]string, error) { + var names []string preprocessed := regexp.MustCompile(PreprocPattern) - go a.ListObjects("rescribeinprogress", bookname, objs) - 
a.logger.Println("Completed running listobjects") + objs, err := a.ListObjects("rescribeinprogress", bookname) + if err != nil { + return names, err + } // Filter out any object that looks like it hasn't already been preprocessed - for n := range objs { + for _, n := range objs { if ! preprocessed.MatchString(n) { a.logger.Println("Skipping item that looks like it is not preprocessed", n) continue } - names <- n + names = append(names, n) } - close(names) - // TODO: handle errors from ListObjects - return nil + return names, nil } func (a *awsConn) AddToQueue(url string, msg string) error { |