summaryrefslogtreecommitdiff
path: root/pipelinepreprocess/aws.go
diff options
context:
space:
mode:
authorNick White <git@njw.name>2019-08-20 16:26:02 +0100
committerNick White <git@njw.name>2019-08-20 16:26:02 +0100
commit522e1481f5544362027b006d5fe34609f3d366bc (patch)
tree761c8edd1d0440e3f88db9f2c0b6c3d205726fef /pipelinepreprocess/aws.go
parent9f588a71e9a2d7ad179890d0fc19372fae047b04 (diff)
Substantially improve problematic object listing part of API
Switch to regular non-concurrent stuff, concurrency is better handled by the main program anyway. Now we handle errors properly, and things are way simpler.
Diffstat (limited to 'pipelinepreprocess/aws.go')
-rw-r--r--pipelinepreprocess/aws.go46
1 files changed, 22 insertions, 24 deletions
diff --git a/pipelinepreprocess/aws.go b/pipelinepreprocess/aws.go
index 75bf81c..1ac06de 100644
--- a/pipelinepreprocess/aws.go
+++ b/pipelinepreprocess/aws.go
@@ -142,56 +142,54 @@ func (a *awsConn) OCRQueueHeartbeat(t *time.Ticker, msgHandle string) error {
return a.QueueHeartbeat(t, msgHandle, a.ocrqurl)
}
-func (a *awsConn) ListObjects(bucket string, prefix string, names chan string) {
+func (a *awsConn) ListObjects(bucket string, prefix string) ([]string, error) {
+ var names []string
err := a.s3svc.ListObjectsV2Pages(&s3.ListObjectsV2Input{
Bucket: aws.String(bucket),
Prefix: aws.String(prefix),
}, func(page *s3.ListObjectsV2Output, last bool) bool {
for _, r := range page.Contents {
- names <- *r.Key
+ names = append(names, *r.Key)
}
return true
})
- close(names)
- if err != nil {
- // TODO: handle error properly
- log.Println("Error getting objects")
- }
+ return names, err
}
-func (a *awsConn) ListToPreprocess(bookname string, names chan string) error {
- objs := make(chan string)
+func (a *awsConn) ListToPreprocess(bookname string) ([]string, error) {
+ var names []string
preprocessed := regexp.MustCompile(PreprocPattern)
- go a.ListObjects("rescribeinprogress", bookname, objs)
+ objs, err := a.ListObjects("rescribeinprogress", bookname)
+ if err != nil {
+ return names, err
+ }
// Filter out any object that looks like it's already been preprocessed
- for n := range objs {
+ for _, n := range objs {
if preprocessed.MatchString(n) {
a.logger.Println("Skipping item that looks like it has already been processed", n)
continue
}
- names <- n
+ names = append(names, n)
}
- close(names)
- // TODO: handle errors from ListObjects
- return nil
+ return names, nil
}
-func (a *awsConn) ListToOCR(bookname string, names chan string) error {
- objs := make(chan string)
+func (a *awsConn) ListToOCR(bookname string) ([]string, error) {
+ var names []string
preprocessed := regexp.MustCompile(PreprocPattern)
- go a.ListObjects("rescribeinprogress", bookname, objs)
- a.logger.Println("Completed running listobjects")
+ objs, err := a.ListObjects("rescribeinprogress", bookname)
+ if err != nil {
+ return names, err
+ }
// Filter out any object that looks like it hasn't already been preprocessed
- for n := range objs {
+ for _, n := range objs {
if ! preprocessed.MatchString(n) {
a.logger.Println("Skipping item that looks like it is not preprocessed", n)
continue
}
- names <- n
+ names = append(names, n)
}
- close(names)
- // TODO: handle errors from ListObjects
- return nil
+ return names, nil
}
func (a *awsConn) AddToQueue(url string, msg string) error {