diff options
| author | Nick White <git@njw.name> | 2019-12-03 13:43:08 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-12-03 13:43:08 +0000 | 
| commit | a610e77c381850f28e504617cda08623f74cb208 (patch) | |
| tree | f264924ffb6421c0c97b36c4ac7d278da9947aaa | |
| parent | a2f98474f6e4ce35a4b191ab27b3293ebcdff418 (diff) | |
Rewrite lspipeline book listing part to be much faster by taking advantage of the aws CommonPrefixes output
| -rw-r--r-- | aws.go | 15 | ||||
| -rw-r--r-- | cmd/lspipeline/main.go | 117 | 
2 files changed, 47 insertions, 85 deletions
| @@ -274,6 +274,21 @@ func (a *AwsConn) ListObjectsWithMeta(bucket string, prefix string) ([]ObjMeta,  	return objs, err  } +func (a *AwsConn) ListObjectPrefixes(bucket string) ([]string, error) { +	var prefixes []string +	err := a.s3svc.ListObjectsV2Pages(&s3.ListObjectsV2Input{ +		Bucket: aws.String(bucket), +		Delimiter: aws.String("/"), +		MaxKeys: aws.Int64(1), +	}, func(page *s3.ListObjectsV2Output, last bool) bool { +		for _, r := range page.CommonPrefixes { +			prefixes = append(prefixes, *r.Prefix) +		} +		return true +	}) +	return prefixes, err +} +  func (a *AwsConn) AddToQueue(url string, msg string) error {  	_, err := a.sqssvc.SendMessage(&sqs.SendMessageInput{  		MessageBody: &msg, diff --git a/cmd/lspipeline/main.go b/cmd/lspipeline/main.go index a32d851..0128c60 100644 --- a/cmd/lspipeline/main.go +++ b/cmd/lspipeline/main.go @@ -1,13 +1,11 @@  package main  import ( -	"errors"  	"flag"  	"fmt"  	"log"  	"os/exec"  	"sort" -	"strings"  	"rescribe.xyz/bookpipeline"  ) @@ -33,6 +31,7 @@ type LsPipeliner interface {  	GetQueueDetails(url string) (string, string, error)  	GetInstanceDetails() ([]bookpipeline.InstanceDetails, error)  	ListObjectsWithMeta(bucket string, prefix string) ([]bookpipeline.ObjMeta, error) +	ListObjectPrefixes(bucket string) ([]string, error)  	WIPStorageId() string  } @@ -97,102 +96,50 @@ func (o ObjMetas) Less(i, j int) bool {  	return o[i].Date.Before(o[j].Date)  } -// sortBookList sorts a list of book names by date. -// It uses a list of filenames and dates in an ObjMeta slice to -// determine the date for a book name. -func sortBookList(list []string, fileinfo []bookpipeline.ObjMeta) ([]string, error) { -	var listinfo ObjMetas - -	for _, name := range list { -		found := false -		for _, f := range fileinfo { -			parts := strings.Split(f.Name, "/") -			prefix := parts[0] -			if name == prefix { -				listinfo = append(listinfo, bookpipeline.ObjMeta{Name: name, Date: f.Date}) -				found = true -				break -			} -		} -		if !found { -			return list, errors.New("Failed to find metadata for list") -		} -	} - -	// sort listinfo by date -	sort.Sort(listinfo) - -	var l []string -	for _, i := range listinfo { -		l = append(l, i.Name) -	} -	return l, nil -} -  // getBookStatus returns a list of in progress and done books. -// It determines this by listing all objects, and splitting the -// prefixes into two lists, those which have a 'graph.png' file, -// which are classed as done, and those which are not. These are -// sorted by date according to file metadata. +// It determines this by finding all prefixes, and splitting them +// into two lists, those which have a 'graph.png' file (the done +// list), and those which do not (the inprogress list). They are +// sorted according to the date of the graph.png file, or the date +// of a random file with the prefix if no graph.png was found.  func getBookStatus(conn LsPipeliner) (inprogress []string, done []string, err error) { -	allfiles, err := conn.ListObjectsWithMeta(conn.WIPStorageId(), "") +	prefixes, err := conn.ListObjectPrefixes(conn.WIPStorageId()) +	var inprogressmeta, donemeta ObjMetas  	if err != nil { -		log.Println("Error getting list of objects:", err) -		return inprogress, done, err +		log.Println("Error getting object prefixes:", err) +		return  	} -	for _, f := range allfiles { -		parts := strings.Split(f.Name, "/") -		if parts[1] != "graph.png" { -			continue -		} -		prefix := parts[0] -		found := false -		for _, i := range done { -			if i == prefix { -				found = true -				continue -			} -		} -		if !found { -			done = append(done, prefix) +	// Search for graph.png to determine done books (and save the date of it to sort with) +	for _, p := range prefixes { +		objs, err := conn.ListObjectsWithMeta(conn.WIPStorageId(), p + "graph.png") +		if err != nil || len(objs) == 0 { +			inprogressmeta = append(inprogressmeta, bookpipeline.ObjMeta{Name: p}) +		} else { +			donemeta = append(donemeta, bookpipeline.ObjMeta{Name: p, Date: objs[0].Date})  		}  	} - -	for _, f := range allfiles { -		parts := strings.Split(f.Name, "/") -		prefix := parts[0] -		found := false -		for _, i := range done { -			if i == prefix { -				found = true -				continue -			} -		} -		for _, i := range inprogress { -			if i == prefix { -				found = true -				continue -			} -		} -		if !found { -			inprogress = append(inprogress, prefix) +	// Get a random file from the inprogress list to get a date to sort by +	for _, i := range inprogressmeta { +		objs, err := conn.ListObjectsWithMeta(conn.WIPStorageId(), i.Name) +		if err != nil || len(objs) == 0 { +			continue  		} +		i.Date = objs[0].Date  	} - -	inprogress, err = sortBookList(inprogress, allfiles) -	if err != nil { -		log.Println("Error sorting list of objects:", err) -		err = nil +	sort.Sort(donemeta) +	for _, i := range donemeta { +		done = append(done, i.Name)  	} -	done, err = sortBookList(done, allfiles) -	if err != nil { -		log.Println("Error sorting list of objects:", err) -		err = nil +	sort.Sort(inprogressmeta) +	for _, i := range inprogressmeta { +		inprogress = append(inprogress, i.Name)  	} -	return inprogress, done, err +	return  } +// getBookStatusChan runs getBookStatus and sends its results to +// channels for the done and receive arrays.  func getBookStatusChan(conn LsPipeliner, inprogressc chan string, donec chan string) {  	inprogress, done, err := getBookStatus(conn)  	if err != nil { | 
