From a610e77c381850f28e504617cda08623f74cb208 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 3 Dec 2019 13:43:08 +0000 Subject: Rewrite lspipeline book listing part to be much faster by taking advantage of the aws CommonPrefixes output --- cmd/lspipeline/main.go | 117 ++++++++++++++----------------------------------- 1 file changed, 32 insertions(+), 85 deletions(-) (limited to 'cmd') diff --git a/cmd/lspipeline/main.go b/cmd/lspipeline/main.go index a32d851..0128c60 100644 --- a/cmd/lspipeline/main.go +++ b/cmd/lspipeline/main.go @@ -1,13 +1,11 @@ package main import ( - "errors" "flag" "fmt" "log" "os/exec" "sort" - "strings" "rescribe.xyz/bookpipeline" ) @@ -33,6 +31,7 @@ type LsPipeliner interface { GetQueueDetails(url string) (string, string, error) GetInstanceDetails() ([]bookpipeline.InstanceDetails, error) ListObjectsWithMeta(bucket string, prefix string) ([]bookpipeline.ObjMeta, error) + ListObjectPrefixes(bucket string) ([]string, error) WIPStorageId() string } @@ -97,102 +96,50 @@ func (o ObjMetas) Less(i, j int) bool { return o[i].Date.Before(o[j].Date) } -// sortBookList sorts a list of book names by date. -// It uses a list of filenames and dates in an ObjMeta slice to -// determine the date for a book name. -func sortBookList(list []string, fileinfo []bookpipeline.ObjMeta) ([]string, error) { - var listinfo ObjMetas - - for _, name := range list { - found := false - for _, f := range fileinfo { - parts := strings.Split(f.Name, "/") - prefix := parts[0] - if name == prefix { - listinfo = append(listinfo, bookpipeline.ObjMeta{Name: name, Date: f.Date}) - found = true - break - } - } - if !found { - return list, errors.New("Failed to find metadata for list") - } - } - - // sort listinfo by date - sort.Sort(listinfo) - - var l []string - for _, i := range listinfo { - l = append(l, i.Name) - } - return l, nil -} - // getBookStatus returns a list of in progress and done books. -// It determines this by listing all objects, and splitting the -// prefixes into two lists, those which have a 'graph.png' file, -// which are classed as done, and those which are not. These are -// sorted by date according to file metadata. +// It determines this by finding all prefixes, and splitting them +// into two lists, those which have a 'graph.png' file (the done +// list), and those which do not (the inprogress list). They are +// sorted according to the date of the graph.png file, or the date +// of a random file with the prefix if no graph.png was found. func getBookStatus(conn LsPipeliner) (inprogress []string, done []string, err error) { - allfiles, err := conn.ListObjectsWithMeta(conn.WIPStorageId(), "") + prefixes, err := conn.ListObjectPrefixes(conn.WIPStorageId()) + var inprogressmeta, donemeta ObjMetas if err != nil { - log.Println("Error getting list of objects:", err) - return inprogress, done, err + log.Println("Error getting object prefixes:", err) + return } - for _, f := range allfiles { - parts := strings.Split(f.Name, "/") - if parts[1] != "graph.png" { - continue - } - prefix := parts[0] - found := false - for _, i := range done { - if i == prefix { - found = true - continue - } - } - if !found { - done = append(done, prefix) + // Search for graph.png to determine done books (and save the date of it to sort with) + for _, p := range prefixes { + objs, err := conn.ListObjectsWithMeta(conn.WIPStorageId(), p + "graph.png") + if err != nil || len(objs) == 0 { + inprogressmeta = append(inprogressmeta, bookpipeline.ObjMeta{Name: p}) + } else { + donemeta = append(donemeta, bookpipeline.ObjMeta{Name: p, Date: objs[0].Date}) } } - - for _, f := range allfiles { - parts := strings.Split(f.Name, "/") - prefix := parts[0] - found := false - for _, i := range done { - if i == prefix { - found = true - continue - } - } - for _, i := range inprogress { - if i == prefix { - found = true - continue - } - } - if !found { - inprogress = append(inprogress, prefix) + // Get a random file from the inprogress list to get a date to sort by + for _, i := range inprogressmeta { + objs, err := conn.ListObjectsWithMeta(conn.WIPStorageId(), i.Name) + if err != nil || len(objs) == 0 { + continue } + i.Date = objs[0].Date } - - inprogress, err = sortBookList(inprogress, allfiles) - if err != nil { - log.Println("Error sorting list of objects:", err) - err = nil + sort.Sort(donemeta) + for _, i := range donemeta { + done = append(done, i.Name) } - done, err = sortBookList(done, allfiles) - if err != nil { - log.Println("Error sorting list of objects:", err) - err = nil + sort.Sort(inprogressmeta) + for _, i := range inprogressmeta { + inprogress = append(inprogress, i.Name) } - return inprogress, done, err + return } +// getBookStatusChan runs getBookStatus and sends its results to +// channels for the done and receive arrays. func getBookStatusChan(conn LsPipeliner, inprogressc chan string, donec chan string) { inprogress, done, err := getBookStatus(conn) if err != nil { -- cgit v1.2.1-24-ge1ad