From 0d914a5de3f8169d41df4fcff1ee4aea6d01afbe Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 24 Nov 2020 12:40:54 +0000 Subject: [booktopipeline] Add a check to disallow adding a book that already exists This is important because if a book that has already been processed is added again, an analyse job will be added every time a page is OCRed, which will clog up the pipeline with unnecessary work. Also, if a book were added with the same name but differently named files, or a different number of pages, the results would almost certainly not be as intended. If a book really needs to be added under a particular name, either the original directory can be removed from S3, or "v2" or similar can be appended to the book name before calling booktopipeline. --- cmd/booktopipeline/main.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cmd/booktopipeline/main.go b/cmd/booktopipeline/main.go index 7254d78..b4f4d99 100644 --- a/cmd/booktopipeline/main.go +++ b/cmd/booktopipeline/main.go @@ -102,6 +102,15 @@ func main() { log.Fatalln(err) } + verboselog.Println("Checking that a book hasn't already been uploaded with that name") + list, err := conn.ListObjects(conn.WIPStorageId(), bookname) + if err != nil { + log.Fatalln(err) + } + if len(list) > 0 { + log.Fatalf("Error: There is already a book in S3 named %s", bookname) + } + verboselog.Println("Uploading all images are valid in", bookdir) err = pipeline.UploadImages(bookdir, bookname, conn) if err != nil { -- cgit v1.2.1-24-ge1ad