From 0d914a5de3f8169d41df4fcff1ee4aea6d01afbe Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Tue, 24 Nov 2020 12:40:54 +0000
Subject: [booktopipeline] Add a check to disallow adding a book that already
 exists

This is important because if a book is added that has already been
processed, an analyse job will be added every time a page is OCRed,
which will clog up the pipeline with unnecessary work. Also, if a book
were added with the same name but differently named files, or a
different number of pages, the results would almost certainly not be
as intended.

If a book really does need to be added under a particular name,
either the original directory can be removed from S3, or "v2" or
similar can be appended to the book name before calling
booktopipeline.
---
 cmd/booktopipeline/main.go | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'cmd/booktopipeline')

diff --git a/cmd/booktopipeline/main.go b/cmd/booktopipeline/main.go
index 7254d78..b4f4d99 100644
--- a/cmd/booktopipeline/main.go
+++ b/cmd/booktopipeline/main.go
@@ -102,6 +102,15 @@ func main() {
 		log.Fatalln(err)
 	}
 
+	verboselog.Println("Checking that a book hasn't already been uploaded with that name")
+	list, err := conn.ListObjects(conn.WIPStorageId(), bookname)
+	if err != nil {
+		log.Fatalln(err)
+	}
+	if len(list) > 0 {
+		log.Fatalf("Error: There is already a book in S3 named %s", bookname)
+	}
+
 	verboselog.Println("Uploading all images are valid in", bookdir)
 	err = pipeline.UploadImages(bookdir, bookname, conn)
 	if err != nil {
-- 
cgit v1.2.1-24-ge1ad