From 913a6023459700e213eb0b03bb262555fc2710f5 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 7 Apr 2020 12:54:07 +0100 Subject: Improve documentation --- README | 4 +++- aws.go | 1 + cloudsettings.go | 12 +++++++++--- doc.go | 20 ++++---------------- 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/README b/README index e5e918f..5da6c6b 100644 --- a/README +++ b/README @@ -11,7 +11,9 @@ by running `go get rescribe.xyz/bookpipeline/...` The commands in the cmd/ directory are at the heart of this package. For more details on their usage, use `go doc` or read -doc.go in the package repository. The key commands are: +doc.go in the package repository. + +The key commands for the virtual server side are: - bookpipeline : processes items from queues, doing preprocessing, ocr and postprocessing, and moving items on to diff --git a/aws.go b/aws.go index 2015d9c..70189ea 100644 --- a/aws.go +++ b/aws.go @@ -450,6 +450,7 @@ func (a *AwsConn) Log(v ...interface{}) { } // mkpipeline sets up necessary buckets and queues for the pipeline +// TODO: also set up the necessary security group and iam stuff func (a *AwsConn) MkPipeline() error { buckets := []string{storageWip} queues := []string{queuePreProc, queueWipeOnly, queueAnalyse, queueOcrPage} diff --git a/cloudsettings.go b/cloudsettings.go index 0cf1777..fa60238 100644 --- a/cloudsettings.go +++ b/cloudsettings.go @@ -7,7 +7,13 @@ package bookpipeline // This file contains various cloud account specific stuff; change this if // you want to use the cloud functionality on your own site. -// Spot instance details +// Spot instance details. +// The profile needs to allow permissions to the below S3 buckets and +// SQS queues, the Sg (security group) doesn't need any permissions, +// beyond SSH if you like, and the image should have bookpipeline +// installed and ideally auto-updating. +// TODO: release ansible repository which creates AMI. +// TODO: create profile and security group with mkpipeline const ( spotProfile = "arn:aws:iam::557852942063:instance-profile/pipeliner" spotImage = "ami-0bc6ef6900f6da5d3" @@ -15,7 +21,7 @@ const ( spotSg = "sg-0be8a3ab89e7136b9" ) -// Queue names +// Queue names. Can be anything unique in SQS. const ( queuePreProc = "rescribepreprocess" queueWipeOnly = "rescribewipeonly" @@ -23,7 +29,7 @@ const ( queueAnalyse = "rescribeanalyse" ) -// Storage bucket names +// Storage bucket names. Can be anything unique in S3. const ( storageWip = "rescribeinprogress" ) diff --git a/doc.go b/doc.go index b7d9d99..59037cd 100644 --- a/doc.go +++ b/doc.go @@ -5,6 +5,8 @@ /* Package bookpipeline contains various tools and functions for the OCR of books, with a focus on distributed OCR using short-lived virtual servers. +It also contains several tools that are useful standalone; read the +accompanying README for more details. Introduction @@ -25,8 +27,8 @@ what they do and how they work with the '-h' flag, so for example to get usage information on the booktopipeline tool simply run the following: booktopipeline -h -You'll also need to set up your ~/.aws/credentials appropriately so that the -tools work. +To get the pipeline tools to work for you, you'll need to change the settings +in cloudsettings.go, and set up your ~/.aws/credentials appropriately. Managing servers @@ -127,20 +129,6 @@ which have been prebinarised. example message: APolishGentleman_MemoirByAdamKruczkiewicz example message: APolishGentleman_MemoirByAdamKruczkiewicz rescribefrav2 -rescribeocr - -This queue is no longer used, as it could result in processes that took more -than 12 hours to complete, which was unreliable with SQS. Instead pages are -submitted individually to the rescribeocrpage by the preprocess and wipe -functions, which has the added advantage that different pages can be processed -in parallel on different servers, enabling books to be processed significantly -faster. The code for processing books from the rescribeocr queue is still -present in bookpipeline, and the queue is still checked, but it is not -expected to be used. - - example message: APolishGentleman_MemoirByAdamKruczkiewicz - example message: APolishGentleman_MemoirByAdamKruczkiewicz rescribefrav2 - rescribeocrpage This queue contains the path of individual pages, optionally followed by -- cgit v1.2.1-24-ge1ad