diff options
author | Nick White <git@njw.name> | 2020-04-07 12:54:07 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2020-04-07 12:54:07 +0100 |
commit | 913a6023459700e213eb0b03bb262555fc2710f5 (patch) | |
tree | ff7379df6da4e477d19a06c1c8316ba55b7e8b6a | |
parent | 9fb0842f07320d47509865c689747d2c82379f3d (diff) |
Improve documentation
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | aws.go | 1 | ||||
-rw-r--r-- | cloudsettings.go | 12 | ||||
-rw-r--r-- | doc.go | 20 |
4 files changed, 17 insertions, 20 deletions
@@ -11,7 +11,9 @@ by running `go get rescribe.xyz/bookpipeline/...`
 
 The commands in the cmd/ directory are at the heart of this
 package. For more details on their usage, use `go doc` or read
-doc.go in the package repository. The key commands are:
+doc.go in the package repository.
+
+The key commands for the virtual server side are:
 
 - bookpipeline : processes items from queues, doing preprocessing,
 ocr and postprocessing, and moving items on to
@@ -450,6 +450,7 @@ func (a *AwsConn) Log(v ...interface{}) {
 }
 
 // mkpipeline sets up necessary buckets and queues for the pipeline
+// TODO: also set up the necessary security group and iam stuff
 func (a *AwsConn) MkPipeline() error {
 	buckets := []string{storageWip}
 	queues := []string{queuePreProc, queueWipeOnly, queueAnalyse, queueOcrPage}
diff --git a/cloudsettings.go b/cloudsettings.go
index 0cf1777..fa60238 100644
--- a/cloudsettings.go
+++ b/cloudsettings.go
@@ -7,7 +7,13 @@ package bookpipeline
 // This file contains various cloud account specific stuff; change this if
 // you want to use the cloud functionality on your own site.
 
-// Spot instance details
+// Spot instance details.
+// The profile needs to allow permissions to the below S3 buckets and
+// SQS queues, the Sg (security group) doesn't need any permissions,
+// beyond SSH if you like, and the image should have bookpipeline
+// installed and ideally auto-updating.
+// TODO: release ansible repository which creates AMI.
+// TODO: create profile and security group with mkpipeline
 const (
 	spotProfile = "arn:aws:iam::557852942063:instance-profile/pipeliner"
 	spotImage = "ami-0bc6ef6900f6da5d3"
@@ -15,7 +21,7 @@ const (
 	spotSg = "sg-0be8a3ab89e7136b9"
 )
 
-// Queue names
+// Queue names. Can be anything unique in SQS.
 const (
 	queuePreProc = "rescribepreprocess"
 	queueWipeOnly = "rescribewipeonly"
@@ -23,7 +29,7 @@ const (
 	queueAnalyse = "rescribeanalyse"
 )
 
-// Storage bucket names
+// Storage bucket names. Can be anything unique in S3.
 const (
 	storageWip = "rescribeinprogress"
 )
@@ -5,6 +5,8 @@
 /*
 Package bookpipeline contains various tools and functions for the OCR of books,
 with a focus on distributed OCR using short-lived virtual servers.
+It also contains several tools that are useful standalone; read the
+accompanying README for more details.
 
 Introduction
 
@@ -25,8 +27,8 @@ what they do and how they work with the '-h' flag, so for example to get
 usage information on the booktopipeline tool simply run the following:
 	booktopipeline -h
 
-You'll also need to set up your ~/.aws/credentials appropriately so that the
-tools work.
+To get the pipeline tools to work for you, you'll need to change the settings
+in cloudsettings.go, and set up your ~/.aws/credentials appropriately.
 
 Managing servers
 
@@ -127,20 +129,6 @@ which have been prebinarised.
 	example message: APolishGentleman_MemoirByAdamKruczkiewicz
 	example message: APolishGentleman_MemoirByAdamKruczkiewicz rescribefrav2
 
-rescribeocr
-
-This queue is no longer used, as it could result in processes that took more
-than 12 hours to complete, which was unreliable with SQS. Instead pages are
-submitted individually to the rescribeocrpage by the preprocess and wipe
-functions, which has the added advantage that different pages can be processed
-in parallel on different servers, enabling books to be processed significantly
-faster. The code for processing books from the rescribeocr queue is still
-present in bookpipeline, and the queue is still checked, but it is not
-expected to be used.
-
-	example message: APolishGentleman_MemoirByAdamKruczkiewicz
-	example message: APolishGentleman_MemoirByAdamKruczkiewicz rescribefrav2
-
 rescribeocrpage
 
 This queue contains the path of individual pages, optionally followed by |