diff options
| -rw-r--r-- | README | 4 | ||||
| -rw-r--r-- | aws.go | 1 | ||||
| -rw-r--r-- | cloudsettings.go | 12 | ||||
| -rw-r--r-- | doc.go | 20 | 
4 files changed, 17 insertions, 20 deletions
@@ -11,7 +11,9 @@ by running `go get rescribe.xyz/bookpipeline/...`  The commands in the cmd/ directory are at the heart of this  package. For more details on their usage, use `go doc` or read -doc.go in the package repository. The key commands are: +doc.go in the package repository. + +The key commands for the virtual server side are:    - bookpipeline    : processes items from queues, doing preprocessing,                        ocr and postprocessing, and moving items on to @@ -450,6 +450,7 @@ func (a *AwsConn) Log(v ...interface{}) {  }  // mkpipeline sets up necessary buckets and queues for the pipeline +// TODO: also set up the necessary security group and iam stuff  func (a *AwsConn) MkPipeline() error {  	buckets := []string{storageWip}  	queues := []string{queuePreProc, queueWipeOnly, queueAnalyse, queueOcrPage} diff --git a/cloudsettings.go b/cloudsettings.go index 0cf1777..fa60238 100644 --- a/cloudsettings.go +++ b/cloudsettings.go @@ -7,7 +7,13 @@ package bookpipeline  // This file contains various cloud account specific stuff; change this if  // you want to use the cloud functionality on your own site. -// Spot instance details +// Spot instance details. +// The profile needs to allow permissions to the below S3 buckets and +// SQS queues, the Sg (security group) doesn't need any permissions, +// beyond SSH if you like, and the image should have bookpipeline +// installed and ideally auto-updating. +// TODO: release ansible repository which creates AMI. +// TODO: create profile and security group with mkpipeline  const (  	spotProfile = "arn:aws:iam::557852942063:instance-profile/pipeliner"  	spotImage   = "ami-0bc6ef6900f6da5d3" @@ -15,7 +21,7 @@ const (  	spotSg      = "sg-0be8a3ab89e7136b9"  ) -// Queue names +// Queue names. Can be anything unique in SQS.  const (  	queuePreProc  = "rescribepreprocess"  	queueWipeOnly = "rescribewipeonly" @@ -23,7 +29,7 @@ const (  	queueAnalyse  = "rescribeanalyse"  ) -// Storage bucket names +// Storage bucket names. Can be anything unique in S3.  const (  	storageWip = "rescribeinprogress"  ) @@ -5,6 +5,8 @@  /*  Package bookpipeline contains various tools and functions for the OCR of  books, with a focus on distributed OCR using short-lived virtual servers. +It also contains several tools that are useful standalone; read the +accompanying README for more details.  Introduction @@ -25,8 +27,8 @@ what they do and how they work with the '-h' flag, so for example to get usage  information on the booktopipeline tool simply run the following:    booktopipeline -h -You'll also need to set up your ~/.aws/credentials appropriately so that the -tools work. +To get the pipeline tools to work for you, you'll need to change the settings +in cloudsettings.go, and set up your ~/.aws/credentials appropriately.  Managing servers @@ -127,20 +129,6 @@ which have been prebinarised.    example message: APolishGentleman_MemoirByAdamKruczkiewicz    example message: APolishGentleman_MemoirByAdamKruczkiewicz rescribefrav2 -rescribeocr - -This queue is no longer used, as it could result in processes that took more -than 12 hours to complete, which was unreliable with SQS. Instead pages are -submitted individually to the rescribeocrpage by the preprocess and wipe -functions, which has the added advantage that different pages can be processed -in parallel on different servers, enabling books to be processed significantly -faster. The code for processing books from the rescribeocr queue is still -present in bookpipeline, and the queue is still checked, but it is not -expected to be used. - -  example message: APolishGentleman_MemoirByAdamKruczkiewicz -  example message: APolishGentleman_MemoirByAdamKruczkiewicz rescribefrav2 -  rescribeocrpage  This queue contains the path of individual pages, optionally followed by  | 
