summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- aws.go 18
-rw-r--r-- cmd/fonttobytes/main.go 47
-rw-r--r-- cmd/pdfbook/main.go 5
-rw-r--r-- cmd/spotme/main.go 53
-rw-r--r-- cmd/unstickocr/main.go 114
-rw-r--r-- pdf.go 1
6 files changed, 237 insertions, 1 deletions
diff --git a/aws.go b/aws.go
index 4aea082..73f3b2f 100644
--- a/aws.go
+++ b/aws.go
@@ -349,3 +349,21 @@ func (a *AwsConn) GetInstanceDetails() ([]InstanceDetails, error) {
})
return details, err
}
+
+// StartInstances sends one spot instance request asking for n instances,
+// using a fixed launch specification (instance profile, image, instance
+// type and security group). The request type is "one-time". It returns
+// whatever error the request call reports; the response is discarded.
+func (a *AwsConn) StartInstances(n int) error {
+	// Launch settings are fixed for the pipeline rather than configurable.
+	const (
+		profileArn    = "arn:aws:iam::557852942063:instance-profile/pipeliner"
+		imageID       = "ami-02cd15d68d4ca2865"
+		instanceType  = "m5.large"
+		securityGroup = "sg-0be8a3ab89e7136b9"
+		requestType   = "one-time"
+	)
+
+	spec := &ec2.RequestSpotLaunchSpecification{
+		IamInstanceProfile: &ec2.IamInstanceProfileSpecification{
+			Arn: aws.String(profileArn),
+		},
+		ImageId:          aws.String(imageID),
+		InstanceType:     aws.String(instanceType),
+		SecurityGroupIds: []*string{aws.String(securityGroup)},
+	}
+	req := &ec2.RequestSpotInstancesInput{
+		InstanceCount:       aws.Int64(int64(n)),
+		LaunchSpecification: spec,
+		Type:                aws.String(requestType),
+	}
+
+	_, err := a.ec2svc.RequestSpotInstances(req)
+	return err
+}
diff --git a/cmd/fonttobytes/main.go b/cmd/fonttobytes/main.go
new file mode 100644
index 0000000..011baa1
--- /dev/null
+++ b/cmd/fonttobytes/main.go
@@ -0,0 +1,47 @@
+package main
+
+import (
+ "bytes"
+ "compress/zlib"
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "log"
+)
+
+// main reads the font file named by the single command-line argument,
+// compresses it with zlib, and prints the compressed bytes to standard
+// output as a Go []byte literal with decimal byte values. With any other
+// number of arguments it prints the usage text and returns.
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintln(flag.CommandLine.Output(), "Usage: fonttobytes font.ttf")
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	if flag.NArg() != 1 {
+		flag.Usage()
+		return
+	}
+
+	font, err := ioutil.ReadFile(flag.Arg(0))
+	if err != nil {
+		log.Fatalln(err)
+	}
+
+	// compress with zlib; both Write and Close can fail, and Close is
+	// what flushes the remaining compressed data, so check both rather
+	// than risk printing a truncated literal.
+	var buf bytes.Buffer
+	w := zlib.NewWriter(&buf)
+	if _, err := w.Write(font); err != nil {
+		log.Fatalln("Failed to compress font:", err)
+	}
+	if err := w.Close(); err != nil {
+		log.Fatalln("Failed to finish compressing font:", err)
+	}
+
+	// this could be done more simply with %#v, but that takes up
+	// significantly more space due to printing each byte in hex
+	// rather than dec format.
+	//
+	// The literal is assembled in memory and printed once, instead of
+	// issuing a separate write to stdout for every byte.
+	var out bytes.Buffer
+	out.WriteString("[]byte{")
+	for i, b := range buf.Bytes() {
+		if i > 0 {
+			out.WriteString(", ")
+		}
+		fmt.Fprintf(&out, "%d", b)
+	}
+	out.WriteString("}\n")
+	fmt.Print(out.String())
+}
diff --git a/cmd/pdfbook/main.go b/cmd/pdfbook/main.go
index bdb486d..79db353 100644
--- a/cmd/pdfbook/main.go
+++ b/cmd/pdfbook/main.go
@@ -112,7 +112,10 @@ func main() {
}
pdf := new(bookpipeline.Fpdf)
- pdf.Setup()
+ err = pdf.Setup()
+ if err != nil {
+ log.Fatalln("Failed to set up PDF", err)
+ }
if os.IsNotExist(err) {
err = filepath.Walk(flag.Arg(0), walker(pdf, *colour))
diff --git a/cmd/spotme/main.go b/cmd/spotme/main.go
new file mode 100644
index 0000000..99404cb
--- /dev/null
+++ b/cmd/spotme/main.go
@@ -0,0 +1,53 @@
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+
+ "rescribe.xyz/bookpipeline"
+)
+
+// usage is the help text written by the flag.Usage function that main
+// installs; flag.PrintDefaults output follows it.
+const usage = `Usage: spotme [-n num]
+
+Create new spot instances for the book pipeline.
+`
+
+// SpotPipeliner lists the two connection methods this command calls:
+// Init to set the connection up and StartInstances to request n
+// instances. main assigns a *bookpipeline.AwsConn to it.
+type SpotPipeliner interface {
+ Init() error
+ StartInstances(n int) error
+}
+
+// NullWriter is a writer that throws away everything written to it, so
+// that non-verbose logging may be discarded.
+type NullWriter bool
+
+// Write reports the whole of p as written without storing it anywhere;
+// it never returns an error.
+func (NullWriter) Write(p []byte) (int, error) {
+	return len(p), nil
+}
+
+// main parses the -n flag, initialises a connection for region
+// eu-west-2, and asks it to start that many spot instances. Any failure
+// is fatal.
+func main() {
+	num := flag.Int("n", 1, "number of instances to start")
+	flag.Usage = func() {
+		fmt.Fprint(flag.CommandLine.Output(), usage)
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	// This command has no verbose flag, so the logger handed to the
+	// connection always discards its output.
+	discard := log.New(NullWriter(false), "", 0)
+
+	var conn SpotPipeliner = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: discard}
+	if err := conn.Init(); err != nil {
+		log.Fatalln("Failed to set up cloud connection:", err)
+	}
+
+	log.Println("Starting spot instances")
+	if err := conn.StartInstances(*num); err != nil {
+		log.Fatalln("Failed to start a spot instance:", err)
+	}
+	log.Println("Spot instance request sent successfully")
+}
diff --git a/cmd/unstickocr/main.go b/cmd/unstickocr/main.go
new file mode 100644
index 0000000..27f3c0d
--- /dev/null
+++ b/cmd/unstickocr/main.go
@@ -0,0 +1,114 @@
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "os"
+ "time"
+
+ "rescribe.xyz/bookpipeline"
+)
+
+// usage is the help text written by the flag.Usage function that main
+// installs; main also calls it when the argument count is not exactly
+// one.
+const usage = `Usage: unstickocr [-v] bookname
+
+unstickocr deletes a book from the OCR queue and adds it to the
+Analyse queue.
+
+This should be done automatically by the bookpipeline tool once
+the OCR job has completed, but sometimes it isn't, because of a
+nasty bug. Once that bug is squashed, this tool can be deleted.
+`
+
+// NullWriter is a writer that throws away everything written to it, so
+// that non-verbose logging can be discarded.
+type NullWriter bool
+
+// Write reports the whole of p as written without storing it anywhere;
+// it never returns an error.
+func (NullWriter) Write(p []byte) (int, error) {
+	return len(p), nil
+}
+
+// UnstickPipeliner lists the connection methods this command calls:
+// setting the connection up, checking a queue for a message, adding and
+// deleting messages, and looking up the OCR and Analyse queue ids.
+// main assigns a *bookpipeline.AwsConn to it.
+type UnstickPipeliner interface {
+ Init() error
+ CheckQueue(url string, timeout int64) (bookpipeline.Qmsg, error)
+ AddToQueue(url string, msg string) error
+ DelFromQueue(url string, handle string) error
+ OCRQueueId() string
+ AnalyseQueueId() string
+}
+
+// main looks for the book named by the single command-line argument on
+// the OCR queue and, once found, deletes it from there and adds it to
+// the Analyse queue. It makes up to 5 rounds of 10 queue checks each,
+// pausing 30 seconds between rounds, and exits with a fatal error if the
+// message is never seen or if any queue operation fails.
+func main() {
+	verbose := flag.Bool("v", false, "verbose")
+	flag.Usage = func() {
+		fmt.Fprintf(flag.CommandLine.Output(), usage)
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	if flag.NArg() != 1 {
+		flag.Usage()
+		return
+	}
+
+	var verboselog *log.Logger
+	if *verbose {
+		verboselog = log.New(os.Stdout, "", 0)
+	} else {
+		var n NullWriter
+		verboselog = log.New(n, "", 0)
+	}
+
+	var conn UnstickPipeliner
+	conn = &bookpipeline.AwsConn{Region: "eu-west-2", Logger: verboselog}
+
+	err := conn.Init()
+	if err != nil {
+		log.Fatalln("Error setting up cloud connection:", err)
+	}
+
+	book := flag.Arg(0)
+	done := false
+
+	for a := 0; a < 5 && !done; a++ {
+		// Pause only between rounds: there is nothing to wait for
+		// before the first round or after the last one.
+		if a > 0 {
+			log.Println("No message found yet, sleeping for 30 seconds to try again")
+			time.Sleep(30 * time.Second)
+		}
+		done = unstickBook(conn, book, verboselog)
+	}
+
+	if done {
+		fmt.Println("Succeeded moving message from OCR queue to Analyse queue.")
+	} else {
+		log.Fatalln("Failed to find message", book, "on OCR queue; is it still being processed?",
+			"It can only be discovered and processed by this tool when it is available.",
+			"Try shutting down any instance that is using it, waiting a few minutes,",
+			"and rerunning this tool.")
+	}
+}
+
+// unstickBook checks the OCR queue up to 10 times for a message whose
+// body equals book. When it finds one it deletes that message from the
+// OCR queue, adds book to the Analyse queue and returns true. It returns
+// false if no matching message was seen. Any queue error is fatal.
+func unstickBook(conn UnstickPipeliner, book string, verboselog *log.Logger) bool {
+	for i := 0; i < 10; i++ {
+		verboselog.Println("Checking OCR queue for", book)
+		msg, err := conn.CheckQueue(conn.OCRQueueId(), 10)
+		if err != nil {
+			log.Fatalln("Error checking OCR queue:", err)
+		}
+		if msg.Handle == "" {
+			verboselog.Println("No message received on OCR queue")
+			continue
+		}
+		if msg.Body != book {
+			verboselog.Println("Message received on OCR queue is not the one we're",
+				"looking for, so will try again - found", msg.Body)
+			continue
+		}
+		err = conn.DelFromQueue(conn.OCRQueueId(), msg.Handle)
+		if err != nil {
+			log.Fatalln("Error deleting message from OCR queue:", err)
+		}
+		err = conn.AddToQueue(conn.AnalyseQueueId(), book)
+		if err != nil {
+			log.Fatalln("Error adding message to Analyse queue:", err)
+		}
+		return true
+	}
+	return false
+}
diff --git a/pdf.go b/pdf.go
index 908f0b6..9fa52f3 100644
--- a/pdf.go
+++ b/pdf.go
@@ -16,6 +16,7 @@ import (
"rescribe.xyz/utils/pkg/hocr"
)
+// TODO: maybe set this in Fpdf struct
const pageWidth = 5 // pageWidth in inches
// pxToPt converts a pixel value into a pt value (72 pts per inch)