summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- aws.go 16
-rw-r--r-- cmd/bookpipeline/main.go 310
-rw-r--r-- cmd/getpipelinebook/main.go 8
-rw-r--r-- cmd/lspipeline/main.go 2
-rw-r--r-- graph.go 16
-rw-r--r-- pdf.go 4
6 files changed, 178 insertions, 178 deletions
diff --git a/aws.go b/aws.go
index 74e6142..5f1befa 100644
--- a/aws.go
+++ b/aws.go
@@ -187,7 +187,7 @@ func (a *AwsConn) QueueHeartbeat(msg Qmsg, qurl string, duration int64) (Qmsg, e
VisibilityTimeout: aws.Int64(0),
})
- for i := 0; i < int(duration) * 5; i++ {
+ for i := 0; i < int(duration)*5; i++ {
msgResult, err := a.sqssvc.ReceiveMessage(&sqs.ReceiveMessageInput{
MaxNumberOfMessages: aws.Int64(10),
VisibilityTimeout: &duration,
@@ -292,9 +292,9 @@ func (a *AwsConn) ListObjectsWithMeta(bucket string, prefix string) ([]ObjMeta,
func (a *AwsConn) ListObjectPrefixes(bucket string) ([]string, error) {
var prefixes []string
err := a.s3svc.ListObjectsV2Pages(&s3.ListObjectsV2Input{
- Bucket: aws.String(bucket),
+ Bucket: aws.String(bucket),
Delimiter: aws.String("/"),
- MaxKeys: aws.Int64(1),
+ MaxKeys: aws.Int64(1),
}, func(page *s3.ListObjectsV2Output, last bool) bool {
for _, r := range page.CommonPrefixes {
prefixes = append(prefixes, *r.Prefix)
@@ -327,9 +327,9 @@ func (a *AwsConn) CreateQueue(name string) error {
_, err := a.sqssvc.CreateQueue(&sqs.CreateQueueInput{
QueueName: aws.String(name),
Attributes: map[string]*string{
- "VisibilityTimeout": aws.String("120"), // 2 minutes
- "MessageRetentionPeriod": aws.String("1209600"), // 14 days; max allowed by sqs
- "ReceiveMessageWaitTimeSeconds": aws.String("20"),
+ "VisibilityTimeout": aws.String("120"), // 2 minutes
+ "MessageRetentionPeriod": aws.String("1209600"), // 14 days; max allowed by sqs
+ "ReceiveMessageWaitTimeSeconds": aws.String("20"),
},
})
if err != nil {
@@ -339,7 +339,7 @@ func (a *AwsConn) CreateQueue(name string) error {
// quietly ignores the CreateQueue request if it is identical to an
// existing queue.
if ok && aerr.Code() == sqs.ErrCodeQueueNameExists {
- return errors.New("Error: Queue already exists but has different attributes:" + name)
+ return errors.New("Error: Queue already exists but has different attributes:" + name)
} else {
return errors.New(fmt.Sprintf("Error creating queue %s: %v", name, err))
}
@@ -447,7 +447,7 @@ func (a *AwsConn) StartInstances(n int) error {
IamInstanceProfile: &ec2.IamInstanceProfileSpecification{
Arn: aws.String(spotProfile),
},
- ImageId: aws.String(spotImage),
+ ImageId: aws.String(spotImage),
InstanceType: aws.String(spotType),
SecurityGroupIds: []*string{
aws.String(spotSg),
diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 7b45541..8d2ffcc 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -13,8 +13,8 @@ import (
"os/exec"
"path/filepath"
"regexp"
- "strings"
"sort"
+ "strings"
"time"
"rescribe.xyz/bookpipeline"
@@ -121,7 +121,7 @@ func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, b
return
}
logger.Println("Adding", key, training, "to queue", toQueue)
- err = conn.AddToQueue(toQueue, key + " " + training)
+ err = conn.AddToQueue(toQueue, key+" "+training)
if err != nil {
for range c {
} // consume the rest of the receiving channel so it isn't blocked
@@ -195,192 +195,192 @@ func ocr(training string) func(chan string, chan string, chan error, *log.Logger
func analyse(conn Pipeliner) func(chan string, chan string, chan error, *log.Logger) {
return func(toanalyse chan string, up chan string, errc chan error, logger *log.Logger) {
- confs := make(map[string][]*bookpipeline.Conf)
- bestconfs := make(map[string]*bookpipeline.Conf)
- savedir := ""
-
- for path := range toanalyse {
- if savedir == "" {
- savedir = filepath.Dir(path)
- }
- logger.Println("Calculating confidence for", path)
- avg, err := hocr.GetAvgConf(path)
- if err != nil && err.Error() == "No words found" {
- continue
- }
- if err != nil {
- for range toanalyse {
- } // consume the rest of the receiving channel so it isn't blocked
- close(up)
- errc <- fmt.Errorf("Error retreiving confidence for %s: %s", path, err)
- return
- }
- base := filepath.Base(path)
- codestart := strings.Index(base, "_bin")
- name := base[0:codestart]
- var c bookpipeline.Conf
- c.Path = path
- c.Code = base[codestart:]
- c.Conf = avg
- confs[name] = append(confs[name], &c)
- }
-
- fn := filepath.Join(savedir, "conf")
- logger.Println("Saving confidences in file", fn)
- f, err := os.Create(fn)
- if err != nil {
- close(up)
- errc <- fmt.Errorf("Error creating file %s: %s", fn, err)
- return
- }
- defer f.Close()
+ confs := make(map[string][]*bookpipeline.Conf)
+ bestconfs := make(map[string]*bookpipeline.Conf)
+ savedir := ""
- logger.Println("Finding best confidence for each page, and saving all confidences")
- for base, conf := range confs {
- var best float64
- for _, c := range conf {
- if c.Conf > best {
- best = c.Conf
- bestconfs[base] = c
+ for path := range toanalyse {
+ if savedir == "" {
+ savedir = filepath.Dir(path)
+ }
+ logger.Println("Calculating confidence for", path)
+ avg, err := hocr.GetAvgConf(path)
+ if err != nil && err.Error() == "No words found" {
+ continue
}
- _, err = fmt.Fprintf(f, "%s\t%02.f\n", c.Path, c.Conf)
if err != nil {
+ for range toanalyse {
+ } // consume the rest of the receiving channel so it isn't blocked
close(up)
- errc <- fmt.Errorf("Error writing confidences file: %s", err)
+ errc <- fmt.Errorf("Error retreiving confidence for %s: %s", path, err)
return
}
+ base := filepath.Base(path)
+ codestart := strings.Index(base, "_bin")
+ name := base[0:codestart]
+ var c bookpipeline.Conf
+ c.Path = path
+ c.Code = base[codestart:]
+ c.Conf = avg
+ confs[name] = append(confs[name], &c)
}
- }
- up <- fn
- logger.Println("Creating best file listing the best file for each page")
- fn = filepath.Join(savedir, "best")
- f, err = os.Create(fn)
- if err != nil {
- close(up)
- errc <- fmt.Errorf("Error creating file %s: %s", fn, err)
- return
- }
- defer f.Close()
- for _, conf := range bestconfs {
- _, err = fmt.Fprintf(f, "%s\n", filepath.Base(conf.Path))
- }
- up <- fn
+ fn := filepath.Join(savedir, "conf")
+ logger.Println("Saving confidences in file", fn)
+ f, err := os.Create(fn)
+ if err != nil {
+ close(up)
+ errc <- fmt.Errorf("Error creating file %s: %s", fn, err)
+ return
+ }
+ defer f.Close()
+
+ logger.Println("Finding best confidence for each page, and saving all confidences")
+ for base, conf := range confs {
+ var best float64
+ for _, c := range conf {
+ if c.Conf > best {
+ best = c.Conf
+ bestconfs[base] = c
+ }
+ _, err = fmt.Fprintf(f, "%s\t%02.f\n", c.Path, c.Conf)
+ if err != nil {
+ close(up)
+ errc <- fmt.Errorf("Error writing confidences file: %s", err)
+ return
+ }
+ }
+ }
+ up <- fn
- var pgs []string
- for _, conf := range bestconfs {
- pgs = append(pgs, conf.Path)
- }
- sort.Strings(pgs)
+ logger.Println("Creating best file listing the best file for each page")
+ fn = filepath.Join(savedir, "best")
+ f, err = os.Create(fn)
+ if err != nil {
+ close(up)
+ errc <- fmt.Errorf("Error creating file %s: %s", fn, err)
+ return
+ }
+ defer f.Close()
+ for _, conf := range bestconfs {
+ _, err = fmt.Fprintf(f, "%s\n", filepath.Base(conf.Path))
+ }
+ up <- fn
- logger.Println("Downloading binarised and original images to create PDFs")
- bookname, err := filepath.Rel(os.TempDir(), savedir)
- if err != nil {
- close(up)
- errc <- fmt.Errorf("Failed to do filepath.Rel of %s to %s: %s", os.TempDir(), savedir, err)
- return
- }
- colourpdf := new(bookpipeline.Fpdf)
- err = colourpdf.Setup()
- if err != nil {
- close(up)
- errc <- fmt.Errorf("Failed to set up PDF: %s", err)
- return
- }
- binarisedpdf := new(bookpipeline.Fpdf)
- err = binarisedpdf.Setup()
- if err != nil {
- close(up)
- errc <- fmt.Errorf("Failed to set up PDF: %s", err)
- return
- }
- binhascontent, colourhascontent := false, false
- for _, pg := range pgs {
- var colourfn, binfn string
- base := filepath.Base(pg)
- nosuffix := strings.TrimSuffix(base, ".hocr")
- p := strings.SplitN(base, "_bin", 2)
-
- binfn = nosuffix + ".png"
- if len(p) > 1 {
- colourfn = p[0] + ".jpg"
- } else {
- colourfn = nosuffix + ".jpg"
+ var pgs []string
+ for _, conf := range bestconfs {
+ pgs = append(pgs, conf.Path)
}
+ sort.Strings(pgs)
- logger.Println("Downloading binarised page to add to PDF", binfn)
- err := conn.Download(conn.WIPStorageId(), bookname + "/" + binfn, filepath.Join(savedir, binfn))
+ logger.Println("Downloading binarised and original images to create PDFs")
+ bookname, err := filepath.Rel(os.TempDir(), savedir)
+ if err != nil {
+ close(up)
+ errc <- fmt.Errorf("Failed to do filepath.Rel of %s to %s: %s", os.TempDir(), savedir, err)
+ return
+ }
+ colourpdf := new(bookpipeline.Fpdf)
+ err = colourpdf.Setup()
+ if err != nil {
+ close(up)
+ errc <- fmt.Errorf("Failed to set up PDF: %s", err)
+ return
+ }
+ binarisedpdf := new(bookpipeline.Fpdf)
+ err = binarisedpdf.Setup()
if err != nil {
- logger.Println("Download failed; skipping page", binfn)
- } else {
- err = binarisedpdf.AddPage(filepath.Join(savedir, binfn), pg, true)
+ close(up)
+ errc <- fmt.Errorf("Failed to set up PDF: %s", err)
+ return
+ }
+ binhascontent, colourhascontent := false, false
+ for _, pg := range pgs {
+ var colourfn, binfn string
+ base := filepath.Base(pg)
+ nosuffix := strings.TrimSuffix(base, ".hocr")
+ p := strings.SplitN(base, "_bin", 2)
+
+ binfn = nosuffix + ".png"
+ if len(p) > 1 {
+ colourfn = p[0] + ".jpg"
+ } else {
+ colourfn = nosuffix + ".jpg"
+ }
+
+ logger.Println("Downloading binarised page to add to PDF", binfn)
+ err := conn.Download(conn.WIPStorageId(), bookname+"/"+binfn, filepath.Join(savedir, binfn))
if err != nil {
- close(up)
- errc <- fmt.Errorf("Failed to add page %s to PDF: %s", binfn, err)
- return
+ logger.Println("Download failed; skipping page", binfn)
+ } else {
+ err = binarisedpdf.AddPage(filepath.Join(savedir, binfn), pg, true)
+ if err != nil {
+ close(up)
+ errc <- fmt.Errorf("Failed to add page %s to PDF: %s", binfn, err)
+ return
+ }
+ binhascontent = true
}
- binhascontent = true
- }
- logger.Println("Downloading colour page to add to PDF", colourfn)
- err = conn.Download(conn.WIPStorageId(), bookname + "/" + colourfn, filepath.Join(savedir, colourfn))
- if err != nil {
- colourfn = strings.Replace(colourfn, ".jpg", ".png", 1)
- logger.Println("Download failed; trying", colourfn)
- err = conn.Download(conn.WIPStorageId(), bookname + "/" + colourfn, filepath.Join(savedir, colourfn))
+ logger.Println("Downloading colour page to add to PDF", colourfn)
+ err = conn.Download(conn.WIPStorageId(), bookname+"/"+colourfn, filepath.Join(savedir, colourfn))
if err != nil {
- logger.Println("Download failed; skipping page", colourfn)
+ colourfn = strings.Replace(colourfn, ".jpg", ".png", 1)
+ logger.Println("Download failed; trying", colourfn)
+ err = conn.Download(conn.WIPStorageId(), bookname+"/"+colourfn, filepath.Join(savedir, colourfn))
+ if err != nil {
+ logger.Println("Download failed; skipping page", colourfn)
+ }
+ }
+ if err == nil {
+ err = colourpdf.AddPage(filepath.Join(savedir, colourfn), pg, true)
+ if err != nil {
+ close(up)
+ errc <- fmt.Errorf("Failed to add page %s to PDF: %s", colourfn, err)
+ return
+ }
+ colourhascontent = true
}
}
- if err == nil {
- err = colourpdf.AddPage(filepath.Join(savedir, colourfn), pg, true)
+ if colourhascontent {
+ fn = filepath.Join(savedir, bookname+".colour.pdf")
+ err = colourpdf.Save(fn)
if err != nil {
close(up)
- errc <- fmt.Errorf("Failed to add page %s to PDF: %s", colourfn, err)
+ errc <- fmt.Errorf("Failed to save colour pdf: %s", err)
return
}
- colourhascontent = true
+ up <- fn
}
- }
- if colourhascontent {
- fn = filepath.Join(savedir, bookname + ".colour.pdf")
- err = colourpdf.Save(fn)
+ if binhascontent {
+ fn = filepath.Join(savedir, bookname+".binarised.pdf")
+ err = binarisedpdf.Save(fn)
+ if err != nil {
+ close(up)
+ errc <- fmt.Errorf("Failed to save binarised pdf: %s", err)
+ return
+ }
+ up <- fn
+ }
+
+ logger.Println("Creating graph")
+ fn = filepath.Join(savedir, "graph.png")
+ f, err = os.Create(fn)
if err != nil {
close(up)
- errc <- fmt.Errorf("Failed to save colour pdf: %s", err)
+ errc <- fmt.Errorf("Error creating file %s: %s", fn, err)
return
}
- up <- fn
- }
- if binhascontent {
- fn = filepath.Join(savedir, bookname + ".binarised.pdf")
- err = binarisedpdf.Save(fn)
- if err != nil {
+ defer f.Close()
+ err = bookpipeline.Graph(bestconfs, filepath.Base(savedir), f)
+ if err != nil && err.Error() != "Not enough valid confidences" {
close(up)
- errc <- fmt.Errorf("Failed to save binarised pdf: %s", err)
+ errc <- fmt.Errorf("Error rendering graph: %s", err)
return
}
up <- fn
- }
- logger.Println("Creating graph")
- fn = filepath.Join(savedir, "graph.png")
- f, err = os.Create(fn)
- if err != nil {
- close(up)
- errc <- fmt.Errorf("Error creating file %s: %s", fn, err)
- return
- }
- defer f.Close()
- err = bookpipeline.Graph(bestconfs, filepath.Base(savedir), f)
- if err != nil && err.Error() != "Not enough valid confidences" {
close(up)
- errc <- fmt.Errorf("Error rendering graph: %s", err)
- return
- }
- up <- fn
-
- close(up)
}
}
@@ -849,7 +849,7 @@ func main() {
err := cmd.Run()
if err != nil {
conn.Log("Error shutting down, error:", err,
- ", stdout:", stdout.String(), ", stderr:", stderr.String())
+ ", stdout:", stdout.String(), ", stderr:", stderr.String())
}
}
}
diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go
index c23c674..fe2af89 100644
--- a/cmd/getpipelinebook/main.go
+++ b/cmd/getpipelinebook/main.go
@@ -44,8 +44,8 @@ type Pipeliner interface {
}
func getpdfs(conn Pipeliner, l *log.Logger, bookname string) {
- for _, suffix := range []string { ".colour.pdf", ".binarised.pdf" } {
- fn := filepath.Join(bookname, bookname + suffix)
+ for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} {
+ fn := filepath.Join(bookname, bookname+suffix)
l.Println("Downloading PDF", fn)
err := conn.Download(conn.WIPStorageId(), fn, fn)
if err != nil {
@@ -115,7 +115,7 @@ func main() {
}
if *binarisedpdf {
- fn := filepath.Join(bookname, bookname + ".binarised.pdf")
+ fn := filepath.Join(bookname, bookname+".binarised.pdf")
verboselog.Println("Downloading file", fn)
err = conn.Download(conn.WIPStorageId(), fn, fn)
if err != nil {
@@ -124,7 +124,7 @@ func main() {
}
if *colourpdf {
- fn := filepath.Join(bookname, bookname + ".colour.pdf")
+ fn := filepath.Join(bookname, bookname+".colour.pdf")
verboselog.Println("Downloading file", fn)
err = conn.Download(conn.WIPStorageId(), fn, fn)
if err != nil {
diff --git a/cmd/lspipeline/main.go b/cmd/lspipeline/main.go
index 5143e29..659b034 100644
--- a/cmd/lspipeline/main.go
+++ b/cmd/lspipeline/main.go
@@ -116,7 +116,7 @@ func getBookStatus(conn LsPipeliner) (inprogress []string, done []string, err er
}
// Search for graph.png to determine done books (and save the date of it to sort with)
for _, p := range prefixes {
- objs, err := conn.ListObjectsWithMeta(conn.WIPStorageId(), p + "graph.png")
+ objs, err := conn.ListObjectsWithMeta(conn.WIPStorageId(), p+"graph.png")
if err != nil || len(objs) == 0 {
inprogressmeta = append(inprogressmeta, bookpipeline.ObjMeta{Name: p})
} else {
diff --git a/graph.go b/graph.go
index 4ca44a7..8d6c857 100644
--- a/graph.go
+++ b/graph.go
@@ -107,14 +107,14 @@ func GraphOpts(confs map[string]*Conf, bookname string, xaxis string, guidelines
final := graphconf[len(graphconf)-1]
ticks[len(ticks)-1] = chart.Tick{final.Pgnum, fmt.Sprintf("%.0f", final.Pgnum)}
for i := 0; i <= yticknum; i++ {
- n := float64(i * 100) / yticknum
+ n := float64(i*100) / yticknum
yticks = append(yticks, chart.Tick{n, fmt.Sprintf("%.1f", n)})
}
mainSeries := chart.ContinuousSeries{
Style: chart.Style{
- StrokeColor: chart.ColorBlue,
- FillColor: chart.ColorAlternateBlue,
+ StrokeColor: chart.ColorBlue,
+ FillColor: chart.ColorAlternateBlue,
},
XValues: xvalues,
YValues: yvalues,
@@ -165,18 +165,18 @@ func GraphOpts(confs map[string]*Conf, bookname string, xaxis string, guidelines
annotations = append(annotations, chart.Value2{Label: fmt.Sprintf("%.0f", highconf), XValue: xvalues[len(xvalues)-1], YValue: highconf})
graph := chart.Chart{
- Title: bookname,
- Width: 3840,
- Height: 2160,
+ Title: bookname,
+ Width: 3840,
+ Height: 2160,
XAxis: chart.XAxis{
- Name: xaxis,
+ Name: xaxis,
Range: &chart.ContinuousRange{
Min: 0.0,
},
Ticks: ticks,
},
YAxis: chart.YAxis{
- Name: "Confidence",
+ Name: "Confidence",
Range: &chart.ContinuousRange{
Min: 0.0,
Max: 100.0,
diff --git a/pdf.go b/pdf.go
index a979807..a03aecc 100644
--- a/pdf.go
+++ b/pdf.go
@@ -22,7 +22,7 @@ import (
)
// TODO: maybe set this in Fpdf struct
-const pageWidth = 5 // pageWidth in inches
+const pageWidth = 5 // pageWidth in inches
const scaleSmaller = 3 // amount the width and height are divided by
// pxToPt converts a pixel value into a pt value (72 pts per inch)
@@ -113,7 +113,7 @@ func (p *Fpdf) AddPage(imgpath, hocrpath string, smaller bool) error {
continue
}
p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1]))
- p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(w.Text) + " ", "", 0, "T", false, 0, "")
+ p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(w.Text)+" ", "", 0, "T", false, 0, "")
}
}
return p.fpdf.Error()