diff options
-rw-r--r-- | aws.go | 16 | ||||
-rw-r--r-- | cmd/bookpipeline/main.go | 310 | ||||
-rw-r--r-- | cmd/getpipelinebook/main.go | 8 | ||||
-rw-r--r-- | cmd/lspipeline/main.go | 2 | ||||
-rw-r--r-- | graph.go | 16 | ||||
-rw-r--r-- | pdf.go | 4 |
6 files changed, 178 insertions, 178 deletions
@@ -187,7 +187,7 @@ func (a *AwsConn) QueueHeartbeat(msg Qmsg, qurl string, duration int64) (Qmsg, e VisibilityTimeout: aws.Int64(0), }) - for i := 0; i < int(duration) * 5; i++ { + for i := 0; i < int(duration)*5; i++ { msgResult, err := a.sqssvc.ReceiveMessage(&sqs.ReceiveMessageInput{ MaxNumberOfMessages: aws.Int64(10), VisibilityTimeout: &duration, @@ -292,9 +292,9 @@ func (a *AwsConn) ListObjectsWithMeta(bucket string, prefix string) ([]ObjMeta, func (a *AwsConn) ListObjectPrefixes(bucket string) ([]string, error) { var prefixes []string err := a.s3svc.ListObjectsV2Pages(&s3.ListObjectsV2Input{ - Bucket: aws.String(bucket), + Bucket: aws.String(bucket), Delimiter: aws.String("/"), - MaxKeys: aws.Int64(1), + MaxKeys: aws.Int64(1), }, func(page *s3.ListObjectsV2Output, last bool) bool { for _, r := range page.CommonPrefixes { prefixes = append(prefixes, *r.Prefix) @@ -327,9 +327,9 @@ func (a *AwsConn) CreateQueue(name string) error { _, err := a.sqssvc.CreateQueue(&sqs.CreateQueueInput{ QueueName: aws.String(name), Attributes: map[string]*string{ - "VisibilityTimeout": aws.String("120"), // 2 minutes - "MessageRetentionPeriod": aws.String("1209600"), // 14 days; max allowed by sqs - "ReceiveMessageWaitTimeSeconds": aws.String("20"), + "VisibilityTimeout": aws.String("120"), // 2 minutes + "MessageRetentionPeriod": aws.String("1209600"), // 14 days; max allowed by sqs + "ReceiveMessageWaitTimeSeconds": aws.String("20"), }, }) if err != nil { @@ -339,7 +339,7 @@ func (a *AwsConn) CreateQueue(name string) error { // quietly ignores the CreateQueue request if it is identical to an // existing queue. if ok && aerr.Code() == sqs.ErrCodeQueueNameExists { - return errors.New("Error: Queue already exists but has different attributes:" + name) + return errors.New("Error: Queue already exists but has different attributes:" + name) } else { return errors.New(fmt.Sprintf("Error creating queue %s: %v", name, err)) } @@ -447,7 +447,7 @@ func (a *AwsConn) StartInstances(n int) error { IamInstanceProfile: &ec2.IamInstanceProfileSpecification{ Arn: aws.String(spotProfile), }, - ImageId: aws.String(spotImage), + ImageId: aws.String(spotImage), InstanceType: aws.String(spotType), SecurityGroupIds: []*string{ aws.String(spotSg), diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go index 7b45541..8d2ffcc 100644 --- a/cmd/bookpipeline/main.go +++ b/cmd/bookpipeline/main.go @@ -13,8 +13,8 @@ import ( "os/exec" "path/filepath" "regexp" - "strings" "sort" + "strings" "time" "rescribe.xyz/bookpipeline" @@ -121,7 +121,7 @@ func upAndQueue(c chan string, done chan bool, toQueue string, conn Pipeliner, b return } logger.Println("Adding", key, training, "to queue", toQueue) - err = conn.AddToQueue(toQueue, key + " " + training) + err = conn.AddToQueue(toQueue, key+" "+training) if err != nil { for range c { } // consume the rest of the receiving channel so it isn't blocked @@ -195,192 +195,192 @@ func ocr(training string) func(chan string, chan string, chan error, *log.Logger func analyse(conn Pipeliner) func(chan string, chan string, chan error, *log.Logger) { return func(toanalyse chan string, up chan string, errc chan error, logger *log.Logger) { - confs := make(map[string][]*bookpipeline.Conf) - bestconfs := make(map[string]*bookpipeline.Conf) - savedir := "" - - for path := range toanalyse { - if savedir == "" { - savedir = filepath.Dir(path) - } - logger.Println("Calculating confidence for", path) - avg, err := hocr.GetAvgConf(path) - if err != nil && err.Error() == "No words found" { - continue - } - if err != nil { - for range toanalyse { - } // consume the rest of the receiving channel so it isn't blocked - close(up) - errc <- fmt.Errorf("Error retreiving confidence for %s: %s", path, err) - return - } - base := filepath.Base(path) - codestart := strings.Index(base, "_bin") - name := base[0:codestart] - var c bookpipeline.Conf - c.Path = path - c.Code = base[codestart:] - c.Conf = avg - confs[name] = append(confs[name], &c) - } - - fn := filepath.Join(savedir, "conf") - logger.Println("Saving confidences in file", fn) - f, err := os.Create(fn) - if err != nil { - close(up) - errc <- fmt.Errorf("Error creating file %s: %s", fn, err) - return - } - defer f.Close() + confs := make(map[string][]*bookpipeline.Conf) + bestconfs := make(map[string]*bookpipeline.Conf) + savedir := "" - logger.Println("Finding best confidence for each page, and saving all confidences") - for base, conf := range confs { - var best float64 - for _, c := range conf { - if c.Conf > best { - best = c.Conf - bestconfs[base] = c + for path := range toanalyse { + if savedir == "" { + savedir = filepath.Dir(path) + } + logger.Println("Calculating confidence for", path) + avg, err := hocr.GetAvgConf(path) + if err != nil && err.Error() == "No words found" { + continue } - _, err = fmt.Fprintf(f, "%s\t%02.f\n", c.Path, c.Conf) if err != nil { + for range toanalyse { + } // consume the rest of the receiving channel so it isn't blocked close(up) - errc <- fmt.Errorf("Error writing confidences file: %s", err) + errc <- fmt.Errorf("Error retreiving confidence for %s: %s", path, err) return } + base := filepath.Base(path) + codestart := strings.Index(base, "_bin") + name := base[0:codestart] + var c bookpipeline.Conf + c.Path = path + c.Code = base[codestart:] + c.Conf = avg + confs[name] = append(confs[name], &c) } - } - up <- fn - logger.Println("Creating best file listing the best file for each page") - fn = filepath.Join(savedir, "best") - f, err = os.Create(fn) - if err != nil { - close(up) - errc <- fmt.Errorf("Error creating file %s: %s", fn, err) - return - } - defer f.Close() - for _, conf := range bestconfs { - _, err = fmt.Fprintf(f, "%s\n", filepath.Base(conf.Path)) - } - up <- fn + fn := filepath.Join(savedir, "conf") + logger.Println("Saving confidences in file", fn) + f, err := os.Create(fn) + if err != nil { + close(up) + errc <- fmt.Errorf("Error creating file %s: %s", fn, err) + return + } + defer f.Close() + + logger.Println("Finding best confidence for each page, and saving all confidences") + for base, conf := range confs { + var best float64 + for _, c := range conf { + if c.Conf > best { + best = c.Conf + bestconfs[base] = c + } + _, err = fmt.Fprintf(f, "%s\t%02.f\n", c.Path, c.Conf) + if err != nil { + close(up) + errc <- fmt.Errorf("Error writing confidences file: %s", err) + return + } + } + } + up <- fn - var pgs []string - for _, conf := range bestconfs { - pgs = append(pgs, conf.Path) - } - sort.Strings(pgs) + logger.Println("Creating best file listing the best file for each page") + fn = filepath.Join(savedir, "best") + f, err = os.Create(fn) + if err != nil { + close(up) + errc <- fmt.Errorf("Error creating file %s: %s", fn, err) + return + } + defer f.Close() + for _, conf := range bestconfs { + _, err = fmt.Fprintf(f, "%s\n", filepath.Base(conf.Path)) + } + up <- fn - logger.Println("Downloading binarised and original images to create PDFs") - bookname, err := filepath.Rel(os.TempDir(), savedir) - if err != nil { - close(up) - errc <- fmt.Errorf("Failed to do filepath.Rel of %s to %s: %s", os.TempDir(), savedir, err) - return - } - colourpdf := new(bookpipeline.Fpdf) - err = colourpdf.Setup() - if err != nil { - close(up) - errc <- fmt.Errorf("Failed to set up PDF: %s", err) - return - } - binarisedpdf := new(bookpipeline.Fpdf) - err = binarisedpdf.Setup() - if err != nil { - close(up) - errc <- fmt.Errorf("Failed to set up PDF: %s", err) - return - } - binhascontent, colourhascontent := false, false - for _, pg := range pgs { - var colourfn, binfn string - base := filepath.Base(pg) - nosuffix := strings.TrimSuffix(base, ".hocr") - p := strings.SplitN(base, "_bin", 2) - - binfn = nosuffix + ".png" - if len(p) > 1 { - colourfn = p[0] + ".jpg" - } else { - colourfn = nosuffix + ".jpg" + var pgs []string + for _, conf := range bestconfs { + pgs = append(pgs, conf.Path) } + sort.Strings(pgs) - logger.Println("Downloading binarised page to add to PDF", binfn) - err := conn.Download(conn.WIPStorageId(), bookname + "/" + binfn, filepath.Join(savedir, binfn)) + logger.Println("Downloading binarised and original images to create PDFs") + bookname, err := filepath.Rel(os.TempDir(), savedir) + if err != nil { + close(up) + errc <- fmt.Errorf("Failed to do filepath.Rel of %s to %s: %s", os.TempDir(), savedir, err) + return + } + colourpdf := new(bookpipeline.Fpdf) + err = colourpdf.Setup() + if err != nil { + close(up) + errc <- fmt.Errorf("Failed to set up PDF: %s", err) + return + } + binarisedpdf := new(bookpipeline.Fpdf) + err = binarisedpdf.Setup() if err != nil { - logger.Println("Download failed; skipping page", binfn) - } else { - err = binarisedpdf.AddPage(filepath.Join(savedir, binfn), pg, true) + close(up) + errc <- fmt.Errorf("Failed to set up PDF: %s", err) + return + } + binhascontent, colourhascontent := false, false + for _, pg := range pgs { + var colourfn, binfn string + base := filepath.Base(pg) + nosuffix := strings.TrimSuffix(base, ".hocr") + p := strings.SplitN(base, "_bin", 2) + + binfn = nosuffix + ".png" + if len(p) > 1 { + colourfn = p[0] + ".jpg" + } else { + colourfn = nosuffix + ".jpg" + } + + logger.Println("Downloading binarised page to add to PDF", binfn) + err := conn.Download(conn.WIPStorageId(), bookname+"/"+binfn, filepath.Join(savedir, binfn)) if err != nil { - close(up) - errc <- fmt.Errorf("Failed to add page %s to PDF: %s", binfn, err) - return + logger.Println("Download failed; skipping page", binfn) + } else { + err = binarisedpdf.AddPage(filepath.Join(savedir, binfn), pg, true) + if err != nil { + close(up) + errc <- fmt.Errorf("Failed to add page %s to PDF: %s", binfn, err) + return + } + binhascontent = true } - binhascontent = true - } - logger.Println("Downloading colour page to add to PDF", colourfn) - err = conn.Download(conn.WIPStorageId(), bookname + "/" + colourfn, filepath.Join(savedir, colourfn)) - if err != nil { - colourfn = strings.Replace(colourfn, ".jpg", ".png", 1) - logger.Println("Download failed; trying", colourfn) - err = conn.Download(conn.WIPStorageId(), bookname + "/" + colourfn, filepath.Join(savedir, colourfn)) + logger.Println("Downloading colour page to add to PDF", colourfn) + err = conn.Download(conn.WIPStorageId(), bookname+"/"+colourfn, filepath.Join(savedir, colourfn)) if err != nil { - logger.Println("Download failed; skipping page", colourfn) + colourfn = strings.Replace(colourfn, ".jpg", ".png", 1) + logger.Println("Download failed; trying", colourfn) + err = conn.Download(conn.WIPStorageId(), bookname+"/"+colourfn, filepath.Join(savedir, colourfn)) + if err != nil { + logger.Println("Download failed; skipping page", colourfn) + } + } + if err == nil { + err = colourpdf.AddPage(filepath.Join(savedir, colourfn), pg, true) + if err != nil { + close(up) + errc <- fmt.Errorf("Failed to add page %s to PDF: %s", colourfn, err) + return + } + colourhascontent = true } } - if err == nil { - err = colourpdf.AddPage(filepath.Join(savedir, colourfn), pg, true) + if colourhascontent { + fn = filepath.Join(savedir, bookname+".colour.pdf") + err = colourpdf.Save(fn) if err != nil { close(up) - errc <- fmt.Errorf("Failed to add page %s to PDF: %s", colourfn, err) + errc <- fmt.Errorf("Failed to save colour pdf: %s", err) return } - colourhascontent = true + up <- fn } - } - if colourhascontent { - fn = filepath.Join(savedir, bookname + ".colour.pdf") - err = colourpdf.Save(fn) + if binhascontent { + fn = filepath.Join(savedir, bookname+".binarised.pdf") + err = binarisedpdf.Save(fn) + if err != nil { + close(up) + errc <- fmt.Errorf("Failed to save binarised pdf: %s", err) + return + } + up <- fn + } + + logger.Println("Creating graph") + fn = filepath.Join(savedir, "graph.png") + f, err = os.Create(fn) if err != nil { close(up) - errc <- fmt.Errorf("Failed to save colour pdf: %s", err) + errc <- fmt.Errorf("Error creating file %s: %s", fn, err) return } - up <- fn - } - if binhascontent { - fn = filepath.Join(savedir, bookname + ".binarised.pdf") - err = binarisedpdf.Save(fn) - if err != nil { + defer f.Close() + err = bookpipeline.Graph(bestconfs, filepath.Base(savedir), f) + if err != nil && err.Error() != "Not enough valid confidences" { close(up) - errc <- fmt.Errorf("Failed to save binarised pdf: %s", err) + errc <- fmt.Errorf("Error rendering graph: %s", err) return } up <- fn - } - logger.Println("Creating graph") - fn = filepath.Join(savedir, "graph.png") - f, err = os.Create(fn) - if err != nil { - close(up) - errc <- fmt.Errorf("Error creating file %s: %s", fn, err) - return - } - defer f.Close() - err = bookpipeline.Graph(bestconfs, filepath.Base(savedir), f) - if err != nil && err.Error() != "Not enough valid confidences" { close(up) - errc <- fmt.Errorf("Error rendering graph: %s", err) - return - } - up <- fn - - close(up) } } @@ -849,7 +849,7 @@ func main() { err := cmd.Run() if err != nil { conn.Log("Error shutting down, error:", err, - ", stdout:", stdout.String(), ", stderr:", stderr.String()) + ", stdout:", stdout.String(), ", stderr:", stderr.String()) } } } diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go index c23c674..fe2af89 100644 --- a/cmd/getpipelinebook/main.go +++ b/cmd/getpipelinebook/main.go @@ -44,8 +44,8 @@ type Pipeliner interface { } func getpdfs(conn Pipeliner, l *log.Logger, bookname string) { - for _, suffix := range []string { ".colour.pdf", ".binarised.pdf" } { - fn := filepath.Join(bookname, bookname + suffix) + for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} { + fn := filepath.Join(bookname, bookname+suffix) l.Println("Downloading PDF", fn) err := conn.Download(conn.WIPStorageId(), fn, fn) if err != nil { @@ -115,7 +115,7 @@ func main() { } if *binarisedpdf { - fn := filepath.Join(bookname, bookname + ".binarised.pdf") + fn := filepath.Join(bookname, bookname+".binarised.pdf") verboselog.Println("Downloading file", fn) err = conn.Download(conn.WIPStorageId(), fn, fn) if err != nil { @@ -124,7 +124,7 @@ func main() { } if *colourpdf { - fn := filepath.Join(bookname, bookname + ".colour.pdf") + fn := filepath.Join(bookname, bookname+".colour.pdf") verboselog.Println("Downloading file", fn) err = conn.Download(conn.WIPStorageId(), fn, fn) if err != nil { diff --git a/cmd/lspipeline/main.go b/cmd/lspipeline/main.go index 5143e29..659b034 100644 --- a/cmd/lspipeline/main.go +++ b/cmd/lspipeline/main.go @@ -116,7 +116,7 @@ func getBookStatus(conn LsPipeliner) (inprogress []string, done []string, err er } // Search for graph.png to determine done books (and save the date of it to sort with) for _, p := range prefixes { - objs, err := conn.ListObjectsWithMeta(conn.WIPStorageId(), p + "graph.png") + objs, err := conn.ListObjectsWithMeta(conn.WIPStorageId(), p+"graph.png") if err != nil || len(objs) == 0 { inprogressmeta = append(inprogressmeta, bookpipeline.ObjMeta{Name: p}) } else { @@ -107,14 +107,14 @@ func GraphOpts(confs map[string]*Conf, bookname string, xaxis string, guidelines final := graphconf[len(graphconf)-1] ticks[len(ticks)-1] = chart.Tick{final.Pgnum, fmt.Sprintf("%.0f", final.Pgnum)} for i := 0; i <= yticknum; i++ { - n := float64(i * 100) / yticknum + n := float64(i*100) / yticknum yticks = append(yticks, chart.Tick{n, fmt.Sprintf("%.1f", n)}) } mainSeries := chart.ContinuousSeries{ Style: chart.Style{ - StrokeColor: chart.ColorBlue, - FillColor: chart.ColorAlternateBlue, + StrokeColor: chart.ColorBlue, + FillColor: chart.ColorAlternateBlue, }, XValues: xvalues, YValues: yvalues, @@ -165,18 +165,18 @@ func GraphOpts(confs map[string]*Conf, bookname string, xaxis string, guidelines annotations = append(annotations, chart.Value2{Label: fmt.Sprintf("%.0f", highconf), XValue: xvalues[len(xvalues)-1], YValue: highconf}) graph := chart.Chart{ - Title: bookname, - Width: 3840, - Height: 2160, + Title: bookname, + Width: 3840, + Height: 2160, XAxis: chart.XAxis{ - Name: xaxis, + Name: xaxis, Range: &chart.ContinuousRange{ Min: 0.0, }, Ticks: ticks, }, YAxis: chart.YAxis{ - Name: "Confidence", + Name: "Confidence", Range: &chart.ContinuousRange{ Min: 0.0, Max: 100.0, @@ -22,7 +22,7 @@ import ( ) // TODO: maybe set this in Fpdf struct -const pageWidth = 5 // pageWidth in inches +const pageWidth = 5 // pageWidth in inches const scaleSmaller = 3 // amount the width and height are divided by // pxToPt converts a pixel value into a pt value (72 pts per inch) @@ -113,7 +113,7 @@ func (p *Fpdf) AddPage(imgpath, hocrpath string, smaller bool) error { continue } p.fpdf.SetXY(pxToPt(coords[0]), pxToPt(coords[1])) - p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(w.Text) + " ", "", 0, "T", false, 0, "") + p.fpdf.CellFormat(pxToPt(coords[2]), pxToPt(coords[3]), html.UnescapeString(w.Text)+" ", "", 0, "T", false, 0, "") } } return p.fpdf.Error() |