Separate graph creation from analyse().

author: Nick White <git@njw.name> 2019-08-28 17:15:55 +0100
committer: Nick White <git@njw.name> 2019-08-28 17:15:55 +0100
commit: c692d23044f914abffcd21302c3e152e19ab82af (patch)
tree: 22284c83f71bc565bb1597713ae57ddf16b29b9b
parent: 7d13974a4a79c4175869c2ef227d04617a8ea515 (diff)
1 files changed, 106 insertions, 84 deletions
diff --git a/bookpipeline/main.go b/bookpipeline/main.go
index cb02f5e..de38ab4 100644
--- a/bookpipeline/main.go
+++ b/bookpipeline/main.go
@@ -7,6 +7,7 @@ import (
 	"errors"
 	"flag"
 	"fmt"
+	"io"
 	"log"
 	"os"
 	"os/exec"
@@ -40,6 +41,9 @@ one is found this general process is followed:
 
 `
 
+const maxticks = 20
+const cutoff = 70
+
 // null writer to enable non-verbose logging to be discarded
 type NullWriter bool
 
@@ -143,85 +147,14 @@ type GraphConf struct {
 	pgnum, conf float64
 }
 
-func analyse(toanalyse chan string, up chan string, errc chan error, logger *log.Logger) {
-	confs := make(map[string][]*Conf)
-	bestconfs := make(map[string]*Conf)
-	savedir := ""
-
-	for path := range toanalyse {
-		if savedir == "" {
-			savedir = filepath.Dir(path)
-		}
-		logger.Println("Calculating confidence for", path)
-		avg, err := hocr.GetAvgConf(path)
-		if err != nil {
-			close(up)
-			errc <- errors.New(fmt.Sprintf("Error retreiving confidence for %s: %s", path, err))
-			return
-		}
-		base := filepath.Base(path)
-		codestart := strings.Index(base, "_bin")
-		name := base[0:codestart]
-		var c Conf
-		c.path = path
-		c.code = base[codestart:]
-		c.conf = avg
-		confs[name] = append(confs[name], &c)
-
-	}
-
-	fn := filepath.Join(savedir, "conf")
-	logger.Println("Saving confidences in file", fn)
-	f, err := os.Create(fn)
-	if err != nil {
-		close(up)
-		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err))
-		return
-	}
-	defer f.Close()
-
-	logger.Println("Finding best confidence for each page, and saving all confidences")
-	for base, conf := range confs {
-		var best float64
-		for _, c := range conf {
-			if c.conf > best {
-				best = c.conf
-				bestconfs[base] = c
-			}
-			_, err = fmt.Fprintf(f, "%s\t%02.f\n", c.path, c.conf)
-			if err != nil {
-				close(up)
-				errc <- errors.New(fmt.Sprintf("Error writing confidences file: %s", err))
-				return
-			}
-		}
-	}
-	up <- fn
-
-	logger.Println("Creating best file listing the best file for each page")
-	fn = filepath.Join(savedir, "best")
-	f, err = os.Create(fn)
-	if err != nil {
-		close(up)
-		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err))
-		return
-	}
-	defer f.Close()
-	for _, conf := range bestconfs {
-		_, err = fmt.Fprintf(f, "%s\n", filepath.Base(conf.path))
-	}
-	up <- fn
-
-	// TODO: move this graph stuff out into its own file, it's pretty big
-	logger.Println("Creating graph")
+func graph(confs map[string]*Conf, bookname string, w io.Writer) (error) {
+	// Organise confs to sort them by page
 	var graphconf []GraphConf
-	// organise bestconfs to sort them by page
-	for _, conf := range bestconfs {
+	for _, conf := range confs {
 		name := filepath.Base(conf.path)
 		numend := strings.Index(name, "_")
 		pgnum, err := strconv.ParseFloat(name[0:numend], 64)
 		if err != nil {
-			logger.Printf("Failed to convert %s to float, excluding from graph\n", name[0:numend])
 			continue
 		}
 		var c GraphConf
@@ -230,16 +163,18 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log
 		graphconf = append(graphconf, c)
 	}
 	sort.Slice(graphconf, func(i, j int) bool { return graphconf[i].pgnum < graphconf[j].pgnum })
+
+	// Create main xvalues and yvalues, annotations and ticks
 	var xvalues, yvalues []float64
 	var annotations []chart.Value2
 	var ticks []chart.Tick
 	i := 0
-	tickevery := len(graphconf) / 20
+	tickevery := len(graphconf) / maxticks
 	for _, c := range graphconf {
 		i = i + 1
 		xvalues = append(xvalues, c.pgnum)
 		yvalues = append(yvalues, c.conf)
-		if c.conf < 70 {
+		if c.conf < cutoff {
 			annotations = append(annotations, chart.Value2{Label: fmt.Sprintf("%.0f", c.pgnum), XValue: c.pgnum, YValue: c.conf})
 		}
 		if tickevery % i == 0 {
@@ -251,10 +186,24 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log
 		YValues: yvalues,
 	}
 
-	// remove outliers at 10% of max and min confidence to use for dotted lines
+	// Create 70% line
+	yvalues = []float64{}
+	for _, _ = range xvalues {
+		yvalues = append(yvalues, cutoff)
+	}
+	cutoffSeries := chart.ContinuousSeries{
+		XValues: xvalues,
+		YValues: yvalues,
+		Style: chart.Style{
+			Show:            true,
+			StrokeColor:     chart.ColorAlternateGreen,
+			StrokeDashArray: []float64{10.0, 5.0},
+		},
+	}
+
+	// Create lines marking top and bottom 10% confidence
 	sort.Slice(graphconf, func(i, j int) bool { return graphconf[i].conf < graphconf[j].conf })
 	cutoff := int(len(graphconf) / 10)
-	logger.Printf("cutoff is %d, from %d\n", cutoff, len(graphconf))
 	mostconf := graphconf[cutoff:len(graphconf)-cutoff]
 	sort.Slice(mostconf, func(i, j int) bool { return mostconf[i].pgnum < mostconf[j].pgnum })
 	xvalues = []float64{}
@@ -284,8 +233,11 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log
 		InnerSeries: mostSeries,
 	}
 
-	// TODO: add number of words series using yaxissecondary
 	graph := chart.Chart{
+		Title: fmt.Sprintf("Confidence of pages from %s", bookname),
+		TitleStyle: chart.StyleShow(),
+		Width: 1920,
+		Height: 1080,
 		XAxis: chart.XAxis{
 			Name: "Page number",
 			NameStyle: chart.StyleShow(),
@@ -304,14 +256,11 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log
 				Max: 100.0,
 			},
 		},
-		//YAxisSecondary: chart.YAxis{
-		//	Name: "Number of words",
-		//	Style: chart.StyleShow(),
-		//},
 		Series: []chart.Series{
 			mainSeries,
 			minSeries,
 			maxSeries,
+			cutoffSeries,
 			chart.LastValueAnnotation(minSeries),
 			chart.LastValueAnnotation(maxSeries),
 			chart.AnnotationSeries{
@@ -324,6 +273,79 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log
 			//},
 		},
 	}
+	return graph.Render(chart.PNG, w)
+}
+
+func analyse(toanalyse chan string, up chan string, errc chan error, logger *log.Logger) {
+	confs := make(map[string][]*Conf)
+	bestconfs := make(map[string]*Conf)
+	savedir := ""
+
+	for path := range toanalyse {
+		if savedir == "" {
+			savedir = filepath.Dir(path)
+		}
+		logger.Println("Calculating confidence for", path)
+		avg, err := hocr.GetAvgConf(path)
+		if err != nil {
+			close(up)
+			errc <- errors.New(fmt.Sprintf("Error retreiving confidence for %s: %s", path, err))
+			return
+		}
+		base := filepath.Base(path)
+		codestart := strings.Index(base, "_bin")
+		name := base[0:codestart]
+		var c Conf
+		c.path = path
+		c.code = base[codestart:]
+		c.conf = avg
+		confs[name] = append(confs[name], &c)
+
+	}
+
+	fn := filepath.Join(savedir, "conf")
+	logger.Println("Saving confidences in file", fn)
+	f, err := os.Create(fn)
+	if err != nil {
+		close(up)
+		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err))
+		return
+	}
+	defer f.Close()
+
+	logger.Println("Finding best confidence for each page, and saving all confidences")
+	for base, conf := range confs {
+		var best float64
+		for _, c := range conf {
+			if c.conf > best {
+				best = c.conf
+				bestconfs[base] = c
+			}
+			_, err = fmt.Fprintf(f, "%s\t%02.f\n", c.path, c.conf)
+			if err != nil {
+				close(up)
+				errc <- errors.New(fmt.Sprintf("Error writing confidences file: %s", err))
+				return
+			}
+		}
+	}
+	up <- fn
+
+	logger.Println("Creating best file listing the best file for each page")
+	fn = filepath.Join(savedir, "best")
+	f, err = os.Create(fn)
+	if err != nil {
+		close(up)
+		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err))
+		return
+	}
+	defer f.Close()
+	for _, conf := range bestconfs {
+		_, err = fmt.Fprintf(f, "%s\n", filepath.Base(conf.path))
+	}
+	up <- fn
+
+	logger.Println("Creating graph")
 	fn = filepath.Join(savedir, "graph.png")
 	f, err = os.Create(fn)
 	if err != nil {
@@ -332,7 +354,7 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log
 		return
 	}
 	defer f.Close()
-	err = graph.Render(chart.PNG, f)
+	err = graph(bestconfs, filepath.Base(savedir), f)
 	if err != nil {
 		close(up)
 		errc <- errors.New(fmt.Sprintf("Error rendering graph: %s", err))
author	Nick White <git@njw.name>	2019-08-28 17:15:55 +0100
committer	Nick White <git@njw.name>	2019-08-28 17:15:55 +0100
commit	c692d23044f914abffcd21302c3e152e19ab82af (patch)
tree	22284c83f71bc565bb1597713ae57ddf16b29b9b
parent	7d13974a4a79c4175869c2ef227d04617a8ea515 (diff)