diff options
| author | Nick White <git@njw.name> | 2019-08-28 17:15:55 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-08-28 17:15:55 +0100 | 
| commit | c692d23044f914abffcd21302c3e152e19ab82af (patch) | |
| tree | 22284c83f71bc565bb1597713ae57ddf16b29b9b | |
| parent | 7d13974a4a79c4175869c2ef227d04617a8ea515 (diff) | |
Separate graph creation from analyse().
| -rw-r--r-- | bookpipeline/main.go | 190 | 
1 files changed, 106 insertions, 84 deletions
| diff --git a/bookpipeline/main.go b/bookpipeline/main.go index cb02f5e..de38ab4 100644 --- a/bookpipeline/main.go +++ b/bookpipeline/main.go @@ -7,6 +7,7 @@ import (  	"errors"  	"flag"  	"fmt" +	"io"  	"log"  	"os"  	"os/exec" @@ -40,6 +41,9 @@ one is found this general process is followed:  ` +const maxticks = 20 +const cutoff = 70 +  // null writer to enable non-verbose logging to be discarded  type NullWriter bool @@ -143,85 +147,14 @@ type GraphConf struct {  	pgnum, conf float64  } -func analyse(toanalyse chan string, up chan string, errc chan error, logger *log.Logger) { -	confs := make(map[string][]*Conf) -	bestconfs := make(map[string]*Conf) -	savedir := "" - -	for path := range toanalyse { -		if savedir == "" { -			savedir = filepath.Dir(path) -		} -		logger.Println("Calculating confidence for", path) -		avg, err := hocr.GetAvgConf(path) -		if err != nil { -			close(up) -			errc <- errors.New(fmt.Sprintf("Error retreiving confidence for %s: %s", path, err)) -			return -		} -		base := filepath.Base(path) -		codestart := strings.Index(base, "_bin") -		name := base[0:codestart] -		var c Conf -		c.path = path -		c.code = base[codestart:] -		c.conf = avg -		confs[name] = append(confs[name], &c) - -	} - -	fn := filepath.Join(savedir, "conf") -	logger.Println("Saving confidences in file", fn) -	f, err := os.Create(fn) -	if err != nil { -		close(up) -		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err)) -		return -	} -	defer f.Close() - -	logger.Println("Finding best confidence for each page, and saving all confidences") -	for base, conf := range confs { -		var best float64 -		for _, c := range conf { -			if c.conf > best { -				best = c.conf -				bestconfs[base] = c -			} -			_, err = fmt.Fprintf(f, "%s\t%02.f\n", c.path, c.conf) -			if err != nil { -				close(up) -				errc <- errors.New(fmt.Sprintf("Error writing confidences file: %s", err)) -				return -			} -		} -	} -	up <- fn - -	logger.Println("Creating best file listing the best file for each page") -	fn = filepath.Join(savedir, "best") -	f, err = os.Create(fn) -	if err != nil { -		close(up) -		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err)) -		return -	} -	defer f.Close() -	for _, conf := range bestconfs { -		_, err = fmt.Fprintf(f, "%s\n", filepath.Base(conf.path)) -	} -	up <- fn - -	// TODO: move this graph stuff out into its own file, it's pretty big -	logger.Println("Creating graph") +func graph(confs map[string]*Conf, bookname string, w io.Writer) (error) { +	// Organise confs to sort them by page  	var graphconf []GraphConf -	// organise bestconfs to sort them by page -	for _, conf := range bestconfs { +	for _, conf := range confs {  		name := filepath.Base(conf.path)  		numend := strings.Index(name, "_")  		pgnum, err := strconv.ParseFloat(name[0:numend], 64)  		if err != nil { -			logger.Printf("Failed to convert %s to float, excluding from graph\n", name[0:numend])  			continue  		}  		var c GraphConf @@ -230,16 +163,18 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log  		graphconf = append(graphconf, c)  	}  	sort.Slice(graphconf, func(i, j int) bool { return graphconf[i].pgnum < graphconf[j].pgnum }) + +	// Create main xvalues and yvalues, annotations and ticks  	var xvalues, yvalues []float64  	var annotations []chart.Value2  	var ticks []chart.Tick  	i := 0 -	tickevery := len(graphconf) / 20 +	tickevery := len(graphconf) / maxticks  	for _, c := range graphconf {  		i = i + 1  		xvalues = append(xvalues, c.pgnum)  		yvalues = append(yvalues, c.conf) -		if c.conf < 70 { +		if c.conf < cutoff {  			annotations = append(annotations, chart.Value2{Label: fmt.Sprintf("%.0f", c.pgnum), XValue: c.pgnum, YValue: c.conf})  		}  		if tickevery % i == 0 { @@ -251,10 +186,24 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log  		YValues: yvalues,  	} -	// remove outliers at 10% of max and min confidence to use for dotted lines +	// Create 70% line +	yvalues = []float64{} +	for _, _ = range xvalues { +		yvalues = append(yvalues, cutoff) +	} +	cutoffSeries := chart.ContinuousSeries{ +		XValues: xvalues, +		YValues: yvalues, +		Style: chart.Style{ +			Show:            true, +			StrokeColor:     chart.ColorAlternateGreen, +			StrokeDashArray: []float64{10.0, 5.0}, +		}, +	} + +	// Create lines marking top and bottom 10% confidence  	sort.Slice(graphconf, func(i, j int) bool { return graphconf[i].conf < graphconf[j].conf })  	cutoff := int(len(graphconf) / 10) -	logger.Printf("cutoff is %d, from %d\n", cutoff, len(graphconf))  	mostconf := graphconf[cutoff:len(graphconf)-cutoff]  	sort.Slice(mostconf, func(i, j int) bool { return mostconf[i].pgnum < mostconf[j].pgnum })  	xvalues = []float64{} @@ -284,8 +233,11 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log  		InnerSeries: mostSeries,  	} -	// TODO: add number of words series using yaxissecondary  	graph := chart.Chart{ +		Title: fmt.Sprintf("Confidence of pages from %s", bookname), +		TitleStyle: chart.StyleShow(), +		Width: 1920, +		Height: 1080,  		XAxis: chart.XAxis{  			Name: "Page number",  			NameStyle: chart.StyleShow(), @@ -304,14 +256,11 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log  				Max: 100.0,  			},  		}, -		//YAxisSecondary: chart.YAxis{ -		//	Name: "Number of words", -		//	Style: chart.StyleShow(), -		//},  		Series: []chart.Series{  			mainSeries,  			minSeries,  			maxSeries, +			cutoffSeries,  			chart.LastValueAnnotation(minSeries),  			chart.LastValueAnnotation(maxSeries),  			chart.AnnotationSeries{ @@ -324,6 +273,79 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log  			//},  		},  	} +	return graph.Render(chart.PNG, w) +} + +func analyse(toanalyse chan string, up chan string, errc chan error, logger *log.Logger) { +	confs := make(map[string][]*Conf) +	bestconfs := make(map[string]*Conf) +	savedir := "" + +	for path := range toanalyse { +		if savedir == "" { +			savedir = filepath.Dir(path) +		} +		logger.Println("Calculating confidence for", path) +		avg, err := hocr.GetAvgConf(path) +		if err != nil { +			close(up) +			errc <- errors.New(fmt.Sprintf("Error retreiving confidence for %s: %s", path, err)) +			return +		} +		base := filepath.Base(path) +		codestart := strings.Index(base, "_bin") +		name := base[0:codestart] +		var c Conf +		c.path = path +		c.code = base[codestart:] +		c.conf = avg +		confs[name] = append(confs[name], &c) + +	} + +	fn := filepath.Join(savedir, "conf") +	logger.Println("Saving confidences in file", fn) +	f, err := os.Create(fn) +	if err != nil { +		close(up) +		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err)) +		return +	} +	defer f.Close() + +	logger.Println("Finding best confidence for each page, and saving all confidences") +	for base, conf := range confs { +		var best float64 +		for _, c := range conf { +			if c.conf > best { +				best = c.conf +				bestconfs[base] = c +			} +			_, err = fmt.Fprintf(f, "%s\t%02.f\n", c.path, c.conf) +			if err != nil { +				close(up) +				errc <- errors.New(fmt.Sprintf("Error writing confidences file: %s", err)) +				return +			} +		} +	} +	up <- fn + +	logger.Println("Creating best file listing the best file for each page") +	fn = filepath.Join(savedir, "best") +	f, err = os.Create(fn) +	if err != nil { +		close(up) +		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err)) +		return +	} +	defer f.Close() +	for _, conf := range bestconfs { +		_, err = fmt.Fprintf(f, "%s\n", filepath.Base(conf.path)) +	} +	up <- fn + +	logger.Println("Creating graph")  	fn = filepath.Join(savedir, "graph.png")  	f, err = os.Create(fn)  	if err != nil { @@ -332,7 +354,7 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log  		return  	}  	defer f.Close() -	err = graph.Render(chart.PNG, f) +	err = graph(bestconfs, filepath.Base(savedir), f)  	if err != nil {  		close(up)  		errc <- errors.New(fmt.Sprintf("Error rendering graph: %s", err)) | 
