diff options
| author | Nick White <git@njw.name> | 2019-08-27 17:18:41 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-08-27 17:18:41 +0100 | 
| commit | 2d6a3a3ff743f24f29a0b8d8369911906e8eae3c (patch) | |
| tree | 49d66183af54da02e8f2e13b1a1902f190fd082d /bookpipeline/main.go | |
| parent | f72c788235f4f0a3bb88cfcdb0911564e67259d8 (diff) | |
Add basic graphing (still work to do, but basics are working)
Diffstat (limited to 'bookpipeline/main.go')
| -rw-r--r-- | bookpipeline/main.go | 145 | 
1 file changed, 133 insertions, 12 deletions
| diff --git a/bookpipeline/main.go b/bookpipeline/main.go index 4596171..63eb6bc 100644 --- a/bookpipeline/main.go +++ b/bookpipeline/main.go @@ -12,9 +12,13 @@ import (  	"os/exec"  	"path/filepath"  	"regexp" +	"sort" +	"strconv"  	"strings"  	"time" +	"github.com/wcharczuk/go-chart" +  	"rescribe.xyz/go.git/lib/hocr"  	"rescribe.xyz/go.git/preproc"  ) @@ -135,6 +139,9 @@ type Conf struct {  	path, code string  	conf float64  } +type GraphConf struct { +	pgnum, conf float64 +}  func analyse(toanalyse chan string, up chan string, errc chan error, logger *log.Logger) {  	confs := make(map[string][]*Conf) @@ -163,12 +170,12 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log  	} -	conffn := filepath.Join(savedir, "conf") -	logger.Println("Saving confidences in file", conffn) -	f, err := os.Create(conffn) +	fn := filepath.Join(savedir, "conf") +	logger.Println("Saving confidences in file", fn) +	f, err := os.Create(fn)  	if err != nil {  		close(up) -		errc <- errors.New(fmt.Sprintf("Error creating conf file %s: %s", conffn, err)) +		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err))  		return  	}  	defer f.Close() @@ -189,23 +196,138 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log  			}  		}  	} -	up <- conffn +	up <- fn  	logger.Println("Creating best file listing the best file for each page") -	bestfn := filepath.Join(savedir, "best") -	f, err = os.Create(bestfn) +	fn = filepath.Join(savedir, "best") +	f, err = os.Create(fn)  	if err != nil {  		close(up) -		errc <- errors.New(fmt.Sprintf("Error creating best file %s: %s", bestfn, err)) +		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err))  		return  	}  	defer f.Close()  	for _, conf := range bestconfs {  		_, err = fmt.Fprintf(f, "%s\n", filepath.Base(conf.path))  	} -	up <- bestfn +	up <- fn + +	// TODO: move this graph stuff out into its own file, it's pretty big +	logger.Println("Creating graph") +	var 
graphconf []GraphConf +	// organise bestconfs to sort them by page +	for _, conf := range bestconfs { +		name := filepath.Base(conf.path) +		numend := strings.Index(name, "_") +		pgnum, err := strconv.ParseFloat(name[0:numend], 64) +		if err != nil { +			logger.Printf("Failed to convert %s to float, excluding from graph\n", name[0:numend]) +			continue +		} +		var c GraphConf +		c.pgnum = pgnum +		c.conf = conf.conf +		graphconf = append(graphconf, c) +	} +	sort.Slice(graphconf, func(i, j int) bool { return graphconf[i].pgnum < graphconf[j].pgnum }) +	var xvalues, yvalues []float64 +	for _, c := range graphconf { +		xvalues = append(xvalues, c.pgnum) +		yvalues = append(yvalues, c.conf) +	} +	mainSeries := chart.ContinuousSeries{ +		XValues: xvalues, +		YValues: yvalues, +	} + +	// remove outliers at 10% of max and min confidence to use for dotted lines +	sort.Slice(graphconf, func(i, j int) bool { return graphconf[i].conf < graphconf[j].conf }) +	cutoff := int(len(graphconf) / 10) +	logger.Printf("cutoff is %d, from %d\n", cutoff, len(graphconf)) +	mostconf := graphconf[cutoff:len(graphconf)-cutoff] +	sort.Slice(mostconf, func(i, j int) bool { return mostconf[i].pgnum < mostconf[j].pgnum }) +	xvalues = []float64{} +	yvalues = []float64{} +	for _, c := range mostconf { +		xvalues = append(xvalues, c.pgnum) +		yvalues = append(yvalues, c.conf) +	} +	mostSeries := chart.ContinuousSeries{ +		XValues: xvalues, +		YValues: yvalues, +	} +	minSeries := &chart.MinSeries{ +		Style: chart.Style{ +			Show:            true, +			StrokeColor:     chart.ColorAlternateGray, +			StrokeDashArray: []float64{5.0, 5.0}, +		}, +		InnerSeries: mostSeries, +	} +	maxSeries := &chart.MaxSeries{ +		Style: chart.Style{ +			Show:            true, +			StrokeColor:     chart.ColorAlternateGray, +			StrokeDashArray: []float64{5.0, 5.0}, +		}, +		InnerSeries: mostSeries, +	} + +	// TODO: annotate all values below 70%; see +	// 
https://github.com/wcharczuk/go-chart/blob/master/_examples/annotations/main.go + +	// TODO: add number of words series using yaxissecondary +	graph := chart.Chart{ +		XAxis: chart.XAxis{ +			Name: "Page number", +			NameStyle: chart.StyleShow(), +			Style: chart.StyleShow(), +			Range: &chart.ContinuousRange{ +				Min: 0.0, +			}, +		}, +		YAxis: chart.YAxis{ +			Name: "Confidence", +			NameStyle: chart.StyleShow(), +			Style: chart.StyleShow(), +			Range: &chart.ContinuousRange{ +				Min: 0.0, +				Max: 100.0, +			}, +		}, +		//YAxisSecondary: chart.YAxis{ +		//	Name: "Number of words", +		//	Style: chart.StyleShow(), +		//}, +		Series: []chart.Series{ +			mainSeries, +			minSeries, +			maxSeries, +			chart.LastValueAnnotation(minSeries), +			chart.LastValueAnnotation(maxSeries), +			//chart.ContinuousSeries{ +			//	YAxis: chart.YAxisSecondary, +			//	XValues: xvalues, +			//	YValues: yvalues, +			//}, +		}, +	} +	fn = filepath.Join(savedir, "graph.png") +	f, err = os.Create(fn) +	if err != nil { +		close(up) +		errc <- errors.New(fmt.Sprintf("Error creating file %s: %s", fn, err)) +		return +	} +	defer f.Close() +	err = graph.Render(chart.PNG, f) +	if err != nil { +		close(up) +		errc <- errors.New(fmt.Sprintf("Error rendering graph: %s", err)) +		return +	} +	up <- fn -	// TODO: plot a graph with the confs, using https://github.com/gonum/plot, send to up  	// TODO: generate a general report.txt with statistics etc for the book, send to up  	close(up) @@ -308,8 +430,7 @@ func main() {  		verboselog = log.New(n, "", log.LstdFlags)  	} -	// TODO: match jpg too -	origPattern := regexp.MustCompile(`[0-9]{4}.jpg$`) // TODO: match other file naming +	origPattern := regexp.MustCompile(`[0-9]{4}.jpg$`) // TODO: match alternative file naming  	preprocessedPattern := regexp.MustCompile(`_bin[0-9].[0-9].png$`)  	ocredPattern := regexp.MustCompile(`.hocr$`) | 
