diff options
| author | Nick White <git@njw.name> | 2019-01-23 21:54:09 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-01-23 21:54:15 +0000 | 
| commit | fb77852acbdbbcedcdb9771770cb6771da002851 (patch) | |
| tree | 233b6cedea313702c994919906d156d829ed378e /line-conf-buckets | |
| parent | d256f967a26ceeb7c3987a1fc447b126a35054f9 (diff) | |
Update line-conf-buckets to mostly use package functions too.
Working now, but needs more consolidation to be worth it.
Diffstat (limited to 'line-conf-buckets')
| -rw-r--r-- | line-conf-buckets/line-conf-buckets.go | 116 | 
1 file changed, 31 insertions, 85 deletions
| diff --git a/line-conf-buckets/line-conf-buckets.go b/line-conf-buckets/line-conf-buckets.go index c2df074..1c33ba4 100644 --- a/line-conf-buckets/line-conf-buckets.go +++ b/line-conf-buckets/line-conf-buckets.go @@ -5,42 +5,23 @@ import (  	"flag"  	"fmt"  	"io" -	"io/ioutil"  	"log"  	"os"  	"path/filepath"  	"sort"  	"strconv"  	"strings" -) - -type LineDetail struct { -	Filename string -	Avgconf float64 -	Filebase string -	Basename string -	Dirname string -	Fulltext string -} - -type LineDetails []LineDetail - -// Used by sort.Sort. -func (l LineDetails) Len() int { return len(l) } -// Used by sort.Sort. -func (l LineDetails) Less(i, j int) bool { -	return l[i].Avgconf < l[j].Avgconf -} - -// Used by sort.Sort. -func (l LineDetails) Swap(i, j int) { l[i], l[j] = l[j], l[i] } +	"git.rescribe.xyz/testingtools/parse" +	"git.rescribe.xyz/testingtools/parse/prob" +) -func copyline(filebase string, dirname string, basename string, avgconf string, outdir string, todir string) (err error) { +// TODO: this is just a placeholder, do this more sensibly, as -tess does (hint: full txt should already be in the LineDetail) +func copyline(filebase string, dirname string, basename string, avgconf string, outdir string, todir string, l parse.LineDetail) (err error) {  	outname := filepath.Join(outdir, todir, filepath.Base(dirname) + "_" + basename + "_" + avgconf)  	//log.Fatalf("I'd use '%s' as outname, and '%s' as filebase\n", outname, filebase) -	for _, extn := range []string{".bin.png", ".txt"} { +	for _, extn := range []string{".txt"} {  		infile, err := os.Open(filebase + extn)  		if err != nil {  			fmt.Fprintf(os.Stderr, "Failed to open %s\n", filebase + extn) @@ -66,6 +47,16 @@ func copyline(filebase string, dirname string, basename string, avgconf string,  		}  	} +	f, err := os.Create(outname + ".bin.png") +	if err != nil { +		return err +	} +	defer f.Close() +	err = l.Img.CopyLineTo(f) +	if err != nil { +		return err +	} +  	return err  } @@ -82,77 +73,28 @@ func 
main() {  		os.Exit(1)  	} -	lines := make(LineDetails, 0) +	lines := make(parse.LineDetails, 0)  	for _, f := range flag.Args() {  		file, err := os.Open(f)  		if err != nil { -			fmt.Fprintf(os.Stderr, "Error opening %s\n", f)  			log.Fatal(err)  		}  		defer file.Close()  		reader := bufio.NewReader(file) -		totalconf := float64(0) -		num := 0 - -		err = nil -		for err == nil { -			var line string -                        line, err = reader.ReadString('\n') -			fields := strings.Fields(line) - -			if len(fields) == 2 { -				conf, converr := strconv.ParseFloat(fields[1], 64) -				if converr != nil { -					fmt.Fprintf(os.Stderr, "Error: can't convert '%s' to float (full line: %s)\n", fields[1], line) -					continue -				} -				totalconf += conf -				num += 1 -			} +		newlines, err := prob.GetLineDetails(f, reader) +		if err != nil { +			log.Fatal(err)  		} -		avg := totalconf / float64(num) -		// Explicitly close file immediately after use, rather than relying on defer, -		// as too many files could be opened before any of the files are closed, leading -		// to a 'too many open files' error -		// TODO: rewrite this loop so it uses a function or two, so we can rely -		//       on defer sensibly again. 
+                for _, l := range newlines { +                        lines = append(lines, l) +                } +		// explicitly close the file, so we can be sure we won't run out of +		// handles before defer runs  		file.Close() - -		if num == 0 || avg == 0 { -			continue -		} - -		var linedetail LineDetail -		linedetail.Filename = f -		linedetail.Avgconf = avg -		linedetail.Filebase = strings.Replace(f, ".prob", "", 1) -		linedetail.Basename = filepath.Base(linedetail.Filebase) -		linedetail.Dirname = filepath.Dir(linedetail.Filebase) - -		txtfile, ferr := os.Open(linedetail.Filebase + ".txt") -		if ferr != nil { -			fmt.Fprintf(os.Stderr, "Error opening %s\n", linedetail.Filebase + ".txt") -			log.Fatal(ferr) -		} -		defer txtfile.Close() -		ft, ferr := ioutil.ReadAll(txtfile) -		if ferr != nil { -			fmt.Fprintf(os.Stderr, "Error reading %s\n", linedetail.Filebase + ".txt") -			log.Fatal(ferr) -		} -		linedetail.Fulltext = string(ft) -		// Explicitly close file immediately after use, rather than relying on defer, -		// as too many files could be opened before any of the files are closed, leading -		// to a 'too many open files' error -		// TODO: rewrite this loop so it uses a function or two, so we can rely -		//       on defer sensibly again. -		txtfile.Close() - -		lines = append(lines, linedetail)  	}  	sort.Sort(lines) @@ -178,8 +120,12 @@ func main() {  		}  		avgstr := strconv.FormatFloat(l.Avgconf, 'G', -1, 64) -		avgstr = avgstr[2:] -		err := copyline(l.Filebase, l.Dirname, l.Basename, avgstr, outdir, todir) +		if len(avgstr) > 2 { +			avgstr = avgstr[2:] +		} +		filebase := strings.Replace(l.Name, ".prob", "", 1) +		basename := filepath.Base(filebase) +		err := copyline(filebase, l.OcrName, basename, avgstr, outdir, todir, l)  		if err != nil {  			log.Fatal(err)  		} | 
