diff options
| author | Nick White <git@njw.name> | 2019-01-23 21:54:09 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-01-23 21:54:15 +0000 | 
| commit | fb77852acbdbbcedcdb9771770cb6771da002851 (patch) | |
| tree | 233b6cedea313702c994919906d156d829ed378e | |
| parent | d256f967a26ceeb7c3987a1fc447b126a35054f9 (diff) | |
Update line-conf-buckets to mostly use package functions too.
Working now, but needs more consolidation to be worth it.
| -rw-r--r-- | line-conf-buckets-tess/line-conf-buckets-tess.go | 4 | ||||
| -rw-r--r-- | line-conf-buckets/line-conf-buckets.go | 116 | ||||
| -rw-r--r-- | parse/hocr/hocr.go | 5 | ||||
| -rw-r--r-- | parse/line.go | 18 | ||||
| -rw-r--r-- | parse/prob/prob.go | 72 | 
5 files changed, 125 insertions, 90 deletions
| diff --git a/line-conf-buckets-tess/line-conf-buckets-tess.go b/line-conf-buckets-tess/line-conf-buckets-tess.go index b24bdec..8abdff3 100644 --- a/line-conf-buckets-tess/line-conf-buckets-tess.go +++ b/line-conf-buckets-tess/line-conf-buckets-tess.go @@ -92,7 +92,7 @@ func main() {  		avgstr := strconv.FormatFloat(l.Avgconf, 'f', 5, 64)  		avgstr = avgstr[2:] -		outname := filepath.Join(outdir, todir, l.Hocrname + "_" + l.Name + "_" + avgstr + ".png") +		outname := filepath.Join(outdir, todir, l.OcrName + "_" + l.Name + "_" + avgstr + ".png")  		err := os.MkdirAll(filepath.Join(outdir, todir), 0700)  		if err != nil { @@ -111,7 +111,7 @@ func main() {  			log.Fatal(err)  		} -		outname = filepath.Join(outdir, todir, l.Hocrname + "_" + l.Name + "_" + avgstr + ".txt") +		outname = filepath.Join(outdir, todir, l.OcrName + "_" + l.Name + "_" + avgstr + ".txt")  		outfile, err = os.Create(outname)  		if err != nil {  			fmt.Fprintf(os.Stderr, "Failed to create %s\n", outname) diff --git a/line-conf-buckets/line-conf-buckets.go b/line-conf-buckets/line-conf-buckets.go index c2df074..1c33ba4 100644 --- a/line-conf-buckets/line-conf-buckets.go +++ b/line-conf-buckets/line-conf-buckets.go @@ -5,42 +5,23 @@ import (  	"flag"  	"fmt"  	"io" -	"io/ioutil"  	"log"  	"os"  	"path/filepath"  	"sort"  	"strconv"  	"strings" -) - -type LineDetail struct { -	Filename string -	Avgconf float64 -	Filebase string -	Basename string -	Dirname string -	Fulltext string -} - -type LineDetails []LineDetail - -// Used by sort.Sort. -func (l LineDetails) Len() int { return len(l) } -// Used by sort.Sort. -func (l LineDetails) Less(i, j int) bool { -	return l[i].Avgconf < l[j].Avgconf -} - -// Used by sort.Sort. -func (l LineDetails) Swap(i, j int) { l[i], l[j] = l[j], l[i] } +	"git.rescribe.xyz/testingtools/parse" +	"git.rescribe.xyz/testingtools/parse/prob" +) -func copyline(filebase string, dirname string, basename string, avgconf string, outdir string, todir string) (err error) { +// TODO: this is just a placeholder, do this more sensibly, as -tess does (hint: full txt should already be in the LineDetail) +func copyline(filebase string, dirname string, basename string, avgconf string, outdir string, todir string, l parse.LineDetail) (err error) {  	outname := filepath.Join(outdir, todir, filepath.Base(dirname) + "_" + basename + "_" + avgconf)  	//log.Fatalf("I'd use '%s' as outname, and '%s' as filebase\n", outname, filebase) -	for _, extn := range []string{".bin.png", ".txt"} { +	for _, extn := range []string{".txt"} {  		infile, err := os.Open(filebase + extn)  		if err != nil {  			fmt.Fprintf(os.Stderr, "Failed to open %s\n", filebase + extn) @@ -66,6 +47,16 @@ func copyline(filebase string, dirname string, basename string, avgconf string,  		}  	} +	f, err := os.Create(outname + ".bin.png") +	if err != nil { +		return err +	} +	defer f.Close() +	err = l.Img.CopyLineTo(f) +	if err != nil { +		return err +	} +  	return err  } @@ -82,77 +73,28 @@ func main() {  		os.Exit(1)  	} -	lines := make(LineDetails, 0) +	lines := make(parse.LineDetails, 0)  	for _, f := range flag.Args() {  		file, err := os.Open(f)  		if err != nil { -			fmt.Fprintf(os.Stderr, "Error opening %s\n", f)  			log.Fatal(err)  		}  		defer file.Close()  		reader := bufio.NewReader(file) -		totalconf := float64(0) -		num := 0 - -		err = nil -		for err == nil { -			var line string -                        line, err = reader.ReadString('\n') -			fields := strings.Fields(line) - -			if len(fields) == 2 { -				conf, converr := strconv.ParseFloat(fields[1], 64) -				if converr != nil { -					fmt.Fprintf(os.Stderr, "Error: can't convert '%s' to float (full line: %s)\n", fields[1], line) -					continue -				} -				totalconf += conf -				num += 1 -			} +		newlines, err := prob.GetLineDetails(f, reader) +		if err != nil { +			log.Fatal(err)  		} -		avg := totalconf / float64(num) -		// Explicitly close file immediately after use, rather than relying on defer, -		// as too many files could be opened before any of the files are closed, leading -		// to a 'too many open files' error -		// TODO: rewrite this loop so it uses a function or two, so we can rely -		//       on defer sensibly again. +                for _, l := range newlines { +                        lines = append(lines, l) +                } +		// explicitly close the file, so we can be sure we won't run out of +		// handles before defer runs  		file.Close() - -		if num == 0 || avg == 0 { -			continue -		} - -		var linedetail LineDetail -		linedetail.Filename = f -		linedetail.Avgconf = avg -		linedetail.Filebase = strings.Replace(f, ".prob", "", 1) -		linedetail.Basename = filepath.Base(linedetail.Filebase) -		linedetail.Dirname = filepath.Dir(linedetail.Filebase) - -		txtfile, ferr := os.Open(linedetail.Filebase + ".txt") -		if ferr != nil { -			fmt.Fprintf(os.Stderr, "Error opening %s\n", linedetail.Filebase + ".txt") -			log.Fatal(ferr) -		} -		defer txtfile.Close() -		ft, ferr := ioutil.ReadAll(txtfile) -		if ferr != nil { -			fmt.Fprintf(os.Stderr, "Error reading %s\n", linedetail.Filebase + ".txt") -			log.Fatal(ferr) -		} -		linedetail.Fulltext = string(ft) -		// Explicitly close file immediately after use, rather than relying on defer, -		// as too many files could be opened before any of the files are closed, leading -		// to a 'too many open files' error -		// TODO: rewrite this loop so it uses a function or two, so we can rely -		//       on defer sensibly again. -		txtfile.Close() - -		lines = append(lines, linedetail)  	}  	sort.Sort(lines) @@ -178,8 +120,12 @@ func main() {  		}  		avgstr := strconv.FormatFloat(l.Avgconf, 'G', -1, 64) -		avgstr = avgstr[2:] -		err := copyline(l.Filebase, l.Dirname, l.Basename, avgstr, outdir, todir) +		if len(avgstr) > 2 { +			avgstr = avgstr[2:] +		} +		filebase := strings.Replace(l.Name, ".prob", "", 1) +		basename := filepath.Base(filebase) +		err := copyline(filebase, l.OcrName, basename, avgstr, outdir, todir, l)  		if err != nil {  			log.Fatal(err)  		} diff --git a/parse/hocr/hocr.go b/parse/hocr/hocr.go index a281a7a..f7cac05 100644 --- a/parse/hocr/hocr.go +++ b/parse/hocr/hocr.go @@ -1,7 +1,8 @@  package hocr  // TODO: consider making GetLineDetails() a function of Hocr, so could do a -//       similar thing with prob format files too. +//       similar thing with prob format files too, and then fire them both +//       off a generic interface, potentially.  // TODO: Parse line name to zero pad line numbers, so they come out in the correct order  import ( @@ -137,7 +138,7 @@ func GetLineDetails(h Hocr, i image.Image, name string) (parse.LineDetails, erro  		}  		line.Text = strings.TrimRight(linetext, " ")  		line.Text += "\n" -		line.Hocrname = name +		line.OcrName = name  		var imgd parse.ImgDirect  		imgd.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))  		line.Img = imgd diff --git a/parse/line.go b/parse/line.go index 3ddde76..9a2be8e 100644 --- a/parse/line.go +++ b/parse/line.go @@ -9,6 +9,7 @@ import (  	"image"  	"image/png"  	"io" +	"os"  )  type LineDetail struct { @@ -16,7 +17,7 @@ type LineDetail struct {  	Avgconf float64  	Img CopyableLine  	Text string -	Hocrname string +	OcrName string  }  type CopyableLine interface { @@ -37,6 +38,21 @@ func (i ImgDirect) CopyLineTo(w io.Writer) (error) {  	return nil  } +type ImgPath struct { +	Path string +} + +func (i ImgPath) CopyLineTo(w io.Writer) (error) { +	f, err := os.Open(i.Path) +	if err != nil { +		return err +	} +	defer f.Close() + +	_, err = io.Copy(w, f) +	return err +} +  type LineDetails []LineDetail  // Used by sort.Sort. diff --git a/parse/prob/prob.go b/parse/prob/prob.go new file mode 100644 index 0000000..5a84567 --- /dev/null +++ b/parse/prob/prob.go @@ -0,0 +1,72 @@ +package prob + +import ( +	"bufio" +	"io/ioutil" +	"path/filepath" +	"strconv" +	"strings" + +	"git.rescribe.xyz/testingtools/parse" +) + +// TODO: probably switch to just relying on io.Reader +func getLineAvg(r *bufio.Reader) (float64, error) { +	var err error + +	totalconf := float64(0) +	num := 0 + +	err = nil +	for err == nil { +		var line string +		line, err = r.ReadString('\n') +		fields := strings.Fields(line) + +		if len(fields) == 2 { +			conf, converr := strconv.ParseFloat(fields[1], 64) +			if converr != nil { +				continue +			} +			totalconf += conf +			num += 1 +		} +	} +	if num <= 0 { +		return 0, nil +	} +	avg := totalconf / float64(num) +	return avg, nil +} + +// TODO: probably switch to just relying on io.Reader +// Note this only processes one line at a time +func GetLineDetails(name string, r *bufio.Reader) (parse.LineDetails, error) { +	var line parse.LineDetail +	lines := make(parse.LineDetails, 0) + +	avg, err := getLineAvg(r) +	if err != nil { +		return lines, err +	} + +	filebase := strings.Replace(name, ".prob", "", 1) + +	txt, err := ioutil.ReadFile(filebase + ".txt") +	if err != nil { +		return lines, err +	} + +	line.Name = name +	line.Avgconf = avg +	line.Text = string(txt) +	line.OcrName = filepath.Dir(filebase) + +	var imgfn parse.ImgPath +	imgfn.Path = filebase + ".bin.png" +	line.Img = imgfn + +	lines = append(lines, line) + +	return lines, nil +} | 
