diff options
| author | Nick White <git@njw.name> | 2019-01-03 19:18:28 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-01-03 19:18:28 +0000 | 
| commit | 696b441cbac750b1ba3df7e4bb29e9f0120fb0b3 (patch) | |
| tree | 84961911d1c0fa73f3a11999548575af150a283d | |
| parent | de82a6413f4c01e916ad95fe85a0fd46b83d863c (diff) | |
Add line-conf-buckets, to filter out and find potential ground-truth
| -rw-r--r-- | line-conf-avg/line-conf-avg.go | 1 | ||||
| -rw-r--r-- | line-conf-buckets/line-conf-buckets.go | 172 | 
2 files changed, 173 insertions, 0 deletions
| diff --git a/line-conf-avg/line-conf-avg.go b/line-conf-avg/line-conf-avg.go index 5b7d2a4..6758fc5 100644 --- a/line-conf-avg/line-conf-avg.go +++ b/line-conf-avg/line-conf-avg.go @@ -38,6 +38,7 @@ func (l LineDetails) Swap(i, j int) { l[i], l[j] = l[j], l[i] }  func main() {  	flag.Usage = func() {  		fmt.Fprintf(os.Stderr, "Usage: line-conf-avg [-html] [-nosort] prob1 [prob2] [...]\n") +		fmt.Fprintf(os.Stderr, "Prints a report of the average confidence for each line\n")  		flag.PrintDefaults()  	}  	var usehtml = flag.Bool("html", false, "output html page") diff --git a/line-conf-buckets/line-conf-buckets.go b/line-conf-buckets/line-conf-buckets.go new file mode 100644 index 0000000..9a125d2 --- /dev/null +++ b/line-conf-buckets/line-conf-buckets.go @@ -0,0 +1,172 @@ +package main + +import ( +	"bufio" +	"flag" +	"fmt" +	"io" +	"io/ioutil" +	"log" +	"os" +	"path/filepath" +	"sort" +	"strconv" +	"strings" +) + +type LineDetail struct { +	Filename string +	Avgconf float64 +	Filebase string +	Basename string +	Dirname string +	Fulltext string +} + +type LineDetails []LineDetail + +// Used by sort.Sort. +func (l LineDetails) Len() int { return len(l) } + +// Used by sort.Sort. +func (l LineDetails) Less(i, j int) bool { +	return l[i].Avgconf < l[j].Avgconf +} + +// Used by sort.Sort. +func (l LineDetails) Swap(i, j int) { l[i], l[j] = l[j], l[i] } + +func copyline(filebase string, dirname string, basename string, avgconf string, outdir string, todir string) (err error) { +	outname := filepath.Join(outdir, todir, filepath.Base(dirname) + "_" + basename + "_" + avgconf) +	//log.Fatalf("I'd use '%s' as outname, and '%s' as filebase\n", outname, filebase) + +	for _, extn := range []string{".bin.png", ".txt"} { +		infile, err := os.Open(filebase + extn) +		if err != nil { +			fmt.Fprintf(os.Stderr, "Failed to open %s\n", filebase + extn) +			return err +		} +		defer infile.Close() + +		err = os.MkdirAll(filepath.Join(outdir, todir), 0700) +		if err != nil { +			return err +		} +	 +		outfile, err := os.Create(outname + extn) +		if err != nil { +			fmt.Fprintf(os.Stderr, "Failed to create %s\n", outname + extn) +			return err +		} +		defer outfile.Close() +	 +		_, err = io.Copy(outfile, infile) +		if err != nil { +			return err +		} +	} + +	return err +} + +func main() { +	flag.Usage = func() { +		fmt.Fprintf(os.Stderr, "Usage: line-conf-buckets prob1 [prob2] [...]\n") +		fmt.Fprintf(os.Stderr, "Copies image-text line pairs into different directories according\n") +		fmt.Fprintf(os.Stderr, "to the average character probability for the line.\n") +		flag.PrintDefaults() +	} +	flag.Parse() +	if flag.NArg() < 1 { +		flag.Usage() +		os.Exit(1) +	} + +	lines := make(LineDetails, 0) + +	for _, f := range flag.Args() { +		file, err := os.Open(f) +		if err != nil { +			log.Fatal(err) +		} +		defer file.Close() + +		reader := bufio.NewReader(file) + +		totalconf := float64(0) +		num := 0 + +		err = nil +		for err == nil { +			var line string +                        line, err = reader.ReadString('\n') +			fields := strings.Fields(line) + +			if len(fields) == 2 { +				conf, converr := strconv.ParseFloat(fields[1], 64) +				if converr != nil { +					fmt.Fprintf(os.Stderr, "Error: can't convert '%s' to float (full line: %s)\n", fields[1], line) +					continue +				} +				totalconf += conf +				num += 1 +			} +		} +		avg := totalconf / float64(num) + +		if num == 0 || avg == 0 { +			continue +		} + +		var linedetail LineDetail +		linedetail.Filename = f +		linedetail.Avgconf = avg +		linedetail.Filebase = strings.Replace(f, ".prob", "", 1) +		linedetail.Basename = filepath.Base(linedetail.Filebase) +		linedetail.Dirname = filepath.Dir(linedetail.Filebase) +		ft, ferr := ioutil.ReadFile(linedetail.Filebase + ".txt") +		if ferr != nil { +			log.Fatal(err) +		} +		linedetail.Fulltext = string(ft) +		lines = append(lines, linedetail) +	} + +	sort.Sort(lines) + +	worstnum := 0 +	mediumnum := 0 +	bestnum := 0 + +	outdir := "buckets" // TODO: set this from cmdline +	todir := "" + +	for _, l := range lines { +		switch { +		case l.Avgconf < 0.95:  +			todir = "bad" +			worstnum++ +		case l.Avgconf < 0.98: +			todir = "95to98" +			mediumnum++ +		default: +			todir = "98plus" +			bestnum++ +		} + +		avgstr := strconv.FormatFloat(l.Avgconf, 'G', -1, 64) +		avgstr = avgstr[2:] +		err := copyline(l.Filebase, l.Dirname, l.Basename, avgstr, outdir, todir) +		if err != nil { +			log.Fatal(err) +		} +	} + +	total := worstnum + mediumnum + bestnum + +	fmt.Printf("Copied lines to %s\n", outdir) +	fmt.Printf("---------------------------------\n") +	fmt.Printf("Lines 98%%+ quality:     %d%%\n", 100 * bestnum / total) +	fmt.Printf("Lines 95-98%% quality:   %d%%\n", 100 * mediumnum / total) +	fmt.Printf("Lines <95%% quality:     %d%%\n", 100 * worstnum / total) +} | 
