diff options
| -rw-r--r-- | bucket-lines-hocr/bucket-lines-hocr.go (renamed from line-conf-buckets-tess/line-conf-buckets-tess.go) | 25 | 
1 file changed, 15 insertions, 10 deletions
| diff --git a/line-conf-buckets-tess/line-conf-buckets-tess.go b/bucket-lines-hocr/bucket-lines-hocr.go index 38dec15..b35c824 100644 --- a/line-conf-buckets-tess/line-conf-buckets-tess.go +++ b/bucket-lines-hocr/bucket-lines-hocr.go @@ -1,7 +1,6 @@  package main -// TODO: rename -// TODO: set bucket dirname from cmdline +// TODO: merge with -prob, using filename extension to determine what to do for each file  import (  	"flag" @@ -46,12 +45,24 @@ func detailsFromFile(f string) (parse.LineDetails, error) {  }  func main() { +	b := parse.BucketSpecs{ +		// minimum confidence, name +		{ 0, "bad" }, +		{ 0.95, "95to98" }, +		{ 0.98, "98plus" }, +	} +  	flag.Usage = func() { -		fmt.Fprintf(os.Stderr, "Usage: line-conf-buckets hocr1 [hocr2] [...]\n") +		fmt.Fprintf(os.Stderr, "Usage: bucket-lines-hocr [-d dir] hocr1 [hocr2] [...]\n")  		fmt.Fprintf(os.Stderr, "Copies image-text line pairs into different directories according\n")  		fmt.Fprintf(os.Stderr, "to the average character probability for the line.\n") +		fmt.Fprintf(os.Stderr, "This uses the x_wconf data in .hocr files, which it assumes will be.\n") +		fmt.Fprintf(os.Stderr, "in the same directory as the line's image and text files. It can\n") +		fmt.Fprintf(os.Stderr, "handle hocr where each character is tagged separately and hocr where\n") +		fmt.Fprintf(os.Stderr, "only whole words are tagged.\n")  		flag.PrintDefaults()  	} +	dir := flag.String("d", "buckets", "Directory to store the buckets")  	flag.Parse()  	if flag.NArg() < 1 {  		flag.Usage() @@ -71,13 +82,7 @@ func main() {  		}  	} -	b := parse.BucketSpecs{ -		{ 0, "bad" }, -		{ 0.95, "95to98" }, -		{ 0.98, "98plus" }, -	} - -	stats, err := parse.BucketUp(lines, b, "newbuckets") +	stats, err := parse.BucketUp(lines, b, *dir)  	if err != nil {  		log.Fatal(err)  	} | 
