summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bucket-lines-hocr/bucket-lines-hocr.go (renamed from line-conf-buckets-tess/line-conf-buckets-tess.go) 25
1 files changed, 15 insertions, 10 deletions
diff --git a/line-conf-buckets-tess/line-conf-buckets-tess.go b/bucket-lines-hocr/bucket-lines-hocr.go
index 38dec15..b35c824 100644
--- a/line-conf-buckets-tess/line-conf-buckets-tess.go
+++ b/bucket-lines-hocr/bucket-lines-hocr.go
@@ -1,7 +1,6 @@
package main
-// TODO: rename
-// TODO: set bucket dirname from cmdline
+// TODO: merge with -prob, using filename extension to determine what to do for each file
import (
"flag"
@@ -46,12 +45,24 @@ func detailsFromFile(f string) (parse.LineDetails, error) {
}
func main() {
+ b := parse.BucketSpecs{
+ // minimum confidence, name
+ { 0, "bad" },
+ { 0.95, "95to98" },
+ { 0.98, "98plus" },
+ }
+
flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: line-conf-buckets hocr1 [hocr2] [...]\n")
+ fmt.Fprintf(os.Stderr, "Usage: bucket-lines-hocr [-d dir] hocr1 [hocr2] [...]\n")
fmt.Fprintf(os.Stderr, "Copies image-text line pairs into different directories according\n")
fmt.Fprintf(os.Stderr, "to the average character probability for the line.\n")
+ fmt.Fprintf(os.Stderr, "This uses the x_wconf data in .hocr files, which it assumes will be.\n")
+ fmt.Fprintf(os.Stderr, "in the same directory as the line's image and text files. It can\n")
+ fmt.Fprintf(os.Stderr, "handle hocr where each character is tagged separately and hocr where\n")
+ fmt.Fprintf(os.Stderr, "only whole words are tagged.\n")
flag.PrintDefaults()
}
+ dir := flag.String("d", "buckets", "Directory to store the buckets")
flag.Parse()
if flag.NArg() < 1 {
flag.Usage()
@@ -71,13 +82,7 @@ func main() {
}
}
- b := parse.BucketSpecs{
- { 0, "bad" },
- { 0.95, "95to98" },
- { 0.98, "98plus" },
- }
-
- stats, err := parse.BucketUp(lines, b, "newbuckets")
+ stats, err := parse.BucketUp(lines, b, *dir)
if err != nil {
log.Fatal(err)
}