summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bucket-lines/bucket.go 2
-rw-r--r-- bucket-lines/main.go 8
-rw-r--r-- lib/hocr/lines.go 3
-rw-r--r-- lib/prob/prob.go 2
4 files changed, 6 insertions, 9 deletions
diff --git a/bucket-lines/bucket.go b/bucket-lines/bucket.go
index db87418..bf25405 100644
--- a/bucket-lines/bucket.go
+++ b/bucket-lines/bucket.go
@@ -51,7 +51,7 @@ func bucketLine(l line.Detail, buckets BucketSpecs, dirname string) (string, err
avgstr = avgstr[2:]
}
- base := filepath.Join(dirname, todir, filepath.Base(l.OcrName) + "_" + l.Name + "_" + avgstr)
+ base := filepath.Join(dirname, todir, l.OcrName + "_" + l.Name + "_" + avgstr)
err := os.MkdirAll(filepath.Join(dirname, todir), 0700)
if err != nil {
diff --git a/bucket-lines/main.go b/bucket-lines/main.go
index ab6f977..36e9101 100644
--- a/bucket-lines/main.go
+++ b/bucket-lines/main.go
@@ -25,11 +25,11 @@ func main() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage: bucket-lines [-d dir] [-s specs.json] [hocr1] [prob1] [hocr2] [...]\n")
fmt.Fprintf(os.Stderr, "Copies image-text line pairs into different directories according\n")
- fmt.Fprintf(os.Stderr, "to the average character probability for the line.\n\n")
- fmt.Fprintf(os.Stderr, "Both .hocr and .prob files can be processed.\n\n")
- fmt.Fprintf(os.Stderr, "For .hocr files, the x_wconf data is used to calculate confidence.\n\n")
+ fmt.Fprintf(os.Stderr, "to the average character probability for the line.\n")
+ fmt.Fprintf(os.Stderr, "Both .hocr and .prob files can be processed.\n")
+ fmt.Fprintf(os.Stderr, "For .hocr files, the x_wconf data is used to calculate confidence.\n")
fmt.Fprintf(os.Stderr, "The .prob files are generated using ocropy-rpred's --probabilities\n")
- fmt.Fprintf(os.Stderr, "option.\n\n")
+ fmt.Fprintf(os.Stderr, "option.\n")
fmt.Fprintf(os.Stderr, "The .prob and .hocr files are assumed to be in the same directory\n")
fmt.Fprintf(os.Stderr, "as the line's image and text files.\n\n")
flag.PrintDefaults()
diff --git a/lib/hocr/lines.go b/lib/hocr/lines.go
index 985c7d0..84c2130 100644
--- a/lib/hocr/lines.go
+++ b/lib/hocr/lines.go
@@ -2,9 +2,6 @@ package hocr
// TODO: Parse line name to zero pad line numbers, so they can
// be sorted easily
-// TODO: have same filename format as .prob uses, so include base
-// dirname, and don't include line numbers if there's only
-// one line in the hocr
import (
"image"
diff --git a/lib/prob/prob.go b/lib/prob/prob.go
index 55d2629..2fd7fb9 100644
--- a/lib/prob/prob.go
+++ b/lib/prob/prob.go
@@ -57,7 +57,7 @@ func GetLineDetails(probfn string) (line.Details, error) {
l.Name = filepath.Base(filebase)
l.Avgconf = avg
l.Text = string(txt)
- l.OcrName = filepath.Dir(filebase)
+ l.OcrName = filepath.Base(filepath.Dir(filebase))
var imgfn line.ImgPath
imgfn.Path = filebase + ".bin.png"