From a05e986286efe9168ea404729652650086bab4cc Mon Sep 17 00:00:00 2001
From: Nick White
Date: Fri, 25 Jan 2019 17:38:33 +0000
Subject: Use consistent naming for .prob and .hocr OcrName

---
 bucket-lines/bucket.go | 2 +-
 bucket-lines/main.go   | 8 ++++----
 lib/hocr/lines.go      | 3 ---
 lib/prob/prob.go       | 2 +-
 4 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/bucket-lines/bucket.go b/bucket-lines/bucket.go
index db87418..bf25405 100644
--- a/bucket-lines/bucket.go
+++ b/bucket-lines/bucket.go
@@ -51,7 +51,7 @@ func bucketLine(l line.Detail, buckets BucketSpecs, dirname string) (string, err
 		avgstr = avgstr[2:]
 	}
 
-	base := filepath.Join(dirname, todir, filepath.Base(l.OcrName) + "_" + l.Name + "_" + avgstr)
+	base := filepath.Join(dirname, todir, l.OcrName + "_" + l.Name + "_" + avgstr)
 
 	err := os.MkdirAll(filepath.Join(dirname, todir), 0700)
 	if err != nil {
diff --git a/bucket-lines/main.go b/bucket-lines/main.go
index ab6f977..36e9101 100644
--- a/bucket-lines/main.go
+++ b/bucket-lines/main.go
@@ -25,11 +25,11 @@ func main() {
 	flag.Usage = func() {
 		fmt.Fprintf(os.Stderr, "Usage: bucket-lines [-d dir] [-s specs.json] [hocr1] [prob1] [hocr2] [...]\n")
 		fmt.Fprintf(os.Stderr, "Copies image-text line pairs into different directories according\n")
-		fmt.Fprintf(os.Stderr, "to the average character probability for the line.\n\n")
-		fmt.Fprintf(os.Stderr, "Both .hocr and .prob files can be processed.\n\n")
-		fmt.Fprintf(os.Stderr, "For .hocr files, the x_wconf data is used to calculate confidence.\n\n")
+		fmt.Fprintf(os.Stderr, "to the average character probability for the line.\n")
+		fmt.Fprintf(os.Stderr, "Both .hocr and .prob files can be processed.\n")
+		fmt.Fprintf(os.Stderr, "For .hocr files, the x_wconf data is used to calculate confidence.\n")
 		fmt.Fprintf(os.Stderr, "The .prob files are generated using ocropy-rpred's --probabilities\n")
-		fmt.Fprintf(os.Stderr, "option.\n\n")
+		fmt.Fprintf(os.Stderr, "option.\n")
 		fmt.Fprintf(os.Stderr, "The .prob and .hocr files are assumed to be in the same directory\n")
 		fmt.Fprintf(os.Stderr, "as the line's image and text files.\n\n")
 		flag.PrintDefaults()
diff --git a/lib/hocr/lines.go b/lib/hocr/lines.go
index 985c7d0..84c2130 100644
--- a/lib/hocr/lines.go
+++ b/lib/hocr/lines.go
@@ -2,9 +2,6 @@ package hocr
 
 // TODO: Parse line name to zero pad line numbers, so they can
 // be sorted easily
-// TODO: have same filename format as .prob uses, so include base
-// dirname, and don't include line numbers if there's only
-// one line in the hocr
 
 import (
 	"image"
diff --git a/lib/prob/prob.go b/lib/prob/prob.go
index 55d2629..2fd7fb9 100644
--- a/lib/prob/prob.go
+++ b/lib/prob/prob.go
@@ -57,7 +57,7 @@ func GetLineDetails(probfn string) (line.Details, error) {
 	l.Name = filepath.Base(filebase)
 	l.Avgconf = avg
 	l.Text = string(txt)
-	l.OcrName = filepath.Dir(filebase)
+	l.OcrName = filepath.Base(filepath.Dir(filebase))
 
 	var imgfn line.ImgPath
 	imgfn.Path = filebase + ".bin.png"
-- 
cgit v1.2.1-24-ge1ad