summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick White <git@njw.name>2021-02-09 17:45:35 +0000
committerNick White <git@njw.name>2021-02-09 17:45:35 +0000
commit45943f847b3db8db5142c79a806f251659264ca0 (patch)
tree10b1c484f6d858f0dedc267c10dfe97f80342799
parent13ce8fc4b45073e1f81a39c4923e44420509be73 (diff)
hocr: Use extracted page name for line naming
This means that even in multi-page hocrs containing lines with the same id (like line_1_1), the page name will be different, so extracthocrlines will no longer mistakenly give different lines the same name and thereby overwrite them.
-rw-r--r--pkg/hocr/lines.go14
1 files changed, 8 insertions, 6 deletions
diff --git a/pkg/hocr/lines.go b/pkg/hocr/lines.go
index 393f0ec..1c759e0 100644
--- a/pkg/hocr/lines.go
+++ b/pkg/hocr/lines.go
@@ -54,7 +54,7 @@ func LineText(l OcrLine) string {
return linetext
}
-func parseLineDetails(h Hocr, dir string, name string) (line.Details, error) {
+func parseLineDetails(h Hocr, dir string) (line.Details, error) {
lines := make(line.Details, 0)
for _, p := range h.Pages {
@@ -99,7 +99,11 @@ func parseLineDetails(h Hocr, dir string, name string) (line.Details, error) {
ln.Name = l.Id
ln.Avgconf = (totalconf / float64(num)) / 100
ln.Text = LineText(l)
- ln.OcrName = name
+ imgpath, err := imagePath(p.Title)
+ if err != nil {
+ return lines, err
+ }
+ ln.OcrName = strings.TrimSuffix(filepath.Base(imgpath), ".png")
if gray != nil {
var imgd line.ImgDirect
imgd.Img = gray.SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))
@@ -127,8 +131,7 @@ func GetLineDetails(hocrfn string) (line.Details, error) {
return newlines, err
}
- n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
- return parseLineDetails(h, filepath.Dir(hocrfn), n)
+ return parseLineDetails(h, filepath.Dir(hocrfn))
}
// GetLineBasics parses a hocr file and returns a corresponding
@@ -146,6 +149,5 @@ func GetLineBasics(hocrfn string) (line.Details, error) {
return newlines, err
}
- n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
- return parseLineDetails(h, filepath.Dir(hocrfn), n)
+ return parseLineDetails(h, filepath.Dir(hocrfn))
}