diff options
author | Nick White <git@njw.name> | 2021-03-23 11:14:35 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2021-03-23 11:14:35 +0000 |
commit | fa4f48ad54ec94c222269d335d40b21becff92a4 (patch) | |
tree | ca3ba1a4b46d1570607d9233455496e93e10517a /cmd/extracthocrlines | |
parent | 59d139893ab9044447e14fa091f50b48969847b5 (diff) |
hocr: Add ability to specify a custom image path for hocr line extraction, and use it in extracthocrlines
Diffstat (limited to 'cmd/extracthocrlines')
-rw-r--r-- | cmd/extracthocrlines/main.go | 13 |
1 file changed, 11 insertions, 2 deletions
```diff
diff --git a/cmd/extracthocrlines/main.go b/cmd/extracthocrlines/main.go
index d765875..fa5ef28 100644
--- a/cmd/extracthocrlines/main.go
+++ b/cmd/extracthocrlines/main.go
@@ -14,12 +14,13 @@ import (
 	"log"
 	"os"
 	"path/filepath"
+	"strings"
 
 	"rescribe.xyz/utils/pkg/hocr"
 	"rescribe.xyz/utils/pkg/line"
 )
 
-const usage = `Usage: extracthocrlines file.hocr [file.hocr]
+const usage = `Usage: extracthocrlines [-d] [-e] file.hocr [file.hocr]
 
 Copies the text and corresponding image section for each line
 of a HOCR file into separate files, which is useful for OCR
@@ -64,6 +65,7 @@ func main() {
 		flag.PrintDefaults()
 	}
 	dir := flag.String("d", ".", "Directory to save lines in")
+	embeddedimgpath := flag.Bool("e", false, "Use image path embedded in hOCR (rather than the path of the .hocr file with a .png suffix)")
 	flag.Parse()
 	if flag.NArg() < 1 {
 		flag.Usage()
@@ -71,7 +73,14 @@ func main() {
 	}
 
 	for _, f := range flag.Args() {
-		newlines, err := hocr.GetLineDetails(f)
+		var err error
+		var newlines line.Details
+		if *embeddedimgpath {
+			newlines, err = hocr.GetLineDetails(f)
+		} else {
+			imgName := strings.TrimSuffix(f, ".hocr") + ".png"
+			newlines, err = hocr.GetLineDetailsCustomImg(f, imgName)
+		}
 		if err != nil {
 			log.Fatal(err)
 		}
```