From fa4f48ad54ec94c222269d335d40b21becff92a4 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 23 Mar 2021 11:14:35 +0000 Subject: hocr: Add ability to specify a custom image path for hocr line extraction, and use it in extracthocrlines --- cmd/extracthocrlines/main.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'cmd') diff --git a/cmd/extracthocrlines/main.go b/cmd/extracthocrlines/main.go index d765875..fa5ef28 100644 --- a/cmd/extracthocrlines/main.go +++ b/cmd/extracthocrlines/main.go @@ -14,12 +14,13 @@ import ( "log" "os" "path/filepath" + "strings" "rescribe.xyz/utils/pkg/hocr" "rescribe.xyz/utils/pkg/line" ) -const usage = `Usage: extracthocrlines file.hocr [file.hocr] +const usage = `Usage: extracthocrlines [-d] [-e] file.hocr [file.hocr] Copies the text and corresponding image section for each line of a HOCR file into separate files, which is useful for OCR @@ -64,6 +65,7 @@ func main() { flag.PrintDefaults() } dir := flag.String("d", ".", "Directory to save lines in") + embeddedimgpath := flag.Bool("e", false, "Use image path embedded in hOCR (rather than the path of the .hocr file with a .png suffix)") flag.Parse() if flag.NArg() < 1 { flag.Usage() @@ -71,7 +73,14 @@ func main() { } for _, f := range flag.Args() { - newlines, err := hocr.GetLineDetails(f) + var err error + var newlines line.Details + if *embeddedimgpath { + newlines, err = hocr.GetLineDetails(f) + } else { + imgName := strings.TrimSuffix(f, ".hocr") + ".png" + newlines, err = hocr.GetLineDetailsCustomImg(f, imgName) + } if err != nil { log.Fatal(err) } -- cgit v1.2.1-24-ge1ad