From 3c4c5f7c202b7c54ca8d23e7bd7bff4a4bb696cc Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 25 Feb 2019 12:09:06 +0000 Subject: Add tool to extract plain text from hocr --- hocrtotxt/main.go | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 hocrtotxt/main.go (limited to 'hocrtotxt') diff --git a/hocrtotxt/main.go b/hocrtotxt/main.go new file mode 100644 index 0000000..6821a9e --- /dev/null +++ b/hocrtotxt/main.go @@ -0,0 +1,30 @@ +package main + +import ( + "flag" + "fmt" + "log" + "os" + + "rescribe.xyz/go.git/lib/hocr" +) + +func main() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: hocrtotxt hocrfile\n") + fmt.Fprintf(os.Stderr, "Prints the text from a hocr file.\n") + flag.PrintDefaults() + } + flag.Parse() + if flag.NArg() != 1 { + flag.Usage() + os.Exit(1) + } + + text, err := hocr.GetText(flag.Arg(0)) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("%s\n", text) +} -- cgit v1.2.1-24-ge1ad