diff options
author | Nick White <git@njw.name> | 2019-02-25 12:09:06 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-02-25 12:29:59 +0000 |
commit | 3c4c5f7c202b7c54ca8d23e7bd7bff4a4bb696cc (patch) | |
tree | 3e2be0e72fb4fbdac80c4df0edb3c67adac68625 /hocrtotxt | |
parent | d8ca7b584b693a2a61dd88767a81d99bc48aca32 (diff) |
Add tool to extract plain text from hocr
Diffstat (limited to 'hocrtotxt')
-rw-r--r-- | hocrtotxt/main.go | 30 |
1 files changed, 30 insertions, 0 deletions
```diff
diff --git a/hocrtotxt/main.go b/hocrtotxt/main.go
new file mode 100644
index 0000000..6821a9e
--- /dev/null
+++ b/hocrtotxt/main.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+
+	"rescribe.xyz/go.git/lib/hocr"
+)
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: hocrtotxt hocrfile\n")
+		fmt.Fprintf(os.Stderr, "Prints the text from a hocr file.\n")
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+	if flag.NArg() != 1 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	text, err := hocr.GetText(flag.Arg(0))
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	fmt.Printf("%s\n", text)
+}
```