summary | refs | log | tree | commit | diff
path: root/hocrtotxt/main.go
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-02-25 12:09:06 +0000
committer Nick White <git@njw.name> 2019-02-25 12:29:59 +0000
commit 3c4c5f7c202b7c54ca8d23e7bd7bff4a4bb696cc (patch)
tree 3e2be0e72fb4fbdac80c4df0edb3c67adac68625 /hocrtotxt/main.go
parent d8ca7b584b693a2a61dd88767a81d99bc48aca32 (diff)
Add tool to extract plain text from hocr
Diffstat (limited to 'hocrtotxt/main.go')
-rw-r--r-- hocrtotxt/main.go 30
1 files changed, 30 insertions, 0 deletions
diff --git a/hocrtotxt/main.go b/hocrtotxt/main.go
new file mode 100644
index 0000000..6821a9e
--- /dev/null
+++ b/hocrtotxt/main.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "os"
+
+ "rescribe.xyz/go.git/lib/hocr"
+)
+
+// main is the entry point of the hocrtotxt command. It expects exactly one
+// positional argument, hands it to hocr.GetText, and writes the returned
+// text to standard output followed by a newline.
+//
+// If the argument count is not exactly one, the usage message is written to
+// standard error and the process exits with status 1. If hocr.GetText
+// returns an error, it is reported through log.Fatal.
+func main() {
+	// Install the custom help text before parsing so that it is also used
+	// for any parse errors reported by the flag package.
+	flag.Usage = func() {
+		fmt.Fprint(os.Stderr, "Usage: hocrtotxt hocrfile\n")
+		fmt.Fprint(os.Stderr, "Prints the text from a hocr file.\n")
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	// Anything other than a single positional argument is a usage error.
+	args := flag.Args()
+	if len(args) != 1 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	contents, err := hocr.GetText(args[0])
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	fmt.Printf("%s\n", contents)
+}