From 3c4c5f7c202b7c54ca8d23e7bd7bff4a4bb696cc Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Mon, 25 Feb 2019 12:09:06 +0000
Subject: Add tool to extract plain text from hocr

---
 hocrtotxt/main.go | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 hocrtotxt/main.go

(limited to 'hocrtotxt')

diff --git a/hocrtotxt/main.go b/hocrtotxt/main.go
new file mode 100644
index 0000000..6821a9e
--- /dev/null
+++ b/hocrtotxt/main.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+
+	"rescribe.xyz/go.git/lib/hocr"
+)
+
+// main prints the text of the hocr file named by the sole argument.
+func main() {
+	flag.Usage = func() {
+		fmt.Fprint(os.Stderr, "Usage: hocrtotxt hocrfile\n")
+		fmt.Fprint(os.Stderr, "Prints the text from a hocr file.\n")
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+	// Exactly one positional argument (the hocr path) is required.
+	if args := flag.Args(); len(args) != 1 {
+		flag.Usage()
+		os.Exit(1)
+	}
+	contents, err := hocr.GetText(flag.Arg(0))
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Fprintf(os.Stdout, "%s\n", contents)
+}
-- 
cgit v1.2.1-24-ge1ad