summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2020-11-10 11:59:14 +0000
committer: Nick White <git@njw.name> 2020-11-10 11:59:14 +0000
commit: dac2f1ad471cd9896c16569fe02c69ff9b9855ba (patch)
tree: 3c16ab752d1f16fa63cb4031ef6ee6d7554fd1c3
parent: 7921b5ca6d6667dda09ae67dcc1ee987aef62ebb (diff)
[rescribe] Change -t to the path of the traineddata file, and set TESSDATA_PREFIX accordingly
-rw-r--r-- cmd/rescribe/main.go | 35
1 files changed, 32 insertions, 3 deletions
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 3b69b21..8d7c07b 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -13,8 +13,10 @@ import (
"io/ioutil"
"log"
"os"
+ "os/exec"
"path/filepath"
"regexp"
+ "strings"
"time"
"rescribe.xyz/bookpipeline"
@@ -74,7 +76,7 @@ func resetTimer(t *time.Timer, d time.Duration) {
func main() {
verbose := flag.Bool("v", false, "verbose")
- training := flag.String("t", "rescribealphav5", "default tesseract training file to use (without the .traineddata part)")
+ training := flag.String("t", "training/rescribev7_fast.traineddata", "path to the tesseract training file to use")
flag.Usage = func() {
fmt.Fprintf(flag.CommandLine.Output(), usage)
@@ -103,6 +105,33 @@ func main() {
verboselog = log.New(n, "", 0)
}
+ f, err := os.Open(*training)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Error: Training file %s could not be opened.\n", *training)
+ fmt.Fprintf(os.Stderr, "Set the `-t` flag with path to a tesseract .traineddata file.\n")
+ os.Exit(1)
+ }
+ f.Close()
+
+ abstraining, err := filepath.Abs(*training) // absolute form is split below into the TESSDATA_PREFIX directory and the training name
+ if err != nil {
+ log.Fatalf("Error getting absolute path of training %s: %v", *training, err)
+ }
+ tessPrefix, trainingName := filepath.Split(abstraining)
+ trainingName = strings.TrimSuffix(trainingName, ".traineddata")
+ err = os.Setenv("TESSDATA_PREFIX", tessPrefix)
+ if err != nil {
+ log.Fatalln("Error setting TESSDATA_PREFIX:", err)
+ }
+
+ // TODO: would be good to be able to set custom path to tesseract
+ _, err = exec.Command("tesseract", "--help").Output()
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Error: Can't run Tesseract.\n")
+ fmt.Fprintf(os.Stderr, "Ensure that Tesseract is installed and available.\n")
+ os.Exit(1)
+ }
+
tempdir, err := ioutil.TempDir("", "bookpipeline")
if err != nil {
log.Fatalln("Error setting up temporary directory:", err)
@@ -120,14 +149,14 @@ func main() {
fmt.Printf("Copying book to pipeline\n")
- err = uploadbook(bookdir, bookname, *training, conn)
+ err = uploadbook(bookdir, bookname, trainingName, conn)
if err != nil {
_ = os.RemoveAll(tempdir)
log.Fatalln(err)
}
fmt.Printf("Processing book\n")
- err = processbook(*training, conn)
+ err = processbook(trainingName, conn)
if err != nil {
_ = os.RemoveAll(tempdir)
log.Fatalln(err)