diff options
Diffstat (limited to 'cmd/rescribe/main.go')
-rw-r--r-- | cmd/rescribe/main.go | 35 |
1 files changed, 32 insertions, 3 deletions
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 3b69b21..8d7c07b 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -13,8 +13,10 @@ import (
 	"io/ioutil"
 	"log"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"regexp"
+	"strings"
 	"time"
 
 	"rescribe.xyz/bookpipeline"
@@ -74,7 +76,7 @@ func resetTimer(t *time.Timer, d time.Duration) {
 
 func main() {
 	verbose := flag.Bool("v", false, "verbose")
-	training := flag.String("t", "rescribealphav5", "default tesseract training file to use (without the .traineddata part)")
+	training := flag.String("t", "training/rescribev7_fast.traineddata", "path to the tesseract training file to use")
 
 	flag.Usage = func() {
 		fmt.Fprintf(flag.CommandLine.Output(), usage)
@@ -103,6 +105,33 @@ func main() {
 		verboselog = log.New(n, "", 0)
 	}
 
+	f, err := os.Open(*training)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error: Training file %s could not be opened.\n", *training)
+		fmt.Fprintf(os.Stderr, "Set the `-t` flag with path to a tesseract .traineddata file.\n")
+		os.Exit(1)
+	}
+	f.Close()
+
+	abstraining, err := filepath.Abs(*training)
+	if err != nil {
+		log.Fatalf("Error getting absolute path of training %s: %v", err)
+	}
+	tessPrefix, trainingName := filepath.Split(abstraining)
+	trainingName = strings.TrimSuffix(trainingName, ".traineddata")
+	err = os.Setenv("TESSDATA_PREFIX", tessPrefix)
+	if err != nil {
+		log.Fatalln("Error setting TESSDATA_PREFIX:", err)
+	}
+
+	// TODO: would be good to be able to set custom path to tesseract
+	_, err = exec.Command("tesseract", "--help").Output()
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error: Can't run Tesseract.\n")
+		fmt.Fprintf(os.Stderr, "Ensure that Tesseract is installed and available.\n")
+		os.Exit(1)
+	}
+
 	tempdir, err := ioutil.TempDir("", "bookpipeline")
 	if err != nil {
 		log.Fatalln("Error setting up temporary directory:", err)
@@ -120,14 +149,14 @@ func main() {
 
 	fmt.Printf("Copying book to pipeline\n")
 
-	err = uploadbook(bookdir, bookname, *training, conn)
+	err = uploadbook(bookdir, bookname, trainingName, conn)
 	if err != nil {
 		_ = os.RemoveAll(tempdir)
 		log.Fatalln(err)
 	}
 
 	fmt.Printf("Processing book\n")
-	err = processbook(*training, conn)
+	err = processbook(trainingName, conn)
 	if err != nil {
 		_ = os.RemoveAll(tempdir)
 		log.Fatalln(err)