diff options
author | Nick White <git@njw.name> | 2020-11-10 12:28:50 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2020-11-10 12:28:50 +0000 |
commit | ad7aaf490e78e969bb5495dfda06a33d2a176aec (patch) | |
tree | 64c9a9cbc50b2e0c8f9b5bdafecac6e6220212e1 /internal | |
parent | dac2f1ad471cd9896c16569fe02c69ff9b9855ba (diff) |
[rescribe] Enable custom paths to tesseract command to be set (also improve some error output)
Diffstat (limited to 'internal')
-rw-r--r-- | internal/pipeline/pipeline.go | 9 |
1 file changed, 6 insertions, 3 deletions
```diff
diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go
index c0accdb..f6598fd 100644
--- a/internal/pipeline/pipeline.go
+++ b/internal/pipeline/pipeline.go
@@ -189,12 +189,15 @@ func Wipe(towipe chan string, up chan string, errc chan error, logger *log.Logge
 	close(up)
 }
 
-func Ocr(training string) func(chan string, chan string, chan error, *log.Logger) {
+func Ocr(training string, tesscmd string) func(chan string, chan string, chan error, *log.Logger) {
 	return func(toocr chan string, up chan string, errc chan error, logger *log.Logger) {
+		if tesscmd == "" {
+			tesscmd = "tesseract"
+		}
 		for path := range toocr {
 			logger.Println("OCRing", path)
 			name := strings.Replace(path, ".png", "", 1)
-			cmd := exec.Command("tesseract", "-l", training, path, name, "-c", "tessedit_create_hocr=1", "-c", "hocr_font_info=0")
+			cmd := exec.Command(tesscmd, "-l", training, path, name, "-c", "tessedit_create_hocr=1", "-c", "hocr_font_info=0")
 			var stdout, stderr bytes.Buffer
 			cmd.Stdout = &stdout
 			cmd.Stderr = &stderr
@@ -491,7 +494,7 @@ func OcrPage(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string, ch
 	msgparts := strings.Split(msg.Body, " ")
 	bookname := filepath.Dir(msgparts[0])
 	if len(msgparts) > 1 && msgparts[1] != "" {
-		process = Ocr(msgparts[1])
+		process = Ocr(msgparts[1], "")
 	}
 
 	d := filepath.Join(os.TempDir(), bookname)
```