summary | refs | log | tree | commit | diff
path: root/internal/pipeline
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2020-11-10 12:28:50 +0000
committer: Nick White <git@njw.name> 2020-11-10 12:28:50 +0000
commit: ad7aaf490e78e969bb5495dfda06a33d2a176aec (patch)
tree: 64c9a9cbc50b2e0c8f9b5bdafecac6e6220212e1 /internal/pipeline
parent: dac2f1ad471cd9896c16569fe02c69ff9b9855ba (diff)
[rescribe] Enable custom paths to tesseract command to be set (also improve some error output)
Diffstat (limited to 'internal/pipeline')
-rw-r--r-- internal/pipeline/pipeline.go | 9
1 files changed, 6 insertions, 3 deletions
diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go
index c0accdb..f6598fd 100644
--- a/internal/pipeline/pipeline.go
+++ b/internal/pipeline/pipeline.go
@@ -189,12 +189,15 @@ func Wipe(towipe chan string, up chan string, errc chan error, logger *log.Logge
close(up)
}
-func Ocr(training string) func(chan string, chan string, chan error, *log.Logger) {
+func Ocr(training string, tesscmd string) func(chan string, chan string, chan error, *log.Logger) {
return func(toocr chan string, up chan string, errc chan error, logger *log.Logger) {
+ if tesscmd == "" {
+ tesscmd = "tesseract"
+ }
for path := range toocr {
logger.Println("OCRing", path)
name := strings.Replace(path, ".png", "", 1)
- cmd := exec.Command("tesseract", "-l", training, path, name, "-c", "tessedit_create_hocr=1", "-c", "hocr_font_info=0")
+ cmd := exec.Command(tesscmd, "-l", training, path, name, "-c", "tessedit_create_hocr=1", "-c", "hocr_font_info=0")
var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
@@ -491,7 +494,7 @@ func OcrPage(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string, ch
msgparts := strings.Split(msg.Body, " ")
bookname := filepath.Dir(msgparts[0])
if len(msgparts) > 1 && msgparts[1] != "" {
- process = Ocr(msgparts[1])
+ process = Ocr(msgparts[1], "")
}
d := filepath.Join(os.TempDir(), bookname)