diff options
author | Nick White <git@njw.name> | 2020-11-09 12:40:57 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2020-11-09 12:40:57 +0000 |
commit | 34b5735503edb9c5ab635c84cd356f19df7d7381 (patch) | |
tree | dd3c1ee9df2fcc7cf5d3247c2e523fb464513da0 /cmd | |
parent | 48f817f0dfd3e89c372ac358418fe69b43eefa1b (diff) |
Set hocr config options directly rather than relying on 'hocr' config file
This ensures that bookpipeline will still work even if TESSDATA_PREFIX has
been set to a directory without configs in it.
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/bookpipeline/main.go | 2 |
1 files changed, 1 insertions, 1 deletions
```diff
diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 36295a6..b3ffc53 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -216,7 +216,7 @@ func ocr(training string) func(chan string, chan string, chan error, *log.Logger
 		for path := range toocr {
 			logger.Println("OCRing", path)
 			name := strings.Replace(path, ".png", "", 1)
-			cmd := exec.Command("tesseract", "-l", training, path, name, "hocr")
+			cmd := exec.Command("tesseract", "-l", training, path, name, "-c", "tessedit_create_hocr=1", "-c", "hocr_font_info=0")
 			var stdout, stderr bytes.Buffer
 			cmd.Stdout = &stdout
 			cmd.Stderr = &stderr
```