From 34b5735503edb9c5ab635c84cd356f19df7d7381 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Mon, 9 Nov 2020 12:40:57 +0000
Subject: Set hocr config options directly rather than relying on 'hocr' config file

This ensures that bookpipeline will still work even if TESSDATA_PREFIX has been set to a directory without configs in it.
---
 cmd/bookpipeline/main.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 36295a6..b3ffc53 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -216,7 +216,7 @@ func ocr(training string) func(chan string, chan string, chan error, *log.Logger
 			for path := range toocr {
 				logger.Println("OCRing", path)
 				name := strings.Replace(path, ".png", "", 1)
-			cmd := exec.Command("tesseract", "-l", training, path, name, "hocr")
+			cmd := exec.Command("tesseract", "-l", training, path, name, "-c", "tessedit_create_hocr=1", "-c", "hocr_font_info=0")
 				var stdout, stderr bytes.Buffer
 				cmd.Stdout = &stdout
 				cmd.Stderr = &stderr
-- 
cgit v1.2.1-24-ge1ad