From ce44b0c2038240b28283b1eca8dc03aa37a9875e Mon Sep 17 00:00:00 2001 From: Nick White Date: Fri, 23 Aug 2019 16:51:50 +0100 Subject: Expect source files to be .jpg --- bookpipeline/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bookpipeline/main.go b/bookpipeline/main.go index 30f9cfa..22bf7f5 100644 --- a/bookpipeline/main.go +++ b/bookpipeline/main.go @@ -116,7 +116,7 @@ func ocr(training string) func(chan string, chan string, chan error, *log.Logger return func (toocr chan string, up chan string, errc chan error, logger *log.Logger) { for path := range toocr { logger.Println("OCRing", path) - name := strings.Replace(path, ".png", "", 1) // TODO: handle any file extension + name := strings.Replace(path, ".png", "", 1) cmd := exec.Command("tesseract", "-l", training, path, name, "hocr") err := cmd.Run() if err != nil { @@ -226,7 +226,7 @@ func main() { } // TODO: match jpg too - origPattern := regexp.MustCompile(`[0-9]{4}.png$`) // TODO: match other file naming + origPattern := regexp.MustCompile(`[0-9]{4}.jpg$`) // TODO: match other file naming preprocessedPattern := regexp.MustCompile(`_bin[0-9].[0-9].png$`) //ocredPattern := regexp.MustCompile(`.hocr$`) -- cgit v1.2.1-24-ge1ad