diff options
author | Nick White <git@njw.name> | 2019-08-23 16:51:50 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-08-23 16:51:50 +0100 |
commit | ce44b0c2038240b28283b1eca8dc03aa37a9875e (patch) | |
tree | 8322548209b1c27550499d82eb80773dd272fea0 | |
parent | e5d5f4c270ae48022f2fc87cc5d65d8276de4d71 (diff) |
Expect source files to be .jpg
-rw-r--r-- | bookpipeline/main.go | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/bookpipeline/main.go b/bookpipeline/main.go
index 30f9cfa..22bf7f5 100644
--- a/bookpipeline/main.go
+++ b/bookpipeline/main.go
@@ -116,7 +116,7 @@ func ocr(training string) func(chan string, chan string, chan error, *log.Logger
 	return func (toocr chan string, up chan string, errc chan error, logger *log.Logger) {
 		for path := range toocr {
 			logger.Println("OCRing", path)
-			name := strings.Replace(path, ".png", "", 1) // TODO: handle any file extension
+			name := strings.Replace(path, ".png", "", 1)
 			cmd := exec.Command("tesseract", "-l", training, path, name, "hocr")
 			err := cmd.Run()
 			if err != nil {
@@ -226,7 +226,7 @@ func main() {
 	}
 
 	// TODO: match jpg too
-	origPattern := regexp.MustCompile(`[0-9]{4}.png$`) // TODO: match other file naming
+	origPattern := regexp.MustCompile(`[0-9]{4}.jpg$`) // TODO: match other file naming
 	preprocessedPattern := regexp.MustCompile(`_bin[0-9].[0-9].png$`)
 	//ocredPattern := regexp.MustCompile(`.hocr$`)