summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-08-23 16:51:50 +0100
committer Nick White <git@njw.name> 2019-08-23 16:51:50 +0100
commit ce44b0c2038240b28283b1eca8dc03aa37a9875e (patch)
tree 8322548209b1c27550499d82eb80773dd272fea0
parent e5d5f4c270ae48022f2fc87cc5d65d8276de4d71 (diff)
Expect source files to be .jpg
-rw-r--r-- bookpipeline/main.go 4
1 files changed, 2 insertions, 2 deletions
diff --git a/bookpipeline/main.go b/bookpipeline/main.go
index 30f9cfa..22bf7f5 100644
--- a/bookpipeline/main.go
+++ b/bookpipeline/main.go
@@ -116,7 +116,7 @@ func ocr(training string) func(chan string, chan string, chan error, *log.Logger
return func (toocr chan string, up chan string, errc chan error, logger *log.Logger) {
for path := range toocr {
logger.Println("OCRing", path)
- name := strings.Replace(path, ".png", "", 1) // TODO: handle any file extension
+ name := strings.Replace(path, ".png", "", 1)
cmd := exec.Command("tesseract", "-l", training, path, name, "hocr")
err := cmd.Run()
if err != nil {
@@ -226,7 +226,7 @@ func main() {
}
// TODO: match jpg too
- origPattern := regexp.MustCompile(`[0-9]{4}.png$`) // TODO: match other file naming
+ origPattern := regexp.MustCompile(`[0-9]{4}.jpg$`) // TODO: match other file naming
preprocessedPattern := regexp.MustCompile(`_bin[0-9].[0-9].png$`)
//ocredPattern := regexp.MustCompile(`.hocr$`)