diff options
author | Nick White <git@njw.name> | 2020-02-05 11:00:20 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2020-02-05 11:00:20 +0000 |
commit | 7bfb7468fb7331443882c2f6d804de97df25ed0f (patch) | |
tree | 0fed854543ca91a8b830f50aec1ea9d0d9b71730 | |
parent | 29fa900f3d5ad69d50d9a28f6875c3f81961b3f1 (diff) |
Fix allOCRed for wipeonly books (hopefully)
allOCRed was checking for wipePattern files; however, those files should
already have been transformed into the regular preprocessedPattern files
for OCR, so they would never have been OCRed directly. As a result,
allOCRed was mistakenly looking for .hocr versions of the original
wipePattern files, which would never have been produced.
-rw-r--r-- | cmd/bookpipeline/main.go | 6 |
1 files changed, 1 insertions, 5 deletions
diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 3ce1f15..402271e 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -416,15 +416,11 @@ func allOCRed(bookname string, conn Pipeliner) bool {
 		return false
 	}
 
-	// Full wipePattern can match things like 0000.png which getgbook
-	// can emit but aren't ocr-able
-	//wipePattern := regexp.MustCompile(`[0-9]{4,6}(.bin)?.png$`)
-	wipePattern := regexp.MustCompile(`[0-9]{6}(.bin)?.png$`)
 	preprocessedPattern := regexp.MustCompile(`_bin[0-9].[0-9].png$`)
 
 	atleastone := false
 	for _, png := range objs {
-		if wipePattern.MatchString(png) || preprocessedPattern.MatchString(png) {
+		if preprocessedPattern.MatchString(png) {
 			atleastone = true
 			found := false
 			b := strings.TrimSuffix(filepath.Base(png), ".png")