summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2020-02-05 11:00:20 +0000
committer Nick White <git@njw.name> 2020-02-05 11:00:20 +0000
commit 7bfb7468fb7331443882c2f6d804de97df25ed0f (patch)
tree 0fed854543ca91a8b830f50aec1ea9d0d9b71730
parent 29fa900f3d5ad69d50d9a28f6875c3f81961b3f1 (diff)
Fix allOCRed for wipeonly books (hopefully)
allOCRed was checking for wipePattern files; however, those should already have been transformed into the regular preprocessedPattern files for OCR, so they would never have been OCRed directly. As a result, allOCRed was mistakenly looking for .hocr versions of the original wipePattern files, which would never have been produced.
-rw-r--r-- cmd/bookpipeline/main.go 6
1 files changed, 1 insertions, 5 deletions
diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 3ce1f15..402271e 100644
--- a/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -416,15 +416,11 @@ func allOCRed(bookname string, conn Pipeliner) bool {
return false
}
- // Full wipePattern can match things like 0000.png which getgbook
- // can emit but aren't ocr-able
- //wipePattern := regexp.MustCompile(`[0-9]{4,6}(.bin)?.png$`)
- wipePattern := regexp.MustCompile(`[0-9]{6}(.bin)?.png$`)
preprocessedPattern := regexp.MustCompile(`_bin[0-9].[0-9].png$`)
atleastone := false
for _, png := range objs {
- if wipePattern.MatchString(png) || preprocessedPattern.MatchString(png) {
+ if preprocessedPattern.MatchString(png) {
atleastone = true
found := false
b := strings.TrimSuffix(filepath.Base(png), ".png")