summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2019-09-05 22:24:37 +0100
committer Nick White <git@njw.name> 2019-09-05 22:42:30 +0100
commit 561d8461cbe19316762489cd7b04f95b9014bcda (patch)
tree 7d3409f3279174cab7707b024353d602d3a5f18e
parent 60a198f7ee5843a0f77b6dfb845c3b0413e83705 (diff)
Handle no words found error in a better way so any page that is actually 0 confidence is recognised
-rw-r--r-- bookpipeline/cmd/bookpipeline/main.go 6
-rw-r--r-- lib/hocr/hocr.go 2
2 files changed, 4 insertions, 4 deletions
diff --git a/bookpipeline/cmd/bookpipeline/main.go b/bookpipeline/cmd/bookpipeline/main.go
index 97f4109..ec6a08d 100644
--- a/bookpipeline/cmd/bookpipeline/main.go
+++ b/bookpipeline/cmd/bookpipeline/main.go
@@ -149,6 +149,9 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log
}
logger.Println("Calculating confidence for", path)
avg, err := hocr.GetAvgConf(path)
+ if err != nil && err.Error() == "No words found" {
+ continue
+ }
if err != nil {
for range toanalyse {
} // consume the rest of the receiving channel so it isn't blocked
@@ -156,9 +159,6 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log
errc <- errors.New(fmt.Sprintf("Error retreiving confidence for %s: %s", path, err))
return
}
- if avg == 0 {
- continue
- }
base := filepath.Base(path)
codestart := strings.Index(base, "_bin")
name := base[0:codestart]
diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go
index 052aa19..dcd0494 100644
--- a/lib/hocr/hocr.go
+++ b/lib/hocr/hocr.go
@@ -123,7 +123,7 @@ func GetAvgConf(hocrfn string) (float64, error) {
}
}
if num == 0 {
- return 0, nil
+ return 0, errors.New("No words found")
}
return total / num, nil
}