diff options
author | Nick White <git@njw.name> | 2019-09-05 22:24:37 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-09-05 22:42:30 +0100 |
commit | 561d8461cbe19316762489cd7b04f95b9014bcda (patch) | |
tree | 7d3409f3279174cab7707b024353d602d3a5f18e | |
parent | 60a198f7ee5843a0f77b6dfb845c3b0413e83705 (diff) |
Handle the "No words found" error explicitly, so that any page whose confidence actually is 0 is still recognised
-rw-r--r-- | bookpipeline/cmd/bookpipeline/main.go | 6 | ||||
-rw-r--r-- | lib/hocr/hocr.go | 2 |
2 files changed, 4 insertions, 4 deletions
```diff
diff --git a/bookpipeline/cmd/bookpipeline/main.go b/bookpipeline/cmd/bookpipeline/main.go
index 97f4109..ec6a08d 100644
--- a/bookpipeline/cmd/bookpipeline/main.go
+++ b/bookpipeline/cmd/bookpipeline/main.go
@@ -149,6 +149,9 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log
 		}
 		logger.Println("Calculating confidence for", path)
 		avg, err := hocr.GetAvgConf(path)
+		if err != nil && err.Error() == "No words found" {
+			continue
+		}
 		if err != nil {
 			for range toanalyse {
 			} // consume the rest of the receiving channel so it isn't blocked
@@ -156,9 +159,6 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log
 			errc <- errors.New(fmt.Sprintf("Error retreiving confidence for %s: %s", path, err))
 			return
 		}
-		if avg == 0 {
-			continue
-		}
 		base := filepath.Base(path)
 		codestart := strings.Index(base, "_bin")
 		name := base[0:codestart]
diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go
index 052aa19..dcd0494 100644
--- a/lib/hocr/hocr.go
+++ b/lib/hocr/hocr.go
@@ -123,7 +123,7 @@ func GetAvgConf(hocrfn string) (float64, error) {
 		}
 	}
 	if num == 0 {
-		return 0, nil
+		return 0, errors.New("No words found")
 	}
 	return total / num, nil
 }
```