From 561d8461cbe19316762489cd7b04f95b9014bcda Mon Sep 17 00:00:00 2001 From: Nick White Date: Thu, 5 Sep 2019 22:24:37 +0100 Subject: Handle no words found error in a better way so any page that is actually 0 confidence is recognised --- bookpipeline/cmd/bookpipeline/main.go | 6 +++--- lib/hocr/hocr.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bookpipeline/cmd/bookpipeline/main.go b/bookpipeline/cmd/bookpipeline/main.go index 97f4109..ec6a08d 100644 --- a/bookpipeline/cmd/bookpipeline/main.go +++ b/bookpipeline/cmd/bookpipeline/main.go @@ -149,6 +149,9 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log } logger.Println("Calculating confidence for", path) avg, err := hocr.GetAvgConf(path) + if err != nil && err.Error() == "No words found" { + continue + } if err != nil { for range toanalyse { } // consume the rest of the receiving channel so it isn't blocked @@ -156,9 +159,6 @@ func analyse(toanalyse chan string, up chan string, errc chan error, logger *log errc <- errors.New(fmt.Sprintf("Error retreiving confidence for %s: %s", path, err)) return } - if avg == 0 { - continue - } base := filepath.Base(path) codestart := strings.Index(base, "_bin") name := base[0:codestart] diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go index 052aa19..dcd0494 100644 --- a/lib/hocr/hocr.go +++ b/lib/hocr/hocr.go @@ -123,7 +123,7 @@ func GetAvgConf(hocrfn string) (float64, error) { } } if num == 0 { - return 0, nil + return 0, errors.New("No words found") } return total / num, nil } -- cgit v1.2.1-24-ge1ad