From f71fd636f151e5cb7eafb2ae6c21c1c188d43fdd Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 17 Nov 2020 12:24:42 +0000 Subject: Remove _bin0.x from txt filenames --- cmd/rescribe/main.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'cmd') diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index 2320a2c..f4489d8 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -34,6 +34,7 @@ Process and OCR a book using the Rescribe pipeline on a local machine. const QueueTimeoutSecs = 2 * 60 const PauseBetweenChecks = 1 * time.Second const LogSaveTime = 1 * time.Minute +var thresholds = []float64{0.1, 0.2, 0.3} // null writer to enable non-verbose logging to be discarded type NullWriter bool @@ -222,8 +223,11 @@ func addTxtVersion(hocrfn string) error { return fmt.Errorf("Error getting text from hocr file %s: %v", hocrfn, err) } - basefn := strings.TrimSuffix(filepath.Base(hocrfn), ".hocr") + ".txt" - fn := filepath.Join(dir, "text", basefn) + basefn := filepath.Base(hocrfn) + for _, v := range thresholds { + basefn = strings.TrimSuffix(basefn, fmt.Sprintf("_bin%.1f.hocr", v)) + } + fn := filepath.Join(dir, "text", basefn + ".txt") err = ioutil.WriteFile(fn, []byte(t), 0644) if err != nil { @@ -312,7 +316,7 @@ func processbook(training string, tesscmd string, conn Pipeliner) error { stopTimer(stopIfQuiet) conn.Log("Message received on preprocess queue, processing", msg.Body) fmt.Printf(" Preprocessing book (binarising and wiping)\n") - err = pipeline.ProcessBook(msg, conn, pipeline.Preprocess([]float64{0.1, 0.2, 0.3}), origPattern, conn.PreQueueId(), conn.OCRPageQueueId()) + err = pipeline.ProcessBook(msg, conn, pipeline.Preprocess(thresholds), origPattern, conn.PreQueueId(), conn.OCRPageQueueId()) fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output resetTimer(stopIfQuiet, quietTime) if err != nil { -- cgit v1.2.1-24-ge1ad