From 74e75ed54d4075c1ffeeba22513a3a2edd25fee5 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Tue, 22 Jun 2021 12:24:35 +0100
Subject: rescribe: Add an embedded tessdata

Embed tessdata.20210622.zip into the binary as tessdatazip, and in
main() unpack it into a "tessdata" directory created under tessdir.

If trainingPath does not exist on disk, it is replaced with the file
of the same base name inside the unpacked tessdata directory. The
existence test uses os.IsNotExist: os.Stat returns a nil error when
the path exists, and os.IsExist reports false for a nil error, so
negating os.IsExist would treat every path as missing.
---
 cmd/rescribe/main.go | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'cmd/rescribe/main.go')

diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 51a33b2..3325282 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -43,6 +43,9 @@ Note that embedded Tesseract includes these training files:
 - rescribev8_fast.traineddata (Latin historic printing)
 `
 
+//go:embed tessdata.20210622.zip
+var tessdatazip []byte
+
 const QueueTimeoutSecs = 2 * 60
 const PauseBetweenChecks = 1 * time.Second
 const LogSaveTime = 1 * time.Minute
@@ -188,6 +191,23 @@ func main() {
 			tessCommand = filepath.Join(tessdir, "tesseract.exe")
 			// TODO: add linux and osx
 		}
+
+		tessdatadir := filepath.Join(tessdir, "tessdata")
+		err = os.MkdirAll(tessdatadir, 0755)
+		if err != nil {
+			log.Fatalln("Error setting up tessdata directory:", err)
+		}
+		err = unpackZip(tessdatazip, tessdatadir)
+		if err != nil {
+			log.Fatalln("Error unpacking embedded tessdata zip:", err)
+		}
+
+		// if trainingPath doesn't exist, set it to the embedded training instead
+		_, err = os.Stat(trainingPath)
+		if os.IsNotExist(err) {
+			trainingPath = filepath.Base(trainingPath)
+			trainingPath = filepath.Join(tessdatadir, trainingPath)
+		}
 	}
 
 	f, err := os.Open(trainingPath)
-- 
cgit v1.2.1-24-ge1ad