diff options
author | Nick White <git@njw.name> | 2021-06-22 12:24:35 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2021-06-22 12:24:35 +0100 |
commit | 74e75ed54d4075c1ffeeba22513a3a2edd25fee5 (patch) | |
tree | f5f1b50969255a4364811595b9e195e050e49a1f | |
parent | b6bb87832710590b4a6376bd2630d85967faff9c (diff) |
rescribe: Add an embedded tessdata
-rw-r--r-- | cmd/rescribe/main.go | 20 |
1 files changed, 20 insertions, 0 deletions
```diff
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 51a33b2..3325282 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -43,6 +43,9 @@ Note that embedded Tesseract includes these training files:
 - rescribev8_fast.traineddata (Latin historic printing)
 `
 
+//go:embed tessdata.20210622.zip
+var tessdatazip []byte
+
 const QueueTimeoutSecs = 2 * 60
 const PauseBetweenChecks = 1 * time.Second
 const LogSaveTime = 1 * time.Minute
@@ -188,6 +191,23 @@ func main() {
 			tessCommand = filepath.Join(tessdir, "tesseract.exe")
 			// TODO: add linux and osx
 		}
+
+		tessdatadir := filepath.Join(tessdir, "tessdata")
+		err = os.MkdirAll(tessdatadir, 0755)
+		if err != nil {
+			log.Fatalln("Error setting up tessdata directory:", err)
+		}
+		err = unpackZip(tessdatazip, tessdatadir)
+		if err != nil {
+			log.Fatalln("Error unpacking embedded tessdata zip:", err)
+		}
+
+		// if trainingPath doesn't exist, set it to the embedded training instead
+		_, err = os.Stat(trainingPath)
+		if !os.IsExist(err) {
+			trainingPath = filepath.Base(trainingPath)
+			trainingPath = filepath.Join(tessdatadir, trainingPath)
+		}
 	}
 
 	f, err := os.Open(trainingPath)
```