diff options
| author | Nick White <git@njw.name> | 2021-06-22 12:24:35 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2021-06-22 12:24:35 +0100 | 
| commit | 74e75ed54d4075c1ffeeba22513a3a2edd25fee5 (patch) | |
| tree | f5f1b50969255a4364811595b9e195e050e49a1f | |
| parent | b6bb87832710590b4a6376bd2630d85967faff9c (diff) | |
rescribe: Add an embedded tessdata
| -rw-r--r-- | cmd/rescribe/main.go | 20 | 
1 files changed, 20 insertions, 0 deletions
```diff
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 51a33b2..3325282 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -43,6 +43,9 @@ Note that embedded Tesseract includes these training files:
 - rescribev8_fast.traineddata (Latin historic printing)
 `
 
+//go:embed tessdata.20210622.zip
+var tessdatazip []byte
+
 const QueueTimeoutSecs = 2 * 60
 const PauseBetweenChecks = 1 * time.Second
 const LogSaveTime = 1 * time.Minute
@@ -188,6 +191,23 @@ func main() {
 			tessCommand = filepath.Join(tessdir, "tesseract.exe")
 		// TODO: add linux and osx
 		}
+
+		tessdatadir := filepath.Join(tessdir, "tessdata")
+		err = os.MkdirAll(tessdatadir, 0755)
+		if err != nil {
+			log.Fatalln("Error setting up tessdata directory:", err)
+		}
+		err = unpackZip(tessdatazip, tessdatadir)
+		if err != nil {
+			log.Fatalln("Error unpacking embedded tessdata zip:", err)
+		}
+
+		// if trainingPath doesn't exist, set it to the embedded training instead
+		_, err = os.Stat(trainingPath)
+		if !os.IsExist(err) {
+			trainingPath = filepath.Base(trainingPath)
+			trainingPath = filepath.Join(tessdatadir, trainingPath)
+		}
 	}
 
 	f, err := os.Open(trainingPath)
```
