summary | refs | log | tree | commit | diff
path: root/cmd/rescribe
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2021-06-22 12:24:35 +0100
committer: Nick White <git@njw.name> 2021-06-22 12:24:35 +0100
commit: 74e75ed54d4075c1ffeeba22513a3a2edd25fee5 (patch)
tree: f5f1b50969255a4364811595b9e195e050e49a1f /cmd/rescribe
parent: b6bb87832710590b4a6376bd2630d85967faff9c (diff)
rescribe: Add an embedded tessdata
Diffstat (limited to 'cmd/rescribe')
-rw-r--r-- cmd/rescribe/main.go 20
1 files changed, 20 insertions, 0 deletions
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 51a33b2..3325282 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -43,6 +43,9 @@ Note that embedded Tesseract includes these training files:
- rescribev8_fast.traineddata (Latin historic printing)
`
+//go:embed tessdata.20210622.zip
+var tessdatazip []byte
+
const QueueTimeoutSecs = 2 * 60
const PauseBetweenChecks = 1 * time.Second
const LogSaveTime = 1 * time.Minute
@@ -188,6 +191,23 @@ func main() {
tessCommand = filepath.Join(tessdir, "tesseract.exe")
// TODO: add linux and osx
}
+
+ tessdatadir := filepath.Join(tessdir, "tessdata")
+ err = os.MkdirAll(tessdatadir, 0755)
+ if err != nil {
+ log.Fatalln("Error setting up tessdata directory:", err)
+ }
+ err = unpackZip(tessdatazip, tessdatadir)
+ if err != nil {
+ log.Fatalln("Error unpacking embedded tessdata zip:", err)
+ }
+
+ // if trainingPath doesn't exist, set it to the embedded training instead
+ _, err = os.Stat(trainingPath)
+ if !os.IsExist(err) {
+ trainingPath = filepath.Base(trainingPath)
+ trainingPath = filepath.Join(tessdatadir, trainingPath)
+ }
}
f, err := os.Open(trainingPath)