From 1076afbdab1a976af3e48646f01e1fbe4e954028 Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 21 Mar 2022 17:35:44 +0000 Subject: Update tessdata to only include a few trainings --- cmd/rescribe/getembeds.go | 1 + cmd/rescribe/gui.go | 10 ++++------ cmd/rescribe/main.go | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cmd/rescribe/getembeds.go b/cmd/rescribe/getembeds.go index b9b59ae..3d02f7e 100644 --- a/cmd/rescribe/getembeds.go +++ b/cmd/rescribe/getembeds.go @@ -86,6 +86,7 @@ func main() { sum string }{ {"https://rescribe.xyz/rescribe/embeds/tessdata.20211001.zip", "5c90ae69b9e449d85e84b4806a54d6739b572730525010483e512a62a527b030"}, + {"https://rescribe.xyz/rescribe/embeds/tessdata.20220321.zip", "c6dddf99ad719b29fd6bde1a416a51674bd1834d2df8e519313d584e759a8e0e"}, {"https://rescribe.xyz/rescribe/embeds/tesseract-linux-v5.0.0-alpha.20210510.zip", "81cfba632b8aaf0a00180b1aa62d357d50f343b0e9bd51b941ee14c289ccd889"}, {"https://rescribe.xyz/rescribe/embeds/tesseract-osx-v4.1.1.20191227.zip", "5f567b95f1dea9d0581ad42ada4d1f1160a38ea22ae338f9efe190015265636b"}, {"https://rescribe.xyz/rescribe/embeds/tesseract-osx-m1-v4.1.1.20210802.zip", "c9a454633f7e5175e2d50dd939d30a6e5bdfb3b8c78590a08b5aa21edbf32ca4"}, diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go index f96a342..cf3796c 100644 --- a/cmd/rescribe/gui.go +++ b/cmd/rescribe/gui.go @@ -35,11 +35,9 @@ var progressPoints = map[float64]string{ } var trainingNames = map[string]string{ - "carolinemsv1_fast": "Caroline Miniscule", - "eng": "English (modern printing)", - "lat": "Latin (modern printing)", - "rescribefrav2_fast": "French (early printing)", - "rescribev8_fast": "Latin (early printing)", + "eng": "English (modern print)", + "lat": "Latin (modern print)", + "rescribev8_fast": "Latin/English/French (printed ca 1500-1800)", } // copyStdoutToChan creates a pipe to copy anything written @@ -489,7 +487,7 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess // happens if extractPdfImgs recovers from a PDF panic, // which will occur if we encounter an image we can't decode if bookdir == "" { - msg := fmt.Sprintf("Error opening PDF\nThe format of this PDF is not supported, extract the images manually into a folder first.\n") + msg := fmt.Sprintf("Error opening PDF\nThe format of this PDF is not supported, extract the images to .jpg manually into a folder first.\n") dialog.ShowError(errors.New(msg), myWindow) fmt.Fprintf(os.Stderr, msg) diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index 53bbe3c..4b6fcca 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -43,7 +43,7 @@ OCR results are saved into the bookdir directory unless savedir is specified. ` -//go:embed tessdata.20211001.zip +//go:embed tessdata.20220321.zip var tessdatazip []byte const QueueTimeoutSecs = 2 * 60 -- cgit v1.2.1-24-ge1ad