diff options
-rw-r--r-- | cmd/rescribe/embed_darwin.go | 2 | ||||
-rw-r--r-- | cmd/rescribe/embed_darwin_amd64.go | 2 | ||||
-rw-r--r-- | cmd/rescribe/embed_darwin_arm64.go | 2 | ||||
-rw-r--r-- | cmd/rescribe/embed_linux.go | 2 | ||||
-rw-r--r-- | cmd/rescribe/embed_other.go | 7 | ||||
-rw-r--r-- | cmd/rescribe/embed_tessdata.go | 12 | ||||
-rw-r--r-- | cmd/rescribe/embed_windows.go | 2 | ||||
-rw-r--r-- | cmd/rescribe/gui.go | 11 | ||||
-rw-r--r-- | cmd/rescribe/main.go | 70 | ||||
-rw-r--r-- | cmd/rescribe/makefile | 8 |
10 files changed, 82 insertions, 36 deletions
diff --git a/cmd/rescribe/embed_darwin.go b/cmd/rescribe/embed_darwin.go index da781c3..4f22b84 100644 --- a/cmd/rescribe/embed_darwin.go +++ b/cmd/rescribe/embed_darwin.go @@ -2,6 +2,8 @@ // Use of this source code is governed by the GPLv3 // license that can be found in the LICENSE file. +//go:build embed + package main import _ "embed" diff --git a/cmd/rescribe/embed_darwin_amd64.go b/cmd/rescribe/embed_darwin_amd64.go index 719c9cc..1f7f8c2 100644 --- a/cmd/rescribe/embed_darwin_amd64.go +++ b/cmd/rescribe/embed_darwin_amd64.go @@ -2,6 +2,8 @@ // Use of this source code is governed by the GPLv3 // license that can be found in the LICENSE file. +//go:build embed + package main import _ "embed" diff --git a/cmd/rescribe/embed_darwin_arm64.go b/cmd/rescribe/embed_darwin_arm64.go index a1ca9b8..4c154be 100644 --- a/cmd/rescribe/embed_darwin_arm64.go +++ b/cmd/rescribe/embed_darwin_arm64.go @@ -2,6 +2,8 @@ // Use of this source code is governed by the GPLv3 // license that can be found in the LICENSE file. +//go:build embed + package main import _ "embed" diff --git a/cmd/rescribe/embed_linux.go b/cmd/rescribe/embed_linux.go index eb09dce..3cfd18b 100644 --- a/cmd/rescribe/embed_linux.go +++ b/cmd/rescribe/embed_linux.go @@ -2,6 +2,8 @@ // Use of this source code is governed by the GPLv3 // license that can be found in the LICENSE file. +//go:build embed + package main import _ "embed" diff --git a/cmd/rescribe/embed_other.go b/cmd/rescribe/embed_other.go index 86848d2..ac9ce3a 100644 --- a/cmd/rescribe/embed_other.go +++ b/cmd/rescribe/embed_other.go @@ -2,13 +2,12 @@ // Use of this source code is governed by the GPLv3 // license that can be found in the LICENSE file. -// +build !darwin -// +build !linux -// +build !windows +//go:build (!darwin && !linux && !windows) || !embed package main // if not one of the above platforms, we won't embed anything, so -// just create an empty byte slice +// just create empty byte slices var tesszip []byte var gbookzip []byte +var tessdatazip []byte diff --git a/cmd/rescribe/embed_tessdata.go b/cmd/rescribe/embed_tessdata.go new file mode 100644 index 0000000..ea9ce8f --- /dev/null +++ b/cmd/rescribe/embed_tessdata.go @@ -0,0 +1,12 @@ +// Copyright 2022 Nick White. +// Use of this source code is governed by the GPLv3 +// license that can be found in the LICENSE file. + +//go:build embed + +package main + +import _ "embed" + +//go:embed tessdata.20220322.zip +var tessdatazip []byte diff --git a/cmd/rescribe/embed_windows.go b/cmd/rescribe/embed_windows.go index 3e49161..f3fe193 100644 --- a/cmd/rescribe/embed_windows.go +++ b/cmd/rescribe/embed_windows.go @@ -2,6 +2,8 @@ // Use of this source code is governed by the GPLv3 // license that can be found in the LICENSE file. +//go:build embed + package main import _ "embed" diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go index f4a622d..5031f0d 100644 --- a/cmd/rescribe/gui.go +++ b/cmd/rescribe/gui.go @@ -438,6 +438,17 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess progressBar.SetValue(0.1) if strings.HasPrefix(dir.Text, "Google Book: ") { + if gbookcmd == "" { + msg := fmt.Sprintf("No getgbook found, can't download Google Book. Either set -gbookcmd on the command line, or use the official build which includes an embedded copy of getgbook.\n") + dialog.ShowError(errors.New(msg), myWindow) + fmt.Fprintf(os.Stderr, msg) + progressBar.SetValue(0.0) + for _, v := range disableWidgets { + v.Enable() + } + abortbtn.Disable() + return + } progressBar.SetValue(0.11) start := len("Google Book: ") bookname = dir.Text[start : start+12] diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index fd5b33b..16ca024 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -13,7 +13,6 @@ import ( "archive/zip" "bytes" "context" - _ "embed" "flag" "fmt" "image/jpeg" @@ -35,7 +34,7 @@ import ( "rescribe.xyz/utils/pkg/hocr" ) -const usage = `Usage: rescribe [-v] [-gui] [-systess] [-tesscmd] [-t training] bookdir/book.pdf [savedir] +const usage = `Usage: rescribe [-v] [-gui] [-systess] [-tesscmd cmd] [-gbookcmd cmd] [-t training] bookdir/book.pdf [savedir] Process and OCR a book using the Rescribe pipeline on a local machine. @@ -43,9 +42,6 @@ OCR results are saved into the bookdir directory unless savedir is specified. ` -//go:embed tessdata.20220322.zip -var tessdatazip []byte - const QueueTimeoutSecs = 2 * 60 const PauseBetweenChecks = 1 * time.Second const LogSaveTime = 1 * time.Minute @@ -95,7 +91,7 @@ func resetTimer(t *time.Timer, d time.Duration) { } } -// unpackTessZip unpacks a byte array of a zip file into a directory +// unpackZip unpacks a byte array of a zip file into a directory func unpackZip(b []byte, dir string) error { br := bytes.NewReader(b) zr, err := zip.NewReader(br, br.Size()) @@ -140,8 +136,10 @@ func unpackZip(b []byte, dir string) error { func main() { deftesscmd := "tesseract" + defgbookcmd := "getgbook" if runtime.GOOS == "windows" { deftesscmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe" + defgbookcmd = "getgbook.exe" } verbose := flag.Bool("v", false, "verbose") @@ -153,6 +151,7 @@ These training files are included in rescribe, and are always available: - lat.traineddata (Latin, modern print) - rescribev9_fast.traineddata (Latin/English/French, printed ca 1500-1800) `) + gbookcmd := flag.String("gbookcmd", defgbookcmd, "The getgbook executable to run. You may need to set this to the full path of getgbook.exe if you're on Windows.") tesscmd := flag.String("tesscmd", deftesscmd, "The Tesseract executable to run. You may need to set this to the full path of Tesseract.exe if you're on Windows.") wipe := flag.Bool("wipe", false, "Use wiper tool to remove noise like gutters from page before processing.") fullpdf := flag.Bool("fullpdf", false, "Use highest image quality for searchable PDF (requires lots of RAM).") @@ -187,7 +186,7 @@ These training files are included in rescribe, and are always available: log.Fatalln("Error setting up tesseract directory:", err) } - if !*systess { + if !*systess && len(tesszip) > 0 { err = unpackZip(tesszip, tessdir) if err != nil { log.Fatalln("Error unpacking embedded Tesseract zip:", err) @@ -202,18 +201,31 @@ These training files are included in rescribe, and are always available: } } - err = unpackZip(gbookzip, tessdir) + _, err = exec.LookPath(tessCommand) if err != nil { - log.Fatalln("Error unpacking embedded getgbook zip:", err) + log.Fatalf("No tesseract executable found [tried %s], either set -tesscmd and -systess on the command line or use the official build which includes an embedded copy of Tesseract.", tessCommand) + } + + gbookCommand := *gbookcmd + if len(gbookzip) > 0 { + err = unpackZip(gbookzip, tessdir) + if err != nil { + log.Fatalln("Error unpacking embedded getgbook zip:", err) + } + switch runtime.GOOS { + case "darwin": + gbookCommand = filepath.Join(tessdir, "getgbook") + case "linux": + gbookCommand = filepath.Join(tessdir, "getgbook") + case "windows": + gbookCommand = filepath.Join(tessdir, "getgbook.exe") + } } - var gbookCommand string - switch runtime.GOOS { - case "darwin": - gbookCommand = filepath.Join(tessdir, "getgbook") - case "linux": - gbookCommand = filepath.Join(tessdir, "getgbook") - case "windows": - gbookCommand = filepath.Join(tessdir, "getgbook.exe") + + _, err = exec.LookPath(gbookCommand) + if err != nil { + log.Printf("No getgbook found [tried %s], google book downloading will be disabled, either set -gbookcmd on the command line or use the official build which includes an embedded getgbook.", gbookCommand) + gbookCommand = "" } tessdatadir := filepath.Join(tessdir, "tessdata") @@ -221,9 +233,11 @@ These training files are included in rescribe, and are always available: if err != nil { log.Fatalln("Error setting up tessdata directory:", err) } - err = unpackZip(tessdatazip, tessdatadir) - if err != nil { - log.Fatalln("Error unpacking embedded tessdata zip:", err) + if len(tessdatazip) > 0 { + err = unpackZip(tessdatazip, tessdatadir) + if err != nil { + log.Fatalln("Error unpacking embedded tessdata zip:", err) + } } // if trainingPath doesn't exist, set it to the embedded training instead @@ -233,14 +247,6 @@ These training files are included in rescribe, and are always available: trainingPath = filepath.Join(tessdatadir, trainingPath) } - f, err := os.Open(trainingPath) - if err != nil { - fmt.Fprintf(os.Stderr, "Error: Training files %s or %s could not be opened.\n", *training, trainingPath) - fmt.Fprintf(os.Stderr, "Set the `-t` flag with path to a tesseract .traineddata file.\n") - os.Exit(1) - } - f.Close() - abstraining, err := filepath.Abs(trainingPath) if err != nil { log.Fatalf("Error getting absolute path of training %s: %v", trainingPath, err) @@ -265,6 +271,14 @@ These training files are included in rescribe, and are always available: return } + f, err := os.Open(trainingPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: Training files %s or %s could not be opened.\n", *training, trainingPath) + fmt.Fprintf(os.Stderr, "Set the `-t` flag with path to a tesseract .traineddata file.\n") + os.Exit(1) + } + f.Close() + bookdir := flag.Arg(0) bookname := strings.ReplaceAll(filepath.Base(bookdir), " ", "_") savedir := bookdir diff --git a/cmd/rescribe/makefile b/cmd/rescribe/makefile index ae92dda..23f17fb 100644 --- a/cmd/rescribe/makefile +++ b/cmd/rescribe/makefile @@ -26,17 +26,17 @@ all: dist/linux/rescribe dist/darwin/rescribe.zip dist/windows/rescribe.exe dist/linux/rescribe: $(GODEPS) go generate mkdir -p dist/linux - GOOS=linux GOARCH=amd64 go build -o $@ . + GOOS=linux GOARCH=amd64 go build -tags embed -o $@ . build/darwin_amd64/rescribe: $(GODEPS) go generate mkdir -p build/darwin_amd64 - PATH="$(PATH):$(OSXCROSSBIN)" CC="o64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -o $@ . + PATH="$(PATH):$(OSXCROSSBIN)" CC="o64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -tags embed -o $@ . build/darwin_arm64/rescribe: $(GODEPS) go generate mkdir -p build/darwin_arm64 - PATH="$(PATH):$(OSXCROSSBIN)" CC="oa64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -o $@ . + PATH="$(PATH):$(OSXCROSSBIN)" CC="oa64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -tags embed -o $@ . build/darwin/rescribe: build/darwin_amd64/rescribe build/darwin_arm64/rescribe mkdir -p build/darwin @@ -54,7 +54,7 @@ dist/darwin/rescribe.zip: build/darwin/Rescribe.app build/windows/rescribe-bin.exe: $(GODEPS) go generate mkdir -p build/windows - CC="x86_64-w64-mingw32-gcc" CGO_ENABLED=1 GOOS=windows GOARCH=amd64 go build -o $@ . + CC="x86_64-w64-mingw32-gcc" CGO_ENABLED=1 GOOS=windows GOARCH=amd64 go build -tags embed -o $@ . dist/windows/rescribe.exe: build/windows/rescribe-bin.exe mkdir -p dist/windows |