summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- cmd/rescribe/embed_darwin.go 2
-rw-r--r-- cmd/rescribe/embed_darwin_amd64.go 2
-rw-r--r-- cmd/rescribe/embed_darwin_arm64.go 2
-rw-r--r-- cmd/rescribe/embed_linux.go 2
-rw-r--r-- cmd/rescribe/embed_other.go 7
-rw-r--r-- cmd/rescribe/embed_tessdata.go 12
-rw-r--r-- cmd/rescribe/embed_windows.go 2
-rw-r--r-- cmd/rescribe/gui.go 11
-rw-r--r-- cmd/rescribe/main.go 70
-rw-r--r-- cmd/rescribe/makefile 8
10 files changed, 82 insertions, 36 deletions
diff --git a/cmd/rescribe/embed_darwin.go b/cmd/rescribe/embed_darwin.go
index da781c3..4f22b84 100644
--- a/cmd/rescribe/embed_darwin.go
+++ b/cmd/rescribe/embed_darwin.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/embed_darwin_amd64.go b/cmd/rescribe/embed_darwin_amd64.go
index 719c9cc..1f7f8c2 100644
--- a/cmd/rescribe/embed_darwin_amd64.go
+++ b/cmd/rescribe/embed_darwin_amd64.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/embed_darwin_arm64.go b/cmd/rescribe/embed_darwin_arm64.go
index a1ca9b8..4c154be 100644
--- a/cmd/rescribe/embed_darwin_arm64.go
+++ b/cmd/rescribe/embed_darwin_arm64.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/embed_linux.go b/cmd/rescribe/embed_linux.go
index eb09dce..3cfd18b 100644
--- a/cmd/rescribe/embed_linux.go
+++ b/cmd/rescribe/embed_linux.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/embed_other.go b/cmd/rescribe/embed_other.go
index 86848d2..ac9ce3a 100644
--- a/cmd/rescribe/embed_other.go
+++ b/cmd/rescribe/embed_other.go
@@ -2,13 +2,12 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
-// +build !darwin
-// +build !linux
-// +build !windows
+//go:build (!darwin && !linux && !windows) || !embed
package main
// if not one of the above platforms, we won't embed anything, so
-// just create an empty byte slice
+// just create empty byte slices
var tesszip []byte
var gbookzip []byte
+var tessdatazip []byte
diff --git a/cmd/rescribe/embed_tessdata.go b/cmd/rescribe/embed_tessdata.go
new file mode 100644
index 0000000..ea9ce8f
--- /dev/null
+++ b/cmd/rescribe/embed_tessdata.go
@@ -0,0 +1,12 @@
+// Copyright 2022 Nick White.
+// Use of this source code is governed by the GPLv3
+// license that can be found in the LICENSE file.
+
+//go:build embed
+
+package main
+
+import _ "embed"
+
+//go:embed tessdata.20220322.zip
+var tessdatazip []byte
diff --git a/cmd/rescribe/embed_windows.go b/cmd/rescribe/embed_windows.go
index 3e49161..f3fe193 100644
--- a/cmd/rescribe/embed_windows.go
+++ b/cmd/rescribe/embed_windows.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index f4a622d..5031f0d 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -438,6 +438,17 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
progressBar.SetValue(0.1)
if strings.HasPrefix(dir.Text, "Google Book: ") {
+ if gbookcmd == "" {
+ msg := fmt.Sprintf("No getgbook found, can't download Google Book. Either set -gbookcmd on the command line, or use the official build which includes an embedded copy of getgbook.\n")
+ dialog.ShowError(errors.New(msg), myWindow)
+ fmt.Fprintf(os.Stderr, msg)
+ progressBar.SetValue(0.0)
+ for _, v := range disableWidgets {
+ v.Enable()
+ }
+ abortbtn.Disable()
+ return
+ }
progressBar.SetValue(0.11)
start := len("Google Book: ")
bookname = dir.Text[start : start+12]
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index fd5b33b..16ca024 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -13,7 +13,6 @@ import (
"archive/zip"
"bytes"
"context"
- _ "embed"
"flag"
"fmt"
"image/jpeg"
@@ -35,7 +34,7 @@ import (
"rescribe.xyz/utils/pkg/hocr"
)
-const usage = `Usage: rescribe [-v] [-gui] [-systess] [-tesscmd] [-t training] bookdir/book.pdf [savedir]
+const usage = `Usage: rescribe [-v] [-gui] [-systess] [-tesscmd cmd] [-gbookcmd cmd] [-t training] bookdir/book.pdf [savedir]
Process and OCR a book using the Rescribe pipeline on a local machine.
@@ -43,9 +42,6 @@ OCR results are saved into the bookdir directory unless savedir is
specified.
`
-//go:embed tessdata.20220322.zip
-var tessdatazip []byte
-
const QueueTimeoutSecs = 2 * 60
const PauseBetweenChecks = 1 * time.Second
const LogSaveTime = 1 * time.Minute
@@ -95,7 +91,7 @@ func resetTimer(t *time.Timer, d time.Duration) {
}
}
-// unpackTessZip unpacks a byte array of a zip file into a directory
+// unpackZip unpacks a byte array of a zip file into a directory
func unpackZip(b []byte, dir string) error {
br := bytes.NewReader(b)
zr, err := zip.NewReader(br, br.Size())
@@ -140,8 +136,10 @@ func unpackZip(b []byte, dir string) error {
func main() {
deftesscmd := "tesseract"
+ defgbookcmd := "getgbook"
if runtime.GOOS == "windows" {
deftesscmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
+ defgbookcmd = "getgbook.exe"
}
verbose := flag.Bool("v", false, "verbose")
@@ -153,6 +151,7 @@ These training files are included in rescribe, and are always available:
- lat.traineddata (Latin, modern print)
- rescribev9_fast.traineddata (Latin/English/French, printed ca 1500-1800)
`)
+ gbookcmd := flag.String("gbookcmd", defgbookcmd, "The getgbook executable to run. You may need to set this to the full path of getgbook.exe if you're on Windows.")
tesscmd := flag.String("tesscmd", deftesscmd, "The Tesseract executable to run. You may need to set this to the full path of Tesseract.exe if you're on Windows.")
wipe := flag.Bool("wipe", false, "Use wiper tool to remove noise like gutters from page before processing.")
fullpdf := flag.Bool("fullpdf", false, "Use highest image quality for searchable PDF (requires lots of RAM).")
@@ -187,7 +186,7 @@ These training files are included in rescribe, and are always available:
log.Fatalln("Error setting up tesseract directory:", err)
}
- if !*systess {
+ if !*systess && len(tesszip) > 0 {
err = unpackZip(tesszip, tessdir)
if err != nil {
log.Fatalln("Error unpacking embedded Tesseract zip:", err)
@@ -202,18 +201,31 @@ These training files are included in rescribe, and are always available:
}
}
- err = unpackZip(gbookzip, tessdir)
+ _, err = exec.LookPath(tessCommand)
if err != nil {
- log.Fatalln("Error unpacking embedded getgbook zip:", err)
+ log.Fatalf("No tesseract executable found [tried %s], either set -tesscmd and -systess on the command line or use the official build which includes an embedded copy of Tesseract.", tessCommand)
+ }
+
+ gbookCommand := *gbookcmd
+ if len(gbookzip) > 0 {
+ err = unpackZip(gbookzip, tessdir)
+ if err != nil {
+ log.Fatalln("Error unpacking embedded getgbook zip:", err)
+ }
+ switch runtime.GOOS {
+ case "darwin":
+ gbookCommand = filepath.Join(tessdir, "getgbook")
+ case "linux":
+ gbookCommand = filepath.Join(tessdir, "getgbook")
+ case "windows":
+ gbookCommand = filepath.Join(tessdir, "getgbook.exe")
+ }
}
- var gbookCommand string
- switch runtime.GOOS {
- case "darwin":
- gbookCommand = filepath.Join(tessdir, "getgbook")
- case "linux":
- gbookCommand = filepath.Join(tessdir, "getgbook")
- case "windows":
- gbookCommand = filepath.Join(tessdir, "getgbook.exe")
+
+ _, err = exec.LookPath(gbookCommand)
+ if err != nil {
+ log.Printf("No getgbook found [tried %s], google book downloading will be disabled, either set -gbookcmd on the command line or use the official build which includes an embedded getgbook.", gbookCommand)
+ gbookCommand = ""
}
tessdatadir := filepath.Join(tessdir, "tessdata")
@@ -221,9 +233,11 @@ These training files are included in rescribe, and are always available:
if err != nil {
log.Fatalln("Error setting up tessdata directory:", err)
}
- err = unpackZip(tessdatazip, tessdatadir)
- if err != nil {
- log.Fatalln("Error unpacking embedded tessdata zip:", err)
+ if len(tessdatazip) > 0 {
+ err = unpackZip(tessdatazip, tessdatadir)
+ if err != nil {
+ log.Fatalln("Error unpacking embedded tessdata zip:", err)
+ }
}
// if trainingPath doesn't exist, set it to the embedded training instead
@@ -233,14 +247,6 @@ These training files are included in rescribe, and are always available:
trainingPath = filepath.Join(tessdatadir, trainingPath)
}
- f, err := os.Open(trainingPath)
- if err != nil {
- fmt.Fprintf(os.Stderr, "Error: Training files %s or %s could not be opened.\n", *training, trainingPath)
- fmt.Fprintf(os.Stderr, "Set the `-t` flag with path to a tesseract .traineddata file.\n")
- os.Exit(1)
- }
- f.Close()
-
abstraining, err := filepath.Abs(trainingPath)
if err != nil {
log.Fatalf("Error getting absolute path of training %s: %v", trainingPath, err)
@@ -265,6 +271,14 @@ These training files are included in rescribe, and are always available:
return
}
+ f, err := os.Open(trainingPath)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Error: Training files %s or %s could not be opened.\n", *training, trainingPath)
+ fmt.Fprintf(os.Stderr, "Set the `-t` flag with path to a tesseract .traineddata file.\n")
+ os.Exit(1)
+ }
+ f.Close()
+
bookdir := flag.Arg(0)
bookname := strings.ReplaceAll(filepath.Base(bookdir), " ", "_")
savedir := bookdir
diff --git a/cmd/rescribe/makefile b/cmd/rescribe/makefile
index ae92dda..23f17fb 100644
--- a/cmd/rescribe/makefile
+++ b/cmd/rescribe/makefile
@@ -26,17 +26,17 @@ all: dist/linux/rescribe dist/darwin/rescribe.zip dist/windows/rescribe.exe
dist/linux/rescribe: $(GODEPS)
go generate
mkdir -p dist/linux
- GOOS=linux GOARCH=amd64 go build -o $@ .
+ GOOS=linux GOARCH=amd64 go build -tags embed -o $@ .
build/darwin_amd64/rescribe: $(GODEPS)
go generate
mkdir -p build/darwin_amd64
- PATH="$(PATH):$(OSXCROSSBIN)" CC="o64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -o $@ .
+ PATH="$(PATH):$(OSXCROSSBIN)" CC="o64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -tags embed -o $@ .
build/darwin_arm64/rescribe: $(GODEPS)
go generate
mkdir -p build/darwin_arm64
- PATH="$(PATH):$(OSXCROSSBIN)" CC="oa64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -o $@ .
+ PATH="$(PATH):$(OSXCROSSBIN)" CC="oa64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -tags embed -o $@ .
build/darwin/rescribe: build/darwin_amd64/rescribe build/darwin_arm64/rescribe
mkdir -p build/darwin
@@ -54,7 +54,7 @@ dist/darwin/rescribe.zip: build/darwin/Rescribe.app
build/windows/rescribe-bin.exe: $(GODEPS)
go generate
mkdir -p build/windows
- CC="x86_64-w64-mingw32-gcc" CGO_ENABLED=1 GOOS=windows GOARCH=amd64 go build -o $@ .
+ CC="x86_64-w64-mingw32-gcc" CGO_ENABLED=1 GOOS=windows GOARCH=amd64 go build -tags embed -o $@ .
dist/windows/rescribe.exe: build/windows/rescribe-bin.exe
mkdir -p dist/windows