summary | refs | log | tree | commit | diff
path: root/cmd/rescribe
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2022-10-27 16:27:55 +0100
committer: Nick White <git@njw.name> 2022-10-27 16:30:37 +0100
commit: 811601e4b446b1c598af965b74155f7f76ffb7e9 (patch)
tree: b61c09c24c41b714f07cbbfee7b06aab6c534c08 /cmd/rescribe
parent: 4eaded095d755e0988e7cd8b32c7dab8ec6d0967 (diff)
Allow completely non-embedded builds
This enables installs straight from 'go install' or 'fyne install'. Because such builds carry no embedded binaries, rescribe now warns if a system getgbook isn't found, and exits with an error if tesseract isn't found (as was done already). The location of getgbook can therefore now be specified on the command line with -gbookcmd. Embedded builds are enabled with the -tags embed flag, which the makefile sets for all builds.
Diffstat (limited to 'cmd/rescribe')
-rw-r--r--  cmd/rescribe/embed_darwin.go        2
-rw-r--r--  cmd/rescribe/embed_darwin_amd64.go  2
-rw-r--r--  cmd/rescribe/embed_darwin_arm64.go  2
-rw-r--r--  cmd/rescribe/embed_linux.go         2
-rw-r--r--  cmd/rescribe/embed_other.go         7
-rw-r--r--  cmd/rescribe/embed_tessdata.go      12
-rw-r--r--  cmd/rescribe/embed_windows.go       2
-rw-r--r--  cmd/rescribe/gui.go                 11
-rw-r--r--  cmd/rescribe/main.go                70
-rw-r--r--  cmd/rescribe/makefile               8
10 files changed, 82 insertions, 36 deletions
diff --git a/cmd/rescribe/embed_darwin.go b/cmd/rescribe/embed_darwin.go
index da781c3..4f22b84 100644
--- a/cmd/rescribe/embed_darwin.go
+++ b/cmd/rescribe/embed_darwin.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/embed_darwin_amd64.go b/cmd/rescribe/embed_darwin_amd64.go
index 719c9cc..1f7f8c2 100644
--- a/cmd/rescribe/embed_darwin_amd64.go
+++ b/cmd/rescribe/embed_darwin_amd64.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/embed_darwin_arm64.go b/cmd/rescribe/embed_darwin_arm64.go
index a1ca9b8..4c154be 100644
--- a/cmd/rescribe/embed_darwin_arm64.go
+++ b/cmd/rescribe/embed_darwin_arm64.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/embed_linux.go b/cmd/rescribe/embed_linux.go
index eb09dce..3cfd18b 100644
--- a/cmd/rescribe/embed_linux.go
+++ b/cmd/rescribe/embed_linux.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/embed_other.go b/cmd/rescribe/embed_other.go
index 86848d2..ac9ce3a 100644
--- a/cmd/rescribe/embed_other.go
+++ b/cmd/rescribe/embed_other.go
@@ -2,13 +2,12 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
-// +build !darwin
-// +build !linux
-// +build !windows
+//go:build (!darwin && !linux && !windows) || !embed
package main
// if not one of the above platforms, we won't embed anything, so
-// just create an empty byte slice
+// just create empty byte slices
var tesszip []byte
var gbookzip []byte
+var tessdatazip []byte
diff --git a/cmd/rescribe/embed_tessdata.go b/cmd/rescribe/embed_tessdata.go
new file mode 100644
index 0000000..ea9ce8f
--- /dev/null
+++ b/cmd/rescribe/embed_tessdata.go
@@ -0,0 +1,12 @@
+// Copyright 2022 Nick White.
+// Use of this source code is governed by the GPLv3
+// license that can be found in the LICENSE file.
+
+//go:build embed
+
+package main
+
+import _ "embed"
+
+//go:embed tessdata.20220322.zip
+var tessdatazip []byte
diff --git a/cmd/rescribe/embed_windows.go b/cmd/rescribe/embed_windows.go
index 3e49161..f3fe193 100644
--- a/cmd/rescribe/embed_windows.go
+++ b/cmd/rescribe/embed_windows.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index f4a622d..5031f0d 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -438,6 +438,17 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
progressBar.SetValue(0.1)
if strings.HasPrefix(dir.Text, "Google Book: ") {
+ if gbookcmd == "" {
+ msg := fmt.Sprintf("No getgbook found, can't download Google Book. Either set -gbookcmd on the command line, or use the official build which includes an embedded copy of getgbook.\n")
+ dialog.ShowError(errors.New(msg), myWindow)
+ fmt.Fprintf(os.Stderr, msg)
+ progressBar.SetValue(0.0)
+ for _, v := range disableWidgets {
+ v.Enable()
+ }
+ abortbtn.Disable()
+ return
+ }
progressBar.SetValue(0.11)
start := len("Google Book: ")
bookname = dir.Text[start : start+12]
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index fd5b33b..16ca024 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -13,7 +13,6 @@ import (
"archive/zip"
"bytes"
"context"
- _ "embed"
"flag"
"fmt"
"image/jpeg"
@@ -35,7 +34,7 @@ import (
"rescribe.xyz/utils/pkg/hocr"
)
-const usage = `Usage: rescribe [-v] [-gui] [-systess] [-tesscmd] [-t training] bookdir/book.pdf [savedir]
+const usage = `Usage: rescribe [-v] [-gui] [-systess] [-tesscmd cmd] [-gbookcmd cmd] [-t training] bookdir/book.pdf [savedir]
Process and OCR a book using the Rescribe pipeline on a local machine.
@@ -43,9 +42,6 @@ OCR results are saved into the bookdir directory unless savedir is
specified.
`
-//go:embed tessdata.20220322.zip
-var tessdatazip []byte
-
const QueueTimeoutSecs = 2 * 60
const PauseBetweenChecks = 1 * time.Second
const LogSaveTime = 1 * time.Minute
@@ -95,7 +91,7 @@ func resetTimer(t *time.Timer, d time.Duration) {
}
}
-// unpackTessZip unpacks a byte array of a zip file into a directory
+// unpackZip unpacks a byte array of a zip file into a directory
func unpackZip(b []byte, dir string) error {
br := bytes.NewReader(b)
zr, err := zip.NewReader(br, br.Size())
@@ -140,8 +136,10 @@ func unpackZip(b []byte, dir string) error {
func main() {
deftesscmd := "tesseract"
+ defgbookcmd := "getgbook"
if runtime.GOOS == "windows" {
deftesscmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
+ defgbookcmd = "getgbook.exe"
}
verbose := flag.Bool("v", false, "verbose")
@@ -153,6 +151,7 @@ These training files are included in rescribe, and are always available:
- lat.traineddata (Latin, modern print)
- rescribev9_fast.traineddata (Latin/English/French, printed ca 1500-1800)
`)
+ gbookcmd := flag.String("gbookcmd", defgbookcmd, "The getgbook executable to run. You may need to set this to the full path of getgbook.exe if you're on Windows.")
tesscmd := flag.String("tesscmd", deftesscmd, "The Tesseract executable to run. You may need to set this to the full path of Tesseract.exe if you're on Windows.")
wipe := flag.Bool("wipe", false, "Use wiper tool to remove noise like gutters from page before processing.")
fullpdf := flag.Bool("fullpdf", false, "Use highest image quality for searchable PDF (requires lots of RAM).")
@@ -187,7 +186,7 @@ These training files are included in rescribe, and are always available:
log.Fatalln("Error setting up tesseract directory:", err)
}
- if !*systess {
+ if !*systess && len(tesszip) > 0 {
err = unpackZip(tesszip, tessdir)
if err != nil {
log.Fatalln("Error unpacking embedded Tesseract zip:", err)
@@ -202,18 +201,31 @@ These training files are included in rescribe, and are always available:
}
}
- err = unpackZip(gbookzip, tessdir)
+ _, err = exec.LookPath(tessCommand)
if err != nil {
- log.Fatalln("Error unpacking embedded getgbook zip:", err)
+ log.Fatalf("No tesseract executable found [tried %s], either set -tesscmd and -systess on the command line or use the official build which includes an embedded copy of Tesseract.", tessCommand)
+ }
+
+ gbookCommand := *gbookcmd
+ if len(gbookzip) > 0 {
+ err = unpackZip(gbookzip, tessdir)
+ if err != nil {
+ log.Fatalln("Error unpacking embedded getgbook zip:", err)
+ }
+ switch runtime.GOOS {
+ case "darwin":
+ gbookCommand = filepath.Join(tessdir, "getgbook")
+ case "linux":
+ gbookCommand = filepath.Join(tessdir, "getgbook")
+ case "windows":
+ gbookCommand = filepath.Join(tessdir, "getgbook.exe")
+ }
}
- var gbookCommand string
- switch runtime.GOOS {
- case "darwin":
- gbookCommand = filepath.Join(tessdir, "getgbook")
- case "linux":
- gbookCommand = filepath.Join(tessdir, "getgbook")
- case "windows":
- gbookCommand = filepath.Join(tessdir, "getgbook.exe")
+
+ _, err = exec.LookPath(gbookCommand)
+ if err != nil {
+ log.Printf("No getgbook found [tried %s], google book downloading will be disabled, either set -gbookcmd on the command line or use the official build which includes an embedded getgbook.", gbookCommand)
+ gbookCommand = ""
}
tessdatadir := filepath.Join(tessdir, "tessdata")
@@ -221,9 +233,11 @@ These training files are included in rescribe, and are always available:
if err != nil {
log.Fatalln("Error setting up tessdata directory:", err)
}
- err = unpackZip(tessdatazip, tessdatadir)
- if err != nil {
- log.Fatalln("Error unpacking embedded tessdata zip:", err)
+ if len(tessdatazip) > 0 {
+ err = unpackZip(tessdatazip, tessdatadir)
+ if err != nil {
+ log.Fatalln("Error unpacking embedded tessdata zip:", err)
+ }
}
// if trainingPath doesn't exist, set it to the embedded training instead
@@ -233,14 +247,6 @@ These training files are included in rescribe, and are always available:
trainingPath = filepath.Join(tessdatadir, trainingPath)
}
- f, err := os.Open(trainingPath)
- if err != nil {
- fmt.Fprintf(os.Stderr, "Error: Training files %s or %s could not be opened.\n", *training, trainingPath)
- fmt.Fprintf(os.Stderr, "Set the `-t` flag with path to a tesseract .traineddata file.\n")
- os.Exit(1)
- }
- f.Close()
-
abstraining, err := filepath.Abs(trainingPath)
if err != nil {
log.Fatalf("Error getting absolute path of training %s: %v", trainingPath, err)
@@ -265,6 +271,14 @@ These training files are included in rescribe, and are always available:
return
}
+ f, err := os.Open(trainingPath)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Error: Training files %s or %s could not be opened.\n", *training, trainingPath)
+ fmt.Fprintf(os.Stderr, "Set the `-t` flag with path to a tesseract .traineddata file.\n")
+ os.Exit(1)
+ }
+ f.Close()
+
bookdir := flag.Arg(0)
bookname := strings.ReplaceAll(filepath.Base(bookdir), " ", "_")
savedir := bookdir
diff --git a/cmd/rescribe/makefile b/cmd/rescribe/makefile
index ae92dda..23f17fb 100644
--- a/cmd/rescribe/makefile
+++ b/cmd/rescribe/makefile
@@ -26,17 +26,17 @@ all: dist/linux/rescribe dist/darwin/rescribe.zip dist/windows/rescribe.exe
dist/linux/rescribe: $(GODEPS)
go generate
mkdir -p dist/linux
- GOOS=linux GOARCH=amd64 go build -o $@ .
+ GOOS=linux GOARCH=amd64 go build -tags embed -o $@ .
build/darwin_amd64/rescribe: $(GODEPS)
go generate
mkdir -p build/darwin_amd64
- PATH="$(PATH):$(OSXCROSSBIN)" CC="o64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -o $@ .
+ PATH="$(PATH):$(OSXCROSSBIN)" CC="o64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -tags embed -o $@ .
build/darwin_arm64/rescribe: $(GODEPS)
go generate
mkdir -p build/darwin_arm64
- PATH="$(PATH):$(OSXCROSSBIN)" CC="oa64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -o $@ .
+ PATH="$(PATH):$(OSXCROSSBIN)" CC="oa64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -tags embed -o $@ .
build/darwin/rescribe: build/darwin_amd64/rescribe build/darwin_arm64/rescribe
mkdir -p build/darwin
@@ -54,7 +54,7 @@ dist/darwin/rescribe.zip: build/darwin/Rescribe.app
build/windows/rescribe-bin.exe: $(GODEPS)
go generate
mkdir -p build/windows
- CC="x86_64-w64-mingw32-gcc" CGO_ENABLED=1 GOOS=windows GOARCH=amd64 go build -o $@ .
+ CC="x86_64-w64-mingw32-gcc" CGO_ENABLED=1 GOOS=windows GOARCH=amd64 go build -tags embed -o $@ .
dist/windows/rescribe.exe: build/windows/rescribe-bin.exe
mkdir -p dist/windows