summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick White <git@njw.name>2022-02-23 12:56:36 +0000
committerNick White <git@njw.name>2022-02-23 12:56:36 +0000
commita4d9ca8671292f9028db51748f4f028746d9bdef (patch)
tree56f3bc34ea6cc7ca2a48f50e25c0ae8b736c0e1f
parent2c29e91bd98a1354d0cc4772ca629e3f1fc327b5 (diff)
rescribe: Add embedded support for getgbook, for linux only so far
-rw-r--r--cmd/rescribe/embed_linux.go3
-rw-r--r--cmd/rescribe/embed_other.go1
-rw-r--r--cmd/rescribe/gbook.go5
-rw-r--r--cmd/rescribe/getembeds.go1
-rw-r--r--cmd/rescribe/gui.go4
-rw-r--r--cmd/rescribe/main.go20
6 files changed, 28 insertions, 6 deletions
diff --git a/cmd/rescribe/embed_linux.go b/cmd/rescribe/embed_linux.go
index c720b6e..eb09dce 100644
--- a/cmd/rescribe/embed_linux.go
+++ b/cmd/rescribe/embed_linux.go
@@ -8,3 +8,6 @@ import _ "embed"
//go:embed tesseract-linux-v5.0.0-alpha.20210510.zip
var tesszip []byte
+
+//go:embed getgbook-linux-cac42fb.zip
+var gbookzip []byte
diff --git a/cmd/rescribe/embed_other.go b/cmd/rescribe/embed_other.go
index fe51fd0..86848d2 100644
--- a/cmd/rescribe/embed_other.go
+++ b/cmd/rescribe/embed_other.go
@@ -11,3 +11,4 @@ package main
// if not one of the above platforms, we won't embed anything, so
// just create an empty byte slice
var tesszip []byte
+var gbookzip []byte
diff --git a/cmd/rescribe/gbook.go b/cmd/rescribe/gbook.go
index b1308cf..f0e9e1e 100644
--- a/cmd/rescribe/gbook.go
+++ b/cmd/rescribe/gbook.go
@@ -143,7 +143,7 @@ func moveFile(from string, to string) error {
// getGoogleBook downloads all images of a book to a directory
// named YEAR_AUTHORSURNAME_Title_bookid inside basedir, returning
// the directory path
-func getGoogleBook(ctx context.Context, id string, basedir string) (string, error) {
+func getGoogleBook(ctx context.Context, gbookcmd string, id string, basedir string) (string, error) {
author, title, year, err := getMetadata(id)
if err != nil {
return "", err
@@ -154,8 +154,7 @@ func getGoogleBook(ctx context.Context, id string, basedir string) (string, erro
return "", fmt.Errorf("Error setting up temporary directory: %v", err)
}
- // TODO: use embedded version if necessary
- cmd := exec.CommandContext(ctx, "getgbook", id)
+ cmd := exec.CommandContext(ctx, gbookcmd, id)
pipeline.HideCmd(cmd)
cmd.Dir = tmpdir
cmd.Stdout = os.Stdout
diff --git a/cmd/rescribe/getembeds.go b/cmd/rescribe/getembeds.go
index 6ab9fa8..a516234 100644
--- a/cmd/rescribe/getembeds.go
+++ b/cmd/rescribe/getembeds.go
@@ -87,6 +87,7 @@ func main() {
{"https://rescribe.xyz/rescribe/embeds/tesseract-osx-v4.1.1.20191227.zip", "5f567b95f1dea9d0581ad42ada4d1f1160a38ea22ae338f9efe190015265636b"},
{"https://rescribe.xyz/rescribe/embeds/tesseract-osx-m1-v4.1.1.20210802.zip", "c9a454633f7e5175e2d50dd939d30a6e5bdfb3b8c78590a08b5aa21edbf32ca4"},
{"https://rescribe.xyz/rescribe/embeds/tesseract-w32-v5.0.0-alpha.20210506.zip", "96734f3db4bb7c3b9a241ab6d89ab3e8436cea43b1cbbcfb13999497982f63e3"},
+ {"https://rescribe.xyz/rescribe/embeds/getgbook-linux-cac42fb.zip", "c3b40a1c13da613d383f990bda5dd72425a7f26b89102d272a3388eb3d05ddb6"},
}
for _, v := range urls {
if present(v.url, v.sum) {
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index fde660a..ad33a1e 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -245,7 +245,7 @@ func formatProgressBarText(bar *widget.ProgressBar) func() string {
}
// startGui starts the gui process
-func startGui(log log.Logger, cmd string, training string, tessdir string) error {
+func startGui(log log.Logger, cmd string, gbookcmd string, training string, tessdir string) error {
myApp := app.New()
myWindow := myApp.NewWindow("Rescribe OCR")
@@ -464,7 +464,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error
savedir = bookdir
fmt.Printf("Downloading Google Book\n")
- d, err := getGoogleBook(ctx, bookname, bookdir)
+ d, err := getGoogleBook(ctx, gbookcmd, bookname, bookdir)
if err != nil {
if !strings.HasSuffix(err.Error(), "signal: killed") {
msg := fmt.Sprintf("Error downloading Google Book %s: %v\n", bookname, err)
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 7db5e3e..65ecd60 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -201,6 +201,20 @@ These training files are included in rescribe, and are always available:
}
}
+ err = unpackZip(gbookzip, tessdir)
+ if err != nil {
+ log.Fatalln("Error unpacking embedded getgbook zip:", err)
+ }
+ var gbookCommand string // path of the getgbook binary unpacked into tessdir; stays empty on any other GOOS
+ switch runtime.GOOS {
+ case "darwin":
+ gbookCommand = filepath.Join(tessdir, "getgbook")
+ case "linux":
+ gbookCommand = filepath.Join(tessdir, "getgbook")
+ case "windows":
+ gbookCommand = filepath.Join(tessdir, "getgbook.exe")
+ }
+
tessdatadir := filepath.Join(tessdir, "tessdata")
err = os.MkdirAll(tessdatadir, 0755)
if err != nil {
@@ -238,7 +252,7 @@ These training files are included in rescribe, and are always available:
}
if flag.NArg() < 1 || *usegui {
- err := startGui(*verboselog, tessCommand, trainingName, tessdir)
+ err := startGui(*verboselog, tessCommand, gbookCommand, trainingName, tessdir)
err = os.RemoveAll(tessdir)
if err != nil {
log.Printf("Error removing tesseract directory %s: %v", tessdir, err)
@@ -266,12 +280,16 @@ These training files are included in rescribe, and are always available:
var ctx context.Context
+ // TODO: support google book downloading, as done with the GUI
+
// try opening as a PDF, and extracting
if !fi.IsDir() {
if flag.NArg() < 2 {
savedir = strings.TrimSuffix(bookdir, ".pdf")
}
+ // BUG: this seems to fail from command line, yet works from GUI
+ // (used to work)
bookdir, err = extractPdfImgs(ctx, bookdir)
if err != nil {
log.Fatalln("Error opening file as PDF:", err)