From a4d9ca8671292f9028db51748f4f028746d9bdef Mon Sep 17 00:00:00 2001 From: Nick White Date: Wed, 23 Feb 2022 12:56:36 +0000 Subject: rescribe: Add embedded support for getgbook, for linux only so far --- cmd/rescribe/embed_linux.go | 3 +++ cmd/rescribe/embed_other.go | 1 + cmd/rescribe/gbook.go | 5 ++--- cmd/rescribe/getembeds.go | 1 + cmd/rescribe/gui.go | 4 ++-- cmd/rescribe/main.go | 20 +++++++++++++++++++- 6 files changed, 28 insertions(+), 6 deletions(-) diff --git a/cmd/rescribe/embed_linux.go b/cmd/rescribe/embed_linux.go index c720b6e..eb09dce 100644 --- a/cmd/rescribe/embed_linux.go +++ b/cmd/rescribe/embed_linux.go @@ -8,3 +8,6 @@ import _ "embed" //go:embed tesseract-linux-v5.0.0-alpha.20210510.zip var tesszip []byte + +//go:embed getgbook-linux-cac42fb.zip +var gbookzip []byte diff --git a/cmd/rescribe/embed_other.go b/cmd/rescribe/embed_other.go index fe51fd0..86848d2 100644 --- a/cmd/rescribe/embed_other.go +++ b/cmd/rescribe/embed_other.go @@ -11,3 +11,4 @@ package main // if not one of the above platforms, we won't embed anything, so // just create an empty byte slice var tesszip []byte +var gbookzip []byte diff --git a/cmd/rescribe/gbook.go b/cmd/rescribe/gbook.go index b1308cf..f0e9e1e 100644 --- a/cmd/rescribe/gbook.go +++ b/cmd/rescribe/gbook.go @@ -143,7 +143,7 @@ func moveFile(from string, to string) error { // getGoogleBook downloads all images of a book to a directory // named YEAR_AUTHORSURNAME_Title_bookid inside basedir, returning // the directory path -func getGoogleBook(ctx context.Context, id string, basedir string) (string, error) { +func getGoogleBook(ctx context.Context, gbookcmd string, id string, basedir string) (string, error) { author, title, year, err := getMetadata(id) if err != nil { return "", err @@ -154,8 +154,7 @@ func getGoogleBook(ctx context.Context, id string, basedir string) (string, erro return "", fmt.Errorf("Error setting up temporary directory: %v", err) } - // TODO: use embedded version if necessary - cmd := 
exec.CommandContext(ctx, "getgbook", id) + cmd := exec.CommandContext(ctx, gbookcmd, id) pipeline.HideCmd(cmd) cmd.Dir = tmpdir cmd.Stdout = os.Stdout diff --git a/cmd/rescribe/getembeds.go b/cmd/rescribe/getembeds.go index 6ab9fa8..a516234 100644 --- a/cmd/rescribe/getembeds.go +++ b/cmd/rescribe/getembeds.go @@ -87,6 +87,7 @@ func main() { {"https://rescribe.xyz/rescribe/embeds/tesseract-osx-v4.1.1.20191227.zip", "5f567b95f1dea9d0581ad42ada4d1f1160a38ea22ae338f9efe190015265636b"}, {"https://rescribe.xyz/rescribe/embeds/tesseract-osx-m1-v4.1.1.20210802.zip", "c9a454633f7e5175e2d50dd939d30a6e5bdfb3b8c78590a08b5aa21edbf32ca4"}, {"https://rescribe.xyz/rescribe/embeds/tesseract-w32-v5.0.0-alpha.20210506.zip", "96734f3db4bb7c3b9a241ab6d89ab3e8436cea43b1cbbcfb13999497982f63e3"}, + {"https://rescribe.xyz/rescribe/embeds/getgbook-linux-cac42fb.zip", "c3b40a1c13da613d383f990bda5dd72425a7f26b89102d272a3388eb3d05ddb6"}, } for _, v := range urls { if present(v.url, v.sum) { diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go index fde660a..ad33a1e 100644 --- a/cmd/rescribe/gui.go +++ b/cmd/rescribe/gui.go @@ -245,7 +245,7 @@ func formatProgressBarText(bar *widget.ProgressBar) func() string { } // startGui starts the gui process -func startGui(log log.Logger, cmd string, training string, tessdir string) error { +func startGui(log log.Logger, cmd string, gbookcmd string, training string, tessdir string) error { myApp := app.New() myWindow := myApp.NewWindow("Rescribe OCR") @@ -464,7 +464,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error savedir = bookdir fmt.Printf("Downloading Google Book\n") - d, err := getGoogleBook(ctx, bookname, bookdir) + d, err := getGoogleBook(ctx, gbookcmd, bookname, bookdir) if err != nil { if !strings.HasSuffix(err.Error(), "signal: killed") { msg := fmt.Sprintf("Error downloading Google Book %s: %v\n", bookname, err) diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index 7db5e3e..65ecd60 100644 --- 
a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -201,6 +201,20 @@ These training files are included in rescribe, and are always available: } } + err = unpackZip(gbookzip, tessdir) + if err != nil { + log.Fatalln("Error unpacking embedded getgbook zip:", err) + } + var gbookCommand string + switch runtime.GOOS { + case "darwin": + gbookCommand = filepath.Join(tessdir, "getgbook") + case "linux": + gbookCommand = filepath.Join(tessdir, "getgbook") + case "windows": + gbookCommand = filepath.Join(tessdir, "getgbook.exe") + } + tessdatadir := filepath.Join(tessdir, "tessdata") err = os.MkdirAll(tessdatadir, 0755) if err != nil { @@ -238,7 +252,7 @@ These training files are included in rescribe, and are always available: } if flag.NArg() < 1 || *usegui { - err := startGui(*verboselog, tessCommand, trainingName, tessdir) + err := startGui(*verboselog, tessCommand, gbookCommand, trainingName, tessdir) err = os.RemoveAll(tessdir) if err != nil { log.Printf("Error removing tesseract directory %s: %v", tessdir, err) @@ -266,12 +280,16 @@ These training files are included in rescribe, and are always available: var ctx context.Context + // TODO: support google book downloading, as done with the GUI + // try opening as a PDF, and extracting if !fi.IsDir() { if flag.NArg() < 2 { savedir = strings.TrimSuffix(bookdir, ".pdf") } + // BUG: this seems to fail from command line, yet works from GUI + // (used to work) bookdir, err = extractPdfImgs(ctx, bookdir) if err != nil { log.Fatalln("Error opening file as PDF:", err) -- cgit v1.2.1-24-ge1ad