summaryrefslogtreecommitdiff
path: root/cmd/rescribe
diff options
context:
space:
mode:
Diffstat (limited to 'cmd/rescribe')
-rw-r--r--cmd/rescribe/TODO13
-rw-r--r--cmd/rescribe/embed_darwin.go12
-rw-r--r--cmd/rescribe/embed_darwin_amd64.go2
-rw-r--r--cmd/rescribe/embed_darwin_arm64.go2
-rw-r--r--cmd/rescribe/embed_linux.go5
-rw-r--r--cmd/rescribe/embed_other.go8
-rw-r--r--cmd/rescribe/embed_tessdata.go12
-rw-r--r--cmd/rescribe/embed_windows.go5
-rw-r--r--cmd/rescribe/gbook.go259
-rw-r--r--cmd/rescribe/gbook_test.go46
-rw-r--r--cmd/rescribe/getembeds.go77
-rw-r--r--cmd/rescribe/gui.go555
-rw-r--r--cmd/rescribe/gui_test.go77
-rw-r--r--cmd/rescribe/icon.256.pngbin0 -> 5945 bytes
-rw-r--r--cmd/rescribe/icon.pngbin0 -> 13997 bytes
-rw-r--r--cmd/rescribe/icon.svg60
-rw-r--r--cmd/rescribe/main.go443
-rw-r--r--cmd/rescribe/makefile81
-rw-r--r--cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/174f82f558636f2a2
-rw-r--r--cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/60892155cf2f79632
-rw-r--r--cmd/rescribe/xyz.rescribe.rescribe.appdata.xml43
-rw-r--r--cmd/rescribe/xyz.rescribe.rescribe.desktop9
-rw-r--r--cmd/rescribe/xyz.rescribe.rescribe.yml65
23 files changed, 1636 insertions, 142 deletions
diff --git a/cmd/rescribe/TODO b/cmd/rescribe/TODO
new file mode 100644
index 0000000..2a31b9a
--- /dev/null
+++ b/cmd/rescribe/TODO
@@ -0,0 +1,13 @@
+Add option to choose multiple languages (easy on tesseract level, slightly tricky on GUI level)
+
+Write to PDF as we go along, so memory requirements are reduced. Would require further modifying fpdf: https://github.com/jung-kurt/gofpdf/issues/110
+
+Maybe pay Apple $100 for a certificate to avoid their security warnings (can be easily included in 'fyne package')
+
+Add more tests.
+
+Improve the progress bar by finding how many files will be OCRed and counting the number done, to provide an accurate count
+
+Maybe parse CropBox from PDF as we do rotate, to crop images appropriately
+
+Add to the "Microsoft Store"
diff --git a/cmd/rescribe/embed_darwin.go b/cmd/rescribe/embed_darwin.go
new file mode 100644
index 0000000..4f43b87
--- /dev/null
+++ b/cmd/rescribe/embed_darwin.go
@@ -0,0 +1,12 @@
+// Copyright 2021 Nick White.
+// Use of this source code is governed by the GPLv3
+// license that can be found in the LICENSE file.
+
+//go:build embed
+
+package main
+
+import _ "embed"
+
+//go:embed getgbook-darwin-b14f62f.zip
+var gbookzip []byte
diff --git a/cmd/rescribe/embed_darwin_amd64.go b/cmd/rescribe/embed_darwin_amd64.go
index 719c9cc..1f7f8c2 100644
--- a/cmd/rescribe/embed_darwin_amd64.go
+++ b/cmd/rescribe/embed_darwin_amd64.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/embed_darwin_arm64.go b/cmd/rescribe/embed_darwin_arm64.go
index a1ca9b8..4c154be 100644
--- a/cmd/rescribe/embed_darwin_arm64.go
+++ b/cmd/rescribe/embed_darwin_arm64.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
diff --git a/cmd/rescribe/embed_linux.go b/cmd/rescribe/embed_linux.go
index c720b6e..3cfd18b 100644
--- a/cmd/rescribe/embed_linux.go
+++ b/cmd/rescribe/embed_linux.go
@@ -2,9 +2,14 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
//go:embed tesseract-linux-v5.0.0-alpha.20210510.zip
var tesszip []byte
+
+//go:embed getgbook-linux-cac42fb.zip
+var gbookzip []byte
diff --git a/cmd/rescribe/embed_other.go b/cmd/rescribe/embed_other.go
index fe51fd0..ac9ce3a 100644
--- a/cmd/rescribe/embed_other.go
+++ b/cmd/rescribe/embed_other.go
@@ -2,12 +2,12 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
-// +build !darwin
-// +build !linux
-// +build !windows
+//go:build (!darwin && !linux && !windows) || !embed
package main
// if not one of the above platforms, we won't embed anything, so
-// just create an empty byte slice
+// just create empty byte slices
var tesszip []byte
+var gbookzip []byte
+var tessdatazip []byte
diff --git a/cmd/rescribe/embed_tessdata.go b/cmd/rescribe/embed_tessdata.go
new file mode 100644
index 0000000..ea9ce8f
--- /dev/null
+++ b/cmd/rescribe/embed_tessdata.go
@@ -0,0 +1,12 @@
+// Copyright 2022 Nick White.
+// Use of this source code is governed by the GPLv3
+// license that can be found in the LICENSE file.
+
+//go:build embed
+
+package main
+
+import _ "embed"
+
+//go:embed tessdata.20220322.zip
+var tessdatazip []byte
diff --git a/cmd/rescribe/embed_windows.go b/cmd/rescribe/embed_windows.go
index c447624..f3fe193 100644
--- a/cmd/rescribe/embed_windows.go
+++ b/cmd/rescribe/embed_windows.go
@@ -2,9 +2,14 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
+//go:build embed
+
package main
import _ "embed"
//go:embed tesseract-w32-v5.0.0-alpha.20210506.zip
var tesszip []byte
+
+//go:embed getgbook-w32-c2824685.zip
+var gbookzip []byte
diff --git a/cmd/rescribe/gbook.go b/cmd/rescribe/gbook.go
new file mode 100644
index 0000000..a011181
--- /dev/null
+++ b/cmd/rescribe/gbook.go
@@ -0,0 +1,259 @@
+// Copyright 2022 Nick White.
+// Use of this source code is governed by the GPLv3
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net/http"
+ "os"
+ "os/exec"
+ "path"
+ "regexp"
+ "strings"
+ "unicode"
+
+ "rescribe.xyz/bookpipeline/internal/pipeline"
+)
+
+const maxPartLength = 48
+
+// formatAuthors reduces a list of authors to a single name part:
+// it takes the first author listed, keeps only the last
+// whitespace-separated word of it, uppercases that, truncates it
+// if it is longer than maxPartLength, and finally drops every rune
+// that is not a letter. An empty list gives an empty string.
+func formatAuthors(authors []string) string {
+	if len(authors) == 0 {
+		return ""
+	}
+
+	surname := authors[0]
+	if words := strings.Fields(surname); len(words) > 1 {
+		surname = words[len(words)-1]
+	}
+	surname = strings.ToUpper(surname)
+
+	// the length test is in bytes; the precision in the format
+	// verb is what does the actual truncating
+	if len(surname) > maxPartLength {
+		surname = fmt.Sprintf("%.*s", maxPartLength, surname)
+	}
+
+	return strings.Map(stripNonLetters, surname)
+}
+
+// stripNonLetters is a mapping function for strings.Map which
+// keeps letters unchanged and returns -1 for every other rune,
+// which makes strings.Map drop it.
+func stripNonLetters(r rune) rune {
+	if unicode.IsLetter(r) {
+		return r
+	}
+	return -1
+}
+
+// formatTitle prepares a title for use as a name part: every rune
+// that is not a letter (spaces, digits, punctuation) is removed,
+// and the result is truncated if it is longer than maxPartLength.
+func formatTitle(title string) string {
+	letters := strings.Map(stripNonLetters, title)
+	if len(letters) <= maxPartLength {
+		return letters
+	}
+	// too long, so cut it down using the precision of the format verb
+	return fmt.Sprintf("%.*s", maxPartLength, letters)
+}
+
+// getMetadata queries the Google Books volumes API for the book
+// with the given id and returns its author, title and year, in
+// that order. The author and title are passed through
+// formatAuthors and formatTitle; the year is the PublishedDate
+// field exactly as the API returned it. An error is returned if
+// the request fails, the server answers with anything other than
+// 200 OK, or the response can't be read or parsed as JSON.
+func getMetadata(id string) (string, string, string, error) {
+	var author, title, year string
+	url := fmt.Sprintf("https://www.googleapis.com/books/v1/volumes/%s", id)
+
+	// designed to be unmarshalled by encoding/json's Unmarshal()
+	type bookInfo struct {
+		VolumeInfo struct {
+			Title         string
+			Authors       []string
+			PublishedDate string
+		}
+	}
+
+	resp, err := http.Get(url)
+	if err != nil {
+		return author, title, year, fmt.Errorf("Error downloading metadata %s: %v", url, err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// err is nil here, so report the HTTP status instead
+		return author, title, year, fmt.Errorf("Error downloading metadata %s: %s", url, resp.Status)
+	}
+
+	b, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return author, title, year, fmt.Errorf("Error reading metadata %s: %v", url, err)
+	}
+
+	v := bookInfo{}
+	err = json.Unmarshal(b, &v)
+	if err != nil {
+		return author, title, year, fmt.Errorf("Error parsing metadata %s: %v", url, err)
+	}
+
+	author = formatAuthors(v.VolumeInfo.Authors)
+	title = formatTitle(v.VolumeInfo.Title)
+	year = v.VolumeInfo.PublishedDate
+
+	return author, title, year, nil
+}
+
+// moveFile moves a file by copying it to the destination and then
+// removing the original. os.Rename isn't used as that can fail if
+// crossing filesystem boundaries.
+//
+// The original is only removed once the copy has been completely
+// written and the destination closed without error, so a failed
+// write can never lose the source file.
+func moveFile(from string, to string) error {
+	src, err := os.Open(from)
+	if err != nil {
+		return err
+	}
+	defer src.Close()
+
+	dst, err := os.Create(to)
+	if err != nil {
+		return err
+	}
+
+	_, err = io.Copy(dst, src)
+	// Close can report a write error that was deferred until the
+	// final flush, so it has to be checked; a copy error takes
+	// precedence if there was one.
+	if cerr := dst.Close(); err == nil {
+		err = cerr
+	}
+	if err != nil {
+		return err
+	}
+
+	// close explicitly before removing, as an open file can't
+	// be removed on every platform
+	src.Close()
+	return os.Remove(from)
+}
+
+// getGoogleBook downloads all images of a book to a directory
+// named YEAR_AUTHORSURNAME_Title_bookid inside basedir, returning
+// the directory path.
+//
+// The download is done by running gbookcmd with the book id as its
+// only argument inside a fresh temporary directory; the files it
+// leaves in the id subdirectory are then moved into the final
+// directory. The temporary directory is removed on success and on
+// any failure that happens before files start being moved. If
+// moving a file fails, the final directory path is returned along
+// with the error and the temporary directory is left in place so
+// nothing that was downloaded is lost.
+func getGoogleBook(ctx context.Context, gbookcmd string, id string, basedir string) (string, error) {
+	author, title, year, err := getMetadata(id)
+	if err != nil {
+		return "", err
+	}
+
+	tmpdir, err := ioutil.TempDir("", "bookpipeline")
+	if err != nil {
+		return "", fmt.Errorf("Error setting up temporary directory: %v", err)
+	}
+
+	cmd := exec.CommandContext(ctx, gbookcmd, id)
+	pipeline.HideCmd(cmd)
+	cmd.Dir = tmpdir
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	err = cmd.Run()
+	if err != nil {
+		// a partial download is of no use, so don't leave it behind
+		_ = os.RemoveAll(tmpdir)
+		return "", fmt.Errorf("Error running getgbook %s: %v", id, err)
+	}
+
+	select {
+	case <-ctx.Done():
+		// RemoveAll rather than Remove, as the directory
+		// won't be empty after a download
+		_ = os.RemoveAll(tmpdir)
+		return "", ctx.Err()
+	default:
+	}
+
+	// getgbook downloads into id directory, so move files out of
+	// there directly into dir
+	dldir := path.Join(tmpdir, id)
+	f, err := os.Open(dldir)
+	if err != nil {
+		_ = os.RemoveAll(tmpdir)
+		return "", fmt.Errorf("Failed to open %s to move files: %v", dldir, err)
+	}
+	files, err := f.Readdir(0)
+	// the handle is closed straight away, as an open directory
+	// can't be removed on every platform
+	f.Close()
+	if err != nil {
+		_ = os.RemoveAll(tmpdir)
+		return "", fmt.Errorf("Failed to readdir %s to move files: %v", dldir, err)
+	}
+
+	d := fmt.Sprintf("%s_%s_%s_%s", year, author, title, id)
+	dir := path.Join(basedir, d)
+	err = os.MkdirAll(dir, 0755)
+	if err != nil {
+		_ = os.RemoveAll(tmpdir)
+		return "", fmt.Errorf("Couldn't create directory %s: %v", dir, err)
+	}
+
+	for _, v := range files {
+		orig := path.Join(dldir, v.Name())
+		new := path.Join(dir, v.Name())
+		err = moveFile(orig, new)
+		if err != nil {
+			return dir, fmt.Errorf("Failed to move %s to %s: %v", orig, new, err)
+		}
+	}
+
+	// remove the whole temporary directory, not just the id
+	// directory inside it
+	err = os.RemoveAll(tmpdir)
+	if err != nil {
+		return dir, fmt.Errorf("Failed to remove temporary directory %s: %v", tmpdir, err)
+	}
+
+	return dir, nil
+}
+
+// googleBooksEditionRe matches the start of the "www.google.TLD/books/"
+// style of Google Books URL. It is compiled once here rather than
+// on every call to getBookIdFromUrl.
+var googleBooksEditionRe = regexp.MustCompile("https://www.google.[^\\/]*/books/")
+
+// getBookIdFromUrl returns a 12 character Google Book ID from
+// a Google URL, or an error if one can't be found. A 12 byte
+// string which contains none of '?', '/' or ':' is taken to be
+// an ID already and is returned as it is.
+// Example URLs:
+// https://books.google.it/books?id=QjQepCuN8JYC
+// https://www.google.it/books/edition/_/VJbr-Oe2au0C
+func getBookIdFromUrl(url string) (string, error) {
+	const idLen = 12
+
+	if len(url) == idLen && !strings.ContainsAny(url, "?/:") {
+		return url, nil
+	}
+
+	// Lowercase only the ASCII letters, byte by byte, so that every
+	// offset found in lurl is also valid in url. strings.ToLower
+	// can change the length of a string in bytes (some non-ASCII
+	// letters, and invalid UTF-8), which would make offsets taken
+	// from the lowered copy point at the wrong part of the original.
+	b := []byte(url)
+	for i, c := range b {
+		if 'A' <= c && c <= 'Z' {
+			b[i] = c + ('a' - 'A')
+		}
+	}
+	lurl := string(b)
+
+	start := -1
+	switch {
+	case strings.HasPrefix(lurl, "https://books.google"):
+		start = strings.Index(lurl, "?id=")
+		if start == -1 {
+			start = strings.Index(lurl, "&id=")
+		}
+		if start >= 0 {
+			start += len("?id=")
+		}
+	case googleBooksEditionRe.MatchString(url):
+		start = strings.Index(lurl, "edition/_/")
+		if start >= 0 {
+			start += len("edition/_/")
+		}
+	}
+
+	// no marker found, or not enough left after it for a whole ID
+	if start < 0 || len(url)-start < idLen {
+		return "", fmt.Errorf("Could not find book ID in URL")
+	}
+	return url[start : start+idLen], nil
+}
diff --git a/cmd/rescribe/gbook_test.go b/cmd/rescribe/gbook_test.go
new file mode 100644
index 0000000..f7df595
--- /dev/null
+++ b/cmd/rescribe/gbook_test.go
@@ -0,0 +1,46 @@
+// Copyright 2022 Nick White.
+// Use of this source code is governed by the GPLv3
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "testing"
+)
+
+// TestGetBookIdFromUrl checks that the expected ID is extracted
+// from each of the supported styles of Google Books URL.
+func TestGetBookIdFromUrl(t *testing.T) {
+	type testCase struct {
+		url string
+		id  string
+	}
+	cases := []testCase{
+		{url: "https://books.google.it/books?id=QjQepCuN8JYC", id: "QjQepCuN8JYC"},
+		{url: "https://www.google.it/books/edition/_/VJbr-Oe2au0C", id: "VJbr-Oe2au0C"},
+	}
+
+	for _, c := range cases {
+		t.Run(c.url, func(t *testing.T) {
+			got, err := getBookIdFromUrl(c.url)
+			if err != nil {
+				t.Fatalf("Error running test: %v", err)
+			}
+			if got != c.id {
+				t.Fatalf("Expected %s, got %s", c.id, got)
+			}
+		})
+	}
+}
+
+// FuzzGetBookIdFromUrl fuzzes getBookIdFromUrl, seeded with one
+// URL of each supported style. The result is ignored; the fuzzer
+// is only looking for inputs which cause a panic.
+func FuzzGetBookIdFromUrl(f *testing.F) {
+	seeds := []string{
+		"https://books.google.it/books?id=QjQepCuN8JYC",
+		"https://www.google.it/books/edition/_/VJbr-Oe2au0C",
+	}
+	for i := range seeds {
+		f.Add(seeds[i])
+	}
+
+	f.Fuzz(func(t *testing.T, url string) {
+		getBookIdFromUrl(url)
+	})
+}
diff --git a/cmd/rescribe/getembeds.go b/cmd/rescribe/getembeds.go
index 91cd480..57c7ce0 100644
--- a/cmd/rescribe/getembeds.go
+++ b/cmd/rescribe/getembeds.go
@@ -2,15 +2,19 @@
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
-// +build ignore
+//go:build ignore
// this downloads the needed files to embed into the binary,
// and is run by `go generate`
package main
import (
+ "bytes"
+ "crypto/sha256"
+ "encoding/hex"
"fmt"
"io"
+ "io/ioutil"
"net/http"
"os"
"path"
@@ -30,6 +34,9 @@ func dl(url string) error {
return fmt.Errorf("Error getting url %s: %v", url, err)
}
defer r.Body.Close()
+ if r.StatusCode != 200 {
+ return fmt.Errorf("Error getting url %s: got code %v", url, r.StatusCode)
+ }
_, err = io.Copy(f, r.Body)
if err != nil {
@@ -39,19 +46,69 @@ func dl(url string) error {
return nil
}
+// present reports whether the file named by the last element of
+// url exists in the current directory and has the SHA-256 checksum
+// given, hex encoded, in sum. If sum can't be decoded as hex an
+// error is printed and the program exits.
+func present(url string, sum string) bool {
+	name := path.Base(url)
+	if _, err := os.Stat(name); err != nil && !os.IsExist(err) {
+		return false
+	}
+
+	data, err := ioutil.ReadFile(name)
+	if err != nil {
+		return false
+	}
+
+	want, err := hex.DecodeString(sum)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error decoding checksum for %s: %v\n", url, err)
+		os.Exit(1)
+	}
+
+	// Sum256 returns an array, so slice it to compare with want
+	got := sha256.Sum256(data)
+	return bytes.Equal(got[:], want)
+}
+
func main() {
- urls := []string{
- "https://rescribe.xyz/rescribe/embeds/tessdata.20211001.zip",
- "https://rescribe.xyz/rescribe/embeds/tesseract-linux-v5.0.0-alpha.20210510.zip",
- "https://rescribe.xyz/rescribe/embeds/tesseract-osx-v4.1.1.20191227.zip",
- "https://rescribe.xyz/rescribe/embeds/tesseract-osx-m1-v4.1.1.20210802.zip",
- "https://rescribe.xyz/rescribe/embeds/tesseract-w32-v5.0.0-alpha.20210506.zip",
+ urls := []struct {
+ url string
+ sum string
+ }{
+ {"https://rescribe.xyz/rescribe/embeds/tessdata.20220322.zip", "725fd570a3c3dc0eba9463248ce47a8646db8bafb198d428d6bb8f0be18540ee"},
+ {"https://rescribe.xyz/rescribe/embeds/tesseract-linux-v5.0.0-alpha.20210510.zip", "81cfba632b8aaf0a00180b1aa62d357d50f343b0e9bd51b941ee14c289ccd889"},
+ {"https://rescribe.xyz/rescribe/embeds/tesseract-osx-v4.1.1.20191227.zip", "5f567b95f1dea9d0581ad42ada4d1f1160a38ea22ae338f9efe190015265636b"},
+ {"https://rescribe.xyz/rescribe/embeds/tesseract-osx-m1-v4.1.1.20210802.zip", "c9a454633f7e5175e2d50dd939d30a6e5bdfb3b8c78590a08b5aa21edbf32ca4"},
+ {"https://rescribe.xyz/rescribe/embeds/tesseract-w32-v5.0.0-alpha.20210506.zip", "96734f3db4bb7c3b9a241ab6d89ab3e8436cea43b1cbbcfb13999497982f63e3"},
+ {"https://rescribe.xyz/rescribe/embeds/getgbook-darwin-b14f62f.zip", "d21bc4d51c5f43af68d77ef257061a0635cce0610b769d23a340b3be528a92d8"},
+ {"https://rescribe.xyz/rescribe/embeds/getgbook-linux-cac42fb.zip", "c3b40a1c13da613d383f990bda5dd72425a7f26b89102d272a3388eb3d05ddb6"},
+ {"https://rescribe.xyz/rescribe/embeds/getgbook-w32-c2824685.zip", "1c258a77a47d6515718fbbd7e54d5c2b516291682a878d122add55901c9f2914"},
}
for _, v := range urls {
- fmt.Printf("Downloading %s\n", v)
- err := dl(v)
+ if present(v.url, v.sum) {
+ fmt.Printf("Skipping downloading of already present %s\n", path.Base(v.url))
+ continue
+ }
+
+ fmt.Printf("Downloading %s\n", v.url)
+ err := dl(v.url)
if err != nil {
- fmt.Printf("Error downloading %s: %v\n", v, err)
+ fmt.Fprintf(os.Stderr, "Error downloading %s: %v\n", v.url, err)
+ os.Exit(1)
+ }
+
+ if !present(v.url, v.sum) {
+ fmt.Fprintf(os.Stderr, "Error: downloaded %s does not match expected checksum\n", v.url)
os.Exit(1)
}
}
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index 654d875..73f1db2 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -1,4 +1,4 @@
-// Copyright 2021 Nick White.
+// Copyright 2021-2022 Nick White.
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
@@ -6,6 +6,8 @@ package main
import (
"bufio"
+ "context"
+ "errors"
"fmt"
"io"
"log"
@@ -18,19 +20,35 @@ import (
"fyne.io/fyne/v2/container"
"fyne.io/fyne/v2/dialog"
"fyne.io/fyne/v2/layout"
+ "fyne.io/fyne/v2/storage"
"fyne.io/fyne/v2/theme"
"fyne.io/fyne/v2/widget"
)
+// progressPoints maps progress bar values to the name of the stage
+// they stand for. formatProgressBar uses it to pick the text shown
+// on the bar, and updateProgress uses it to set the bar's value
+// when the last line of the log starts with one of the names.
+var progressPoints = map[float64]string{
+ 0.11: "Downloading",
+ 0.12: "Processing PDF",
+ 0.2: "Preprocessing",
+ 0.5: "OCRing",
+ 0.9: "Analysing",
+ 1.0: "Done",
+}
+
+// trainingNames maps the code of a training to a human readable
+// description of it. mkTrainingSelect shows matching entries in
+// the select box as "description [code]".
+var trainingNames = map[string]string{
+ "eng": "English (modern print)",
+ "lat": "Latin (modern print)",
+ "rescribev9_fast": "Latin/English/French (printed ca 1500-1800)",
+}
+
// copyStdoutToChan creates a pipe to copy anything written
-// to stdout instead to a rune channel
+// to the file also to a rune channel.
func copyStdoutToChan() (chan rune, error) {
c := make(chan rune)
- origStdout := os.Stdout
+ origFile := os.Stdout
r, w, err := os.Pipe()
if err != nil {
- return c, fmt.Errorf("Error creating pipe for stdout redirection: %v", err)
+ return c, fmt.Errorf("Error creating pipe for file redirection: %v", err)
}
os.Stdout = w
@@ -40,7 +58,7 @@ func copyStdoutToChan() (chan rune, error) {
defer func() {
close(c)
w.Close()
- os.Stdout = origStdout
+ os.Stdout = origFile
}()
for {
r, _, err := bufReader.ReadRune()
@@ -57,81 +75,512 @@ func copyStdoutToChan() (chan rune, error) {
return c, nil
}
-// startGui starts the gui process
-func startGui(log log.Logger, cmd string, training string, systess bool, tessdir string) error {
- myApp := app.New()
- myWindow := myApp.NewWindow("Rescribe OCR")
+// copyStderrToChan creates a pipe to copy anything written
+// to the file also to a rune channel.
+// TODO: would be nice to merge this with copyStdoutToChan,
+// but a naive version using *os.File didn't work.
+func copyStderrToChan() (chan rune, error) {
+ c := make(chan rune)
- var gobtn *widget.Button
+ origFile := os.Stderr
+ r, w, err := os.Pipe()
+ if err != nil {
+ return c, fmt.Errorf("Error creating pipe for file redirection: %v", err)
+ }
+ os.Stderr = w
- dir := widget.NewEntry()
- dir.SetPlaceHolder("Folder to process")
- dir.OnChanged = func(s string) {
- // TODO: also check if string is a directory, and only enable if so
- if dir.Text != "" {
- gobtn.Enable()
- } else {
- gobtn.Disable()
+ bufReader := bufio.NewReader(r)
+
+ go func() {
+ defer func() {
+ close(c)
+ w.Close()
+ os.Stderr = origFile
+ }()
+ for {
+ r, _, err := bufReader.ReadRune()
+ if err != nil && err != io.EOF {
+ return
+ }
+ c <- r
+ if err == io.EOF {
+ return
+ }
+ }
+ }()
+
+ return c, nil
+}
+
+// trainingSelectOnChange is a closure to handle change of the training
+// select box. It does nothing in most cases, but if "Other..." has been
+// selected, then it pops up a file chooser and adds the result to the
+// list, also copying the file to the TESSDATA_PREFIX, and selects it.
+func trainingSelectOnChange(sel *widget.Select, parent fyne.Window) func(string) {
+ return func(str string) {
+ if sel == nil {
+ return
+ }
+ if str != "Other..." {
+ return
}
+ d := dialog.NewFileOpen(func(uri fyne.URIReadCloser, err error) {
+ if err != nil || uri == nil {
+ sel.SetSelectedIndex(0)
+ return
+ }
+ defer uri.Close()
+ name := uri.URI().Name()
+ newpath := filepath.Join(os.Getenv("TESSDATA_PREFIX"), name)
+ f, err := os.Create(newpath)
+ if err != nil {
+ msg := fmt.Sprintf("Error creating temporary file to store custom training: %v\n", err)
+ dialog.ShowError(errors.New(msg), parent)
+ fmt.Fprintf(os.Stderr, msg)
+ sel.SetSelectedIndex(0)
+ return
+ }
+ defer f.Close()
+ _, err = io.Copy(f, uri)
+ if err != nil {
+ msg := fmt.Sprintf("Error copying custom training to temporary file: %v\n", err)
+ dialog.ShowError(errors.New(msg), parent)
+ fmt.Fprintf(os.Stderr, msg)
+ sel.SetSelectedIndex(0)
+ return
+ }
+
+ basicname := strings.TrimSuffix(name, ".traineddata")
+ opts := append([]string{basicname}, sel.Options...)
+ sel.Options = opts
+ sel.SetSelectedIndex(0)
+ }, parent)
+ d.SetFilter(storage.NewExtensionFileFilter([]string{".traineddata"}))
+ d.Resize(fyne.NewSize(740, 600))
+ d.Show()
}
+}
- openbtn := widget.NewButtonWithIcon("Choose folder", theme.FolderOpenIcon(), func() {
- dialog.ShowFolderOpen(func(uri fyne.ListableURI, err error) {
- if err == nil && uri != nil {
- dir.SetText(uri.Path())
+// mkTrainingSelect returns a select widget with all training
+// files in TESSDATA_PREFIX/training, any other trainings listed
+// in the extras slice, selecting the first entry.
+func mkTrainingSelect(extras []string, parent fyne.Window) *widget.Select {
+ prefix := os.Getenv("TESSDATA_PREFIX")
+ fn, err := filepath.Glob(prefix + "/*.traineddata")
+ if err != nil {
+ fn = []string{}
+ }
+ var opts []string
+ for _, v := range append(extras, fn...) {
+ t := strings.TrimSuffix(strings.TrimPrefix(v, prefix), ".traineddata")
+ if t == "osd" {
+ continue
+ }
+ for code, name := range trainingNames {
+ if t == code {
+ t = fmt.Sprintf("%s [%s]", name, code)
+ break
}
- }, myWindow)
- })
+ }
+ alreadythere := 0
+ for _, opt := range opts {
+ if t == opt {
+ alreadythere = 1
+ break
+ }
+ }
+ if alreadythere == 0 {
+ opts = append(opts, t)
+ }
+ }
- progressBar := widget.NewProgressBar()
+ opts = append(opts, "Other...")
+ s := widget.NewSelect(opts, func(string) {})
+ // OnChanged is set outside of NewSelect so the reference to s isn't nil
+ s.OnChanged = trainingSelectOnChange(s, parent)
+ s.SetSelectedIndex(0)
+ return s
+}
- logarea := widget.NewMultiLineEntry()
- logarea.Disable()
+// formatProgressBar returns a function which gives the text to show
+// on the progress bar for its current value: the name from the
+// progressPoints map if the value is exactly one of its keys,
+// the OCRing name anywhere from 0.5 up to (but not including) 0.9,
+// nothing at all for 0, and "Processing" for anything else.
+func formatProgressBar(bar *widget.ProgressBar) func() string {
+	return func() string {
+		if name, ok := progressPoints[bar.Value]; ok {
+			return name
+		}
+		switch {
+		case bar.Value >= 0.5 && bar.Value < 0.9:
+			// OCRing gets special treatment as the bar can be updated within the range
+			return progressPoints[0.5]
+		case bar.Value == 0:
+			return ""
+		}
+		return "Processing"
+	}
+}
- // TODO: have the button be pressed if enter is pressed
- gobtn = widget.NewButtonWithIcon("Process OCR", theme.UploadIcon(), func() {
- if dir.Text == "" {
- return
+// updateProgress looks at the last line of a log and, if it starts
+// with a space followed by one of the names in progressPoints, sets
+// the progress bar to the value for that name. Any other last line
+// leaves the bar as it is.
+func updateProgress(log string, progressBar *widget.ProgressBar) {
+	lastline := log
+	if i := strings.LastIndex(log, "\n"); i >= 0 {
+		lastline = log[i+1:]
+	}
+
+	for point, name := range progressPoints {
+		if !strings.HasPrefix(lastline, " "+name) {
+			continue
+		}
+		if name != "OCRing" {
+			progressBar.SetValue(point)
+			continue
+		}
+
+		// OCRing has a number of dots after it showing how many pages have been processed,
+		// which we can use to update progress bar more often
+		// TODO: calculate number of pages we expect, so this can be set accurately
+		if progressBar.Value < 0.5 {
+			progressBar.SetValue(0.5)
+		}
+		dots := strings.Count(lastline, ".")
+		val := float64(0.5) + (float64(dots) * float64(0.01))
+		if val >= 0.9 {
+			// stay just below the value of the next stage
+			val = 0.89
+		}
+		progressBar.SetValue(val)
+		break
+	}
+}
+
+// start sets up the gui to start the core process, and if all is well
+// it starts it
+func start(ctx context.Context, log *log.Logger, cmd string, tessdir string, gbookcmd string, dir string, training string, win fyne.Window, logarea *widget.Entry, progressBar *widget.ProgressBar, abortbtn *widget.Button, wipe bool, bigpdf bool, disableWidgets []fyne.Disableable) {
+ if dir == "" {
+ return
+ }
+
+ stdout, err := copyStdoutToChan()
+ if err != nil {
+ msg := fmt.Sprintf("Internal error\n\nError copying stdout to chan: %v\n", err)
+ dialog.ShowError(errors.New(msg), win)
+ fmt.Fprintf(os.Stderr, msg)
+ return
+ }
+ go func() {
+ for r := range stdout {
+ logarea.SetText(logarea.Text + string(r))
+ logarea.CursorRow = strings.Count(logarea.Text, "\n")
+ updateProgress(logarea.Text, progressBar)
+ }
+ }()
+
+ stderr, err := copyStderrToChan()
+ if err != nil {
+ msg := fmt.Sprintf("Internal error\n\nError copying stdout to chan: %v\n", err)
+ dialog.ShowError(errors.New(msg), win)
+ fmt.Fprintf(os.Stderr, msg)
+ return
+ }
+ go func() {
+ for r := range stderr {
+ logarea.SetText(logarea.Text + string(r))
+ logarea.CursorRow = strings.Count(logarea.Text, "\n")
}
+ }()
- gobtn.Disable()
- gobtn.SetText("Processing...")
+ // Do this in a goroutine so the GUI remains responsive
+ go func() {
+ letsGo(ctx, log, cmd, tessdir, gbookcmd, dir, training, win, logarea, progressBar, abortbtn, wipe, bigpdf, disableWidgets)
+ }()
+}
- progressBar.SetValue(0.5)
+// letsGo starts the core process
+func letsGo(ctx context.Context, log *log.Logger, cmd string, tessdir string, gbookcmd string, dir string, training string, win fyne.Window, logarea *widget.Entry, progressBar *widget.ProgressBar, abortbtn *widget.Button, wipe bool, bigpdf bool, disableWidgets []fyne.Disableable) {
+ bookdir := dir
+ savedir := dir
+ bookname := strings.ReplaceAll(filepath.Base(dir), " ", "_")
- stdout, err := copyStdoutToChan()
- if err != nil {
- fmt.Fprintf(os.Stderr, "Error copying stdout to chan: %v\n", err)
+ f, err := os.Stat(bookdir)
+ if err != nil && !strings.HasPrefix(bookdir, "Google Book: ") {
+ msg := fmt.Sprintf("Error opening %s: %v", bookdir, err)
+ dialog.ShowError(errors.New(msg), win)
+ fmt.Fprintf(os.Stderr, msg)
+
+ progressBar.SetValue(0.0)
+ for _, v := range disableWidgets {
+ v.Enable()
+ }
+ abortbtn.Disable()
+ return
+ }
+
+ for _, v := range disableWidgets {
+ v.Disable()
+ }
+
+ abortbtn.Enable()
+
+ progressBar.SetValue(0.1)
+
+ if strings.HasPrefix(dir, "Google Book: ") {
+ if gbookcmd == "" {
+ msg := fmt.Sprintf("No getgbook found, can't download Google Book. Either set -gbookcmd on the command line, or use the official build which includes an embedded copy of getgbook.\n")
+ dialog.ShowError(errors.New(msg), win)
+ fmt.Fprintf(os.Stderr, msg)
+ progressBar.SetValue(0.0)
+ for _, v := range disableWidgets {
+ v.Enable()
+ }
+ abortbtn.Disable()
return
}
+ progressBar.SetValue(0.11)
+ start := len("Google Book: ")
+ bookname = dir[start : start+12]
- // update log area with output from outC in a concurrent goroutine
- go func() {
- for r := range stdout {
- logarea.SetText(logarea.Text + string(r))
- logarea.CursorRow = strings.Count(logarea.Text, "\n")
- // TODO: set text on progress bar, or a label below it, to latest line printed, rather than just using a whole multiline entry like this
- // TODO: parse the stdout and set progressBar based on that
+ start = start + 12 + len(" Save to: ")
+ bookdir = dir[start:]
+ savedir = bookdir
+
+ fmt.Printf("Downloading Google Book\n")
+ d, err := getGoogleBook(ctx, gbookcmd, bookname, bookdir)
+ if err != nil {
+ if !strings.HasSuffix(err.Error(), "signal: killed") {
+ msg := fmt.Sprintf("Error downloading Google Book %s\n", bookname)
+ dialog.ShowError(errors.New(msg), win)
+ fmt.Fprintf(os.Stderr, msg)
}
- }()
+ progressBar.SetValue(0.0)
+ for _, v := range disableWidgets {
+ v.Enable()
+ }
+ abortbtn.Disable()
+ return
+ }
+ bookdir = d
+ savedir = d
+ bookname = filepath.Base(d)
+ }
- err = startProcess(log, cmd, dir.Text, filepath.Base(dir.Text), training, systess, dir.Text, tessdir)
+ if strings.HasSuffix(dir, ".pdf") && !f.IsDir() {
+ progressBar.SetValue(0.12)
+ bookdir, err = extractPdfImgs(ctx, bookdir)
if err != nil {
- fmt.Fprintf(os.Stderr, "Error executing process: %v\n", err)
+ if !strings.HasSuffix(err.Error(), "context canceled") {
+ msg := fmt.Sprintf("Error opening PDF %s: %v\n", bookdir, err)
+ dialog.ShowError(errors.New(msg), win)
+ fmt.Fprintf(os.Stderr, msg)
+ }
+
+ progressBar.SetValue(0.0)
+ for _, v := range disableWidgets {
+ v.Enable()
+ }
+ abortbtn.Disable()
return
}
- progressBar.SetValue(1.0)
- gobtn.SetText("Process OCR")
- gobtn.Enable()
+ // happens if extractPdfImgs recovers from a PDF panic,
+ // which will occur if we encounter an image we can't decode
+ if bookdir == "" {
+ msg := fmt.Sprintf("Error opening PDF\nThe format of this PDF is not supported, extract the images to .jpg manually into a\nfolder first, using a tool like the PDF image extractor at https://pdfcandy.com/extract-images.html.\n")
+ dialog.ShowError(errors.New(msg), win)
+ fmt.Fprintf(os.Stderr, msg)
+
+ progressBar.SetValue(0.0)
+ for _, v := range disableWidgets {
+ v.Enable()
+ }
+ abortbtn.Disable()
+ return
+ }
+
+ savedir = strings.TrimSuffix(savedir, ".pdf")
+ bookname = strings.TrimSuffix(bookname, ".pdf")
+ }
+
+ if strings.Contains(training, "[") {
+ start := strings.Index(training, "[") + 1
+ end := strings.Index(training, "]")
+ training = training[start:end]
+ }
+
+ err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, wipe, bigpdf)
+ if err != nil && strings.HasSuffix(err.Error(), "context canceled") {
+ progressBar.SetValue(0.0)
+ return
+ }
+ if err != nil {
+ msg := fmt.Sprintf("Error during processing: %v\n", err)
+ if strings.HasSuffix(err.Error(), "No images found") && strings.HasSuffix(dir, ".pdf") && !f.IsDir() {
+ msg = fmt.Sprintf("Error opening PDF\nNo images found in the PDF. Most likely the format of this PDF is not supported,\nextract the images to .jpg manually into a folder first, using a tool like\nthe PDF image extractor at https://pdfcandy.com/extract-images.html.\n")
+ }
+ dialog.ShowError(errors.New(msg), win)
+ fmt.Fprintf(os.Stderr, msg)
+
+ progressBar.SetValue(0.0)
+ for _, v := range disableWidgets {
+ v.Enable()
+ }
+ abortbtn.Disable()
+ return
+ }
+
+ progressBar.SetValue(1.0)
+
+ for _, v := range disableWidgets {
+ v.Enable()
+ }
+ abortbtn.Disable()
+
+ msg := fmt.Sprintf("OCR process finished successfully.\n\nYour completed files have been saved in:\n%s", savedir)
+ dialog.ShowInformation("OCR Complete", msg, win)
+}
+
+// startGui starts the gui process
+func startGui(log *log.Logger, cmd string, gbookcmd string, training string, tessdir string) error {
+ myApp := app.New()
+ myWindow := myApp.NewWindow("Rescribe OCR")
+
+ myWindow.Resize(fyne.NewSize(800, 400))
+
+ var abortbtn, gobtn *widget.Button
+ var chosen *fyne.Container
+
+ dir := widget.NewLabel("")
+
+ dirIcon := widget.NewIcon(theme.FolderIcon())
+
+ folderBtn := widget.NewButtonWithIcon("Choose folder", theme.FolderOpenIcon(), func() {
+ d := dialog.NewFolderOpen(func(uri fyne.ListableURI, err error) {
+ if err != nil || uri == nil {
+ return
+ }
+ dir.SetText(uri.Path())
+ dirIcon.SetResource(theme.FolderIcon())
+ chosen.Show()
+ gobtn.Enable()
+ }, myWindow)
+ d.Resize(fyne.NewSize(740, 600))
+ d.Show()
+ })
+
+ pdfBtn := widget.NewButtonWithIcon("Choose PDF", theme.DocumentIcon(), func() {
+ d := dialog.NewFileOpen(func(uri fyne.URIReadCloser, err error) {
+ if err != nil || uri == nil {
+ return
+ }
+ uri.Close()
+ dir.SetText(uri.URI().Path())
+ dirIcon.SetResource(theme.DocumentIcon())
+ chosen.Show()
+ gobtn.Enable()
+ }, myWindow)
+ d.SetFilter(storage.NewExtensionFileFilter([]string{".pdf"}))
+ d.Resize(fyne.NewSize(740, 600))
+ d.Show()
+ })
+
+ gbookBtn := widget.NewButtonWithIcon("Get Google Book", theme.SearchIcon(), func() {
+ dirEntry := widget.NewEntry()
+ bookId := widget.NewEntry()
+ homeDir, err := os.UserHomeDir()
+ if err == nil {
+ dirEntry.SetText(homeDir)
+ }
+ dirEntry.Validator = func(s string) error {
+ if s == "" {
+ return fmt.Errorf("No save folder set")
+ }
+ return nil
+ }
+ dirBtn := widget.NewButtonWithIcon("Browse", theme.FolderIcon(), func() {
+ d := dialog.NewFolderOpen(func(uri fyne.ListableURI, err error) {
+ if err != nil || uri == nil {
+ return
+ }
+ dirEntry.SetText(uri.Path())
+ }, myWindow)
+ d.Resize(fyne.NewSize(740, 600))
+ d.Show()
+ })
+ bookId.Validator = func(s string) error {
+ _, err := getBookIdFromUrl(s)
+ return err
+ }
+ f1 := widget.NewFormItem("Book ID / URL", bookId)
+ saveDir := container.New(layout.NewBorderLayout(nil, nil, nil, dirBtn), dirEntry, dirBtn)
+ f2 := widget.NewFormItem("Save in folder", saveDir)
+ d := dialog.NewForm("Enter Google Book ID or URL", "OK", "Cancel", []*widget.FormItem{f1, f2}, func(b bool) {
+ if b != true {
+ return
+ }
+ id, err := getBookIdFromUrl(bookId.Text)
+ if err != nil {
+ return
+ }
+ if dirEntry.Text == "" {
+ dirEntry.SetText(homeDir)
+ }
+ dir.SetText(fmt.Sprintf("Google Book: %s Save to: %s", id, dirEntry.Text))
+ dirIcon.SetResource(theme.SearchIcon())
+ chosen.Show()
+ gobtn.Enable()
+ }, myWindow)
+ d.Resize(fyne.NewSize(600, 200))
+ d.Show()
+ })
+
+ wipe := widget.NewCheck("Automatically clean image sides", func(bool) {})
+
+ bigpdf := widget.NewCheck("Use highest image quality for searchable PDF (requires lots of RAM)", func(bool) {})
+ bigpdf.Checked = false
+
+ trainingLabel := widget.NewLabel("Language / Script")
+
+ trainingOpts := mkTrainingSelect([]string{training}, myWindow)
+
+ progressBar := widget.NewProgressBar()
+ progressBar.TextFormatter = formatProgressBar(progressBar)
+
+ logarea := widget.NewMultiLineEntry()
+
+ detail := widget.NewAccordion(widget.NewAccordionItem("Log", logarea))
+
+ var ctx context.Context
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithCancel(context.Background())
+
+ gobtn = widget.NewButtonWithIcon("Start OCR", theme.UploadIcon(), func() {})
+
+ disableWidgets := []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, wipe, bigpdf, trainingOpts, gobtn}
+
+ abortbtn = widget.NewButtonWithIcon("Abort", theme.CancelIcon(), func() {
+ fmt.Printf("\nAbort\n")
+ cancel()
+ progressBar.SetValue(0.0)
+ for _, v := range disableWidgets {
+ v.Enable()
+ }
+ abortbtn.Disable()
+ ctx, cancel = context.WithCancel(context.Background())
})
+ abortbtn.Disable()
+
+ gobtn.OnTapped = func() {
+ start(ctx, log, cmd, tessdir, gbookcmd, dir.Text, trainingOpts.Selected, myWindow, logarea, progressBar, abortbtn, !wipe.Checked, bigpdf.Checked, disableWidgets)
+ }
+
gobtn.Disable()
- diropener := container.New(layout.NewGridLayout(2), dir, openbtn)
+ choices := container.New(layout.NewGridLayout(3), folderBtn, pdfBtn, gbookBtn)
+
+ chosen = container.New(layout.NewBorderLayout(nil, nil, dirIcon, nil), dirIcon, dir)
+ chosen.Hide()
+
+ trainingBits := container.New(layout.NewBorderLayout(nil, nil, trainingLabel, nil), trainingLabel, trainingOpts)
- content := container.NewVBox(diropener, gobtn, progressBar, logarea)
+ startBox := container.NewVBox(choices, chosen, trainingBits, wipe, bigpdf, gobtn, abortbtn, progressBar)
+ startContent := container.NewBorder(startBox, nil, nil, nil, detail)
- myWindow.SetContent(content)
+ myWindow.SetContent(startContent)
myWindow.Show()
myApp.Run()
diff --git a/cmd/rescribe/gui_test.go b/cmd/rescribe/gui_test.go
new file mode 100644
index 0000000..99a924f
--- /dev/null
+++ b/cmd/rescribe/gui_test.go
@@ -0,0 +1,77 @@
+// Copyright 2022 Nick White.
+// Use of this source code is governed by the GPLv3
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "fyne.io/fyne/v2/app"
+ "fyne.io/fyne/v2/widget"
+)
+
+func TestFormatProgressBar(t *testing.T) { // table-driven check of the label returned by formatProgressBar's formatter for a range of bar values
+ cases := []struct {
+ val float64 // value assigned to bar.Value before formatting
+ str string // label the formatter is expected to return
+ }{
+ {0.0, ""}, // zero is expected to give an empty label
+ {0.01, "Processing"},
+ {0.11, "Downloading"},
+ {0.12, "Processing PDF"},
+ {0.2, "Preprocessing"},
+ {0.5, "OCRing"},
+ {0.55, "OCRing"},
+ {0.89, "OCRing"},
+ {0.9, "Analysing"},
+ {1.0, "Done"},
+ {1.1, "Processing"}, // a value above 1.0 is expected to give the same label as 0.01
+ }
+
+ _ = app.New() // shouldn't be needed for test but we get a panic without it
+ bar := widget.NewProgressBar() // single bar shared by all subtests; each one overwrites Value first
+
+ for _, c := range cases {
+ t.Run(fmt.Sprintf("%s_%.1f", c.str, c.val), func(t *testing.T) { // subtest named from the expected label and the value to one decimal place
+ bar.Value = c.val
+ got := formatProgressBar(bar)() // build the formatter for bar and call it straight away
+ if got != c.str {
+ t.Fatalf("Expected %s, got %s", c.str, got)
+ }
+ })
+ }
+}
+
+func TestUpdateProgress(t *testing.T) { // table-driven check of the bar value updateProgress sets for a given piece of log text
+ cases := []struct {
+ log string // log text handed to updateProgress (after the space-prefixing below)
+ val float64 // bar.Value expected once updateProgress has run
+ }{
+ {"Downloading", 0.11},
+ {"Preprocessing", 0.2},
+ {"Preprocessing\nOCRing", 0.5}, // multi-line text: the expected value matches the last line, not the first
+ {"Preprocessing\nOCRing...", 0.53}, // NOTE(review): presumably each dot after OCRing advances the bar a little — confirm against updateProgress
+ {"OCRing........................................", 0.89}, // a long run of dots is expected to stop at 0.89, just under the Analysing value
+ {"OCRing..\nAnalysing", 0.9},
+ {"Done", 1.0},
+ {"Weirdness", 0.0}, // text matching none of the other cases is expected to leave the bar at its reset value of 0.0
+ }
+
+ _ = app.New() // shouldn't be needed for test but we get a panic without it
+ bar := widget.NewProgressBar() // single bar shared by all subtests; reset inside each one
+
+ for _, c := range cases {
+ t.Run(c.log, func(t *testing.T) { // subtest named after the raw case text
+ l := strings.ReplaceAll(" "+c.log, "\n", "\n ") // put one space in front of every line; NOTE(review): presumably mirrors how log lines reach updateProgress — confirm
+ bar.Value = 0.0 // reset so a value left by an earlier subtest cannot satisfy this one
+ updateProgress(l, bar)
+ got := bar.Value
+ if got != c.val { // exact float64 comparison; NOTE(review): relies on updateProgress yielding exactly these values (e.g. 0.53) — confirm no rounding drift is possible
+ t.Fatalf("Expected %f, got %f", c.val, got)
+ }
+ })
+ }
+}
diff --git a/cmd/rescribe/icon.256.png b/cmd/rescribe/icon.256.png
new file mode 100644
index 0000000..79e922e
--- /dev/null
+++ b/cmd/rescribe/icon.256.png
Binary files differ
diff --git a/cmd/rescribe/icon.png b/cmd/rescribe/icon.png
new file mode 100644
index 0000000..dcfb0f5
--- /dev/null
+++ b/cmd/rescribe/icon.png
Binary files differ
diff --git a/cmd/rescribe/icon.svg b/cmd/rescribe/icon.svg
new file mode 100644
index 0000000..d60a36c
--- /dev/null
+++ b/cmd/rescribe/icon.svg
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ version="1.1"
+ id="svg2"
+ width="204.8"
+ height="204.8"
+ viewBox="0 0 204.8 204.8"
+ sodipodi:docname="icon-trans.svg"
+ inkscape:version="1.0.2 (e86c870879, 2021-01-15)">
+ <metadata
+ id="metadata8">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs6" />
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="1051"
+ inkscape:window-height="1058"
+ id="namedview4"
+ showgrid="false"
+ inkscape:zoom="1.52608"
+ inkscape:cx="103.24875"
+ inkscape:cy="65.817935"
+ inkscape:window-x="0"
+ inkscape:window-y="17"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g10" />
+ <g
+ inkscape:groupmode="layer"
+ inkscape:label="Image"
+ id="g10">
+ <path
+ style="fill:#000000;stroke-width:0.16"
+ d="m 165.46012,194.36098 c 0.506,-0.0317 1.334,-0.0317 1.84,0 0.506,0.0318 0.092,0.0577 -0.92,0.0577 -1.012,0 -1.426,-0.026 -0.92,-0.0577 z m 11.92286,0.002 c 0.28759,-0.0356 0.71959,-0.0349 0.96,0.002 0.24044,0.0365 0.005,0.0656 -0.52286,0.0647 -0.528,-0.002 -0.7247,-0.0308 -0.43714,-0.0664 z m -13.08286,-0.76181 c -0.132,-0.0342 -0.66331,-0.10904 -1.1807,-0.16621 -0.71693,-0.0792 -1.02946,-0.19971 -1.3138,-0.50651 -0.24372,-0.26297 -0.76884,-0.50646 -1.51445,-0.70221 -0.62775,-0.16481 -1.37025,-0.44112 -1.65001,-0.61401 -0.79923,-0.49395 -2.30109,-0.65095 -6.18104,-0.6461 -3.91542,0.005 -6.65774,0.18683 -6.93072,0.45981 -0.21841,0.21842 -1.89226,0.22682 -2.38175,0.012 -0.20214,-0.0887 -1.19507,-0.30016 -2.2065,-0.46986 -1.53368,-0.25731 -1.89227,-0.37251 -2.16,-0.69388 -0.17657,-0.21196 -0.4528,-0.43248 -0.61386,-0.49005 -0.16106,-0.0576 -0.39937,-0.37589 -0.52957,-0.70735 -0.13428,-0.34184 -0.44096,-0.71987 -0.70861,-0.87348 -0.25954,-0.14896 -0.54282,-0.45741 -0.62952,-0.68544 -0.0883,-0.23229 -0.35766,-0.48434 -0.6125,-0.57319 -0.27445,-0.0957 -0.54453,-0.36221 -0.68092,-0.672 -0.12433,-0.28238 -0.41896,-0.60491 -0.65473,-0.71673 -0.26127,-0.12391 -0.50117,-0.42293 -0.61428,-0.76567 -0.10208,-0.30929 -0.35917,-0.67609 -0.57132,-0.81509 -0.21215,-0.139 -0.38572,-0.34692 -0.38572,-0.46206 0,-0.11512 -0.18,-0.42323 -0.4,-0.68469 -0.22,-0.26145 -0.4,-0.55499 -0.4,-0.65229 0,-0.0973 -0.11737,-0.17689 -0.26082,-0.17689 -0.2926,0 -1.75977,-1.42295 -2.22713,-2.16 -0.1674,-0.264 -0.40148,-0.54059 -0.52017,-0.61464 -0.1187,-0.0741 -0.3533,-0.45541 -0.52135,-0.84746 -0.22145,-0.51661 -0.43836,-0.7591 -0.78804,-0.88101 -0.26537,-0.0925 -0.48249,-0.2738 -0.48249,-0.40288 0,-0.57 -0.70555,-1.83969 -1.15,-2.06953 -0.25513,-0.13192 -0.53745,-0.40136 -0.62738,-0.59874 -0.0899,-0.19737 -0.31543,-0.4279 -0.5011,-0.5123 -0.18568,-0.0844 -0.47244,-0.4165 -0.63725,-0.73802 -0.16691,-0.32565 -0.45911,-0.62461 -0.65965,-0.67494 -0.19889,-0.0499 -0.52144,-0.3776 -0.72073,-0.73218 
-0.1984,-0.35301 -0.47056,-0.70329 -0.60481,-0.77843 -0.13426,-0.0751 -0.39037,-0.70658 -0.56916,-1.40323 -0.17878,-0.69666 -0.415,-1.37464 -0.52494,-1.50664 -0.26978,-0.32394 -0.50169,-1.36013 -0.73408,-3.28 -0.19763,-1.63275 -0.57683,-2.68362 -1.03131,-2.85802 -0.13972,-0.0536 -0.36701,-0.36785 -0.50508,-0.69832 -0.13808,-0.33045 -0.42283,-0.76003 -0.63278,-0.95461 -0.20995,-0.19457 -0.38428,-0.51921 -0.3874,-0.72141 -0.003,-0.20225 -0.10857,-0.54769 -0.23433,-0.76769 -0.12576,-0.22 -0.23121,-0.55947 -0.23433,-0.75437 -0.003,-0.19491 -0.14967,-0.43144 -0.32567,-0.52563 -0.176,-0.0942 -0.32244,-0.29472 -0.32543,-0.44563 -0.003,-0.1509 -0.14454,-0.56237 -0.31457,-0.91437 -0.17003,-0.352 -0.31159,-0.72428 -0.31457,-0.82729 -0.003,-0.103 -0.21646,-0.23363 -0.47437,-0.29028 -0.52059,-0.11434 -0.74819,-0.32983 -0.83384,-0.78948 -0.0315,-0.16888 -0.26684,-0.4232 -0.52304,-0.56516 -0.46283,-0.25646 -0.63103,-0.46923 -0.79854,-1.01019 -0.0481,-0.15532 -0.26495,-0.34989 -0.48192,-0.43238 -0.2626,-0.0998 -0.44829,-0.34491 -0.55547,-0.73306 -0.0885,-0.32069 -0.41116,-0.82603 -0.71692,-1.12298 -0.36358,-0.3531 -0.64409,-0.84318 -0.81073,-1.4164 -0.14014,-0.48207 -0.32162,-0.91779 -0.40329,-0.96826 -0.19777,-0.12223 -0.76117,-1.33123 -0.88084,-1.89018 -0.0586,-0.27356 -0.24668,-0.50317 -0.48713,-0.59459 -0.21545,-0.0819 -0.51958,-0.42047 -0.67585,-0.75234 -0.15627,-0.33187 -0.47465,-0.69472 -0.70752,-0.80633 -0.24035,-0.1152 -0.61102,-0.56175 -0.85738,-1.03289 -0.23868,-0.45648 -0.60333,-0.96319 -0.81033,-1.12601 -0.207,-0.16283 -0.42412,-0.55061 -0.48249,-0.86174 -0.0719,-0.38349 -0.23169,-0.6229 -0.496,-0.74332 -0.21695,-0.0988 -0.42633,-0.35994 -0.47208,-0.58865 -0.0452,-0.22605 -0.44066,-0.80643 -0.87879,-1.28974 -0.43813,-0.4833 -0.83797,-1.07849 -0.88853,-1.32263 -0.0506,-0.24415 -0.29307,-0.58713 -0.53892,-0.76219 -0.24584,-0.17505 -0.49402,-0.53242 -0.5515,-0.79415 -0.0575,-0.26172 -0.26346,-0.60274 -0.45772,-0.75781 -0.19426,-0.15507 -0.59686,-0.65995 
-0.89466,-1.12195 -0.29781,-0.462 -0.596191,-0.84 -0.663091,-0.84 -0.26013,0 -0.6238,-0.52327 -0.788332,-1.13431 -0.111492,-0.41405 -0.305253,-0.70049 -0.54896,-0.81153 -0.26961,-0.12284 -0.4771,-0.47322 -0.730103,-1.23289 -0.194398,-0.5837 -0.478317,-1.20527 -0.630929,-1.38127 -0.152612,-0.176 -0.44348,-0.84829 -0.646375,-1.49398 -0.218997,-0.69694 -0.494358,-1.24957 -0.677645,-1.36 -0.330003,-0.19883 -0.815212,-1.04979 -0.972753,-1.70602 -0.05282,-0.22 -0.261032,-0.472 -0.462704,-0.56 -0.223637,-0.0976 -0.446171,-0.40966 -0.570459,-0.8 -0.112079,-0.352 -0.328079,-0.748 -0.48,-0.88 -0.15192,-0.132 -0.519188,-0.72245 -0.816149,-1.31211 -0.296963,-0.58965 -0.644237,-1.09365 -0.77172,-1.12 -0.231253,-0.0478 -0.833223,-1.54669 -0.843125,-2.0994 -0.0085,-0.47195 -0.794578,-1.6798 -1.340358,-2.0594 -0.277676,-0.19313 -0.744477,-0.62519 -1.037333,-0.96012 -0.292858,-0.33493 -0.621387,-0.60897 -0.730066,-0.60897 -0.108678,0 -0.400987,-0.21229 -0.649573,-0.47176 -0.248585,-0.25947 -0.604056,-0.49347 -0.789934,-0.52 -0.220666,-0.0315 -0.397059,-0.23108 -0.508245,-0.57506 -0.09366,-0.28976 -0.309656,-0.6837 -0.48,-0.87544 -0.170342,-0.19173 -0.381715,-0.47144 -0.469715,-0.62157 -0.088,-0.15013 -0.484,-0.64146 -0.88,-1.09184 -0.396,-0.45037 -0.796198,-1.0856 -0.889328,-1.4116 -0.120624,-0.42224 -0.31471,-0.66176 -0.674765,-0.83273 -0.277989,-0.132 -0.574342,-0.43311 -0.65856,-0.66914 -0.108502,-0.30407 -0.324571,-0.47647 -0.741438,-0.59157 -0.323571,-0.0893 -0.6766,-0.28318 -0.784507,-0.43076 -0.186928,-0.25564 -1.122752,-0.49761 -3.371402,-0.87174 -1.623947,-0.27019 -1.642797,-0.27926 -1.843251,-0.88664 -0.112922,-0.34216 -0.336261,-0.62632 -0.558741,-0.71091 -0.203845,-0.0775 -0.415181,-0.28129 -0.469637,-0.45287 -0.05446,-0.17157 -0.294617,-0.38165 -0.533691,-0.46684 -0.916472,-0.32656 -1.58611,-0.51957 -1.80604,-0.52054 -0.125752,-4.8e-4 -0.34691,-0.20892 -0.491462,-0.46304 -0.144552,-0.25411 -0.467672,-0.5602 -0.718044,-0.6802 -0.250371,-0.119999 -0.481849,-0.361639 
-0.514395,-0.536959 -0.07988,-0.43034 -0.336853,-0.72371 -0.782264,-0.89305 -0.203872,-0.0775 -0.399901,-0.23201 -0.435621,-0.34333 -0.03572,-0.11133 -0.178537,-0.61176 -0.317371,-1.11209 -0.175254,-0.63158 -0.349691,-0.94056 -0.570582,-1.01067 -0.442239,-0.14036 -0.573287,-0.79176 -0.256539,-1.27518 0.1417,-0.21626 0.257638,-0.50374 0.257638,-0.63884 0,-0.13509 0.06558,-0.24563 0.145726,-0.24563 0.08015,0 0.302573,-0.324 0.494274,-0.72 0.232416,-0.4801 0.450296,-0.72 0.653933,-0.72 0.167961,0 0.788699,-0.252 1.379414,-0.56 0.590717,-0.308001 1.188471,-0.560001 1.328341,-0.560001 0.139872,0 0.338722,-0.0844 0.44189,-0.18757 0.1276,-0.1276 1.608691,-0.22996 4.632,-0.32011 6.455761,-0.19251 8.595198,-0.33908 8.923228,-0.61133 0.205885,-0.17087 1.017452,-0.25563 3.187796,-0.33294 1.662859,-0.0592 3.057577,-0.18184 3.251976,-0.28588 0.187212,-0.10019 0.491547,-0.18217 0.676299,-0.18217 0.213065,0 0.454461,-0.20092 0.660059,-0.54938 0.209195,-0.35456 0.490286,-0.58587 0.792654,-0.65228 0.25768,-0.0566 0.618082,-0.29305 0.80089,-0.52546 0.182808,-0.2324 0.535539,-0.50797 0.783845,-0.61238 0.248305,-0.1044 0.5734,-0.39624 0.722432,-0.64853 0.239653,-0.40571 0.755515,-0.69784 1.899278,-1.07559 0.16172,-0.0534 0.374835,-0.31117 0.473593,-0.5728 0.10965,-0.29051 0.35547,-0.53702 0.63145,-0.63322 0.2905,-0.10127 0.536001,-0.35882 0.687381,-0.72114 0.15234,-0.3646 0.37688,-0.5991 0.63591,-0.66411 0.26026,-0.0653 0.43451,-0.24831 0.49781,-0.5228 0.28205,-1.223 0.59068,-1.91545 0.93612,-2.10033 0.22303,-0.11936 0.40954,-0.40124 0.46373,-0.70082 0.0499,-0.27564 0.1374,-0.77792 0.19456,-1.11619 0.0809,-0.47895 0.19972,-0.64843 0.53682,-0.76595 0.37969,-0.13236 0.46776,-0.30255 0.71669,-1.38496 0.15609,-0.67873 0.4659,-1.50332 0.68847,-1.83242 0.39281,-0.58081 0.83586,-2.061488 0.84093,-2.810415 10e-4,-0.202828 0.1468,-0.685528 0.32316,-1.072665 0.46654,-1.024115 0.77318,-2.665963 0.96165,-5.148922 0.18858,-2.484496 0.24991,-2.821045 0.63593,-3.489635 0.15243,-0.264 0.36507,-0.948 
0.47253,-1.52 0.38313,-2.03931 0.55564,-2.643752 0.83318,-2.919368 0.15472,-0.153653 0.319,-0.513653 0.36506,-0.8 0.26373,-1.639546 0.43072,-4.370296 0.33514,-5.480363 -0.0936,-1.087058 -0.19276,-1.405051 -0.65702,-2.107112 -0.30072,-0.454763 -0.63685,-1.097456 -0.74695,-1.428205 -0.12611,-0.378854 -0.37615,-0.698438 -0.67575,-0.863723 -0.39396,-0.217335 -0.52378,-0.449922 -0.7564,-1.355219 -0.15446,-0.601072 -0.38158,-1.208317 -0.50471,-1.349434 -0.26351,-0.301987 -0.90332,-1.729424 -0.90332,-2.015338 0,-0.108945 -0.13482,-0.369793 -0.29959,-0.57966 -0.16478,-0.209868 -0.42793,-0.813578 -0.58479,-1.341578 -0.15685,-0.528 -0.37327,-1.248 -0.48092,-1.6 -0.12587,-0.41156 -0.3439,-0.713754 -0.61084,-0.84663 -0.27195,-0.135373 -0.47191,-0.417623 -0.57985,-0.818461 -0.0906,-0.336507 -0.37869,-0.881045 -0.64018,-1.210083 -0.26148,-0.32904 -0.59802,-0.876117 -0.74786,-1.215728 -0.16014,-0.36297 -0.44214,-0.684943 -0.6842,-0.781165 -0.43059,-0.171173 -1.135431,-0.563855 -1.835991,-1.022879 -0.227149,-0.148836 -1.187628,-0.388396 -2.134391,-0.532355 -1.059745,-0.161137 -2.090377,-0.430049 -2.681387,-0.699627 -0.947051,-0.431978 -1.407837,-0.571677 -2.452046,-0.743398 -0.355965,-0.05854 -0.590092,-0.214888 -0.707431,-0.472421 -0.09646,-0.211709 -0.36104,-0.426034 -0.587953,-0.476279 -1.080445,-0.239236 -1.800564,-0.611 -2.025282,-1.045556 -0.129216,-0.249876 -0.470499,-0.552744 -0.758408,-0.67304 -0.287909,-0.120296 -0.641312,-0.398568 -0.785339,-0.618381 -0.144027,-0.219815 -0.472408,-0.44177 -0.729735,-0.493235 -0.319774,-0.06396 -0.529201,-0.241655 -0.661649,-0.561413 -0.106583,-0.257311 -0.363674,-0.545245 -0.571317,-0.639853 -0.207642,-0.09461 -0.498979,-0.427949 -0.647416,-0.740755 -0.155971,-0.328685 -0.38679,-0.568741 -0.546856,-0.568741 -0.362566,0 -0.806568,-0.499728 -0.806568,-0.9078 0,-0.364202 -0.392246,-0.763336 -0.902725,-0.918574 -0.188499,-0.05732 -0.375219,-0.228485 -0.414936,-0.38036 -0.07688,-0.294007 -1.038656,-0.55521 -3.025982,-0.82181 
-0.585003,-0.07848 -1.210271,-0.221162 -1.389482,-0.317072 -0.179211,-0.09591 -0.924571,-0.174786 -1.656357,-0.175277 -0.731785,-4.8e-4 -1.60827,-0.07801 -1.947745,-0.172277 -0.715248,-0.198601 -2.93101,-0.0076 -5.862773,0.50537 -2.699179,0.47228 -3.00511,0.585914 -3.310595,1.229675 -0.143043,0.30144 -0.416776,0.619471 -0.608298,0.706733 -0.191521,0.08726 -0.473161,0.370168 -0.625867,0.628678 -0.152706,0.258511 -0.455418,0.586498 -0.672693,0.728863 -0.385229,0.252411 -0.884523,1.169486 -1.074648,1.973851 -0.052,0.22 -0.25471,0.472 -0.450467,0.56 -0.858486,0.385923 -1.153374,2.905253 -0.709261,6.059466 0.482659,3.427966 0.603747,6.134964 0.351611,7.860534 -0.271,1.85468 -0.773497,4.283797 -0.910201,4.4 -0.248987,0.211646 -0.653117,4.35011 -0.794581,8.136843 -0.15948,4.269018 -0.351574,6.188768 -0.689211,6.887823 -0.275059,0.569494 -0.246151,5.8951 0.03472,6.396627 0.170552,0.304539 0.223048,2.478576 0.260781,10.800004 0.03898,8.59637 0.09122,10.67057 0.298669,11.8587 0.138284,0.792 0.361278,2.232 0.49554,3.200001 0.134263,0.968 0.285978,1.904 0.337146,2.08 0.05117,0.176 0.121594,0.824 0.156502,1.44 0.03491,0.616 0.145855,1.8271 0.246546,2.69132 0.165278,1.418559 0.109611,3.115959 -0.121981,3.719479 -0.04837,0.12606 -0.186877,0.2292 -0.307785,0.2292 -0.120908,0 -0.264426,0.198 -0.318927,0.44 -0.0545,0.242 -0.157294,0.692 -0.22843,1 -0.07113,0.308 -0.174304,0.92 -0.229263,1.36 -0.124932,1.0002 -0.442713,2.16 -0.591832,2.16 -0.199993,0 -0.434979,0.99029 -0.777235,3.27545 -0.194677,1.29981 -0.432213,2.34308 -0.567526,2.4926 -0.365883,0.40429 -0.313159,3.22572 0.08728,4.67076 0.176,0.63512 0.32,1.4614 0.32,1.83619 0,0.37479 0.113642,1.70824 0.252538,2.96322 0.182758,1.65128 0.334057,2.39692 0.547556,2.69847 0.466901,0.65946 0.602631,1.45779 0.798196,4.69482 0.209596,3.46928 0.341625,4.34739 0.694172,4.61689 0.44898,0.34322 0.766951,3.34972 0.883272,8.3516 0.128058,5.50655 0.126464,5.92129 -0.04461,11.6 -0.142469,4.72915 0.01435,7.17776 0.493624,7.70734 0.137221,0.15164 
0.371987,0.83751 0.521701,1.52418 0.227876,1.04515 0.338984,1.28086 0.682244,1.44733 0.291381,0.14129 0.440704,0.3728 0.516013,0.8 0.16748,0.95005 0.60905,1.71433 1.024576,1.77337 0.233408,0.0332 0.462898,0.25216 0.628773,0.6 0.143666,0.30128 0.353072,0.54778 0.465346,0.54778 0.297598,0 0.856598,0.65149 0.856598,0.99834 0,0.18916 0.203941,0.3832 0.558485,0.53134 0.373872,0.15621 0.633841,0.4137 0.786445,0.77893 0.151769,0.36323 0.352808,0.56326 0.601513,0.59848 0.256274,0.0363 0.425088,0.21233 0.537687,0.56067 0.192532,0.59565 0.674905,1.17224 0.980668,1.17224 0.11836,0 0.215202,0.0968 0.215202,0.2152 0,0.11837 0.126,0.32808 0.28,0.46603 0.226944,0.20331 0.68929,0.24803 2.44,0.236 1.188,-0.008 2.540133,0.0412 3.004741,0.10963 0.784417,0.11559 0.86879,0.17191 1.181533,0.7888 0.185235,0.3654 0.540011,0.89733 0.78839,1.18208 0.248381,0.28477 0.555413,0.86077 0.682296,1.28 0.126883,0.41925 0.388062,1.12226 0.580397,1.56226 0.381443,0.87262 0.358291,1.94536 -0.06215,2.88 -0.265611,0.59045 -0.551843,1.40848 -0.768037,2.19499 -0.09491,0.3453 -0.290049,0.63514 -0.464985,0.69066 -0.165791,0.0526 -0.347275,0.28888 -0.403299,0.52502 -0.141876,0.59799 -0.521444,0.98933 -0.959572,0.98933 -0.250099,0 -0.451632,0.15574 -0.618112,0.47768 -0.13586,0.26272 -0.426817,0.52282 -0.646571,0.57797 -0.219752,0.0551 -0.560443,0.30406 -0.757089,0.55312 -0.314512,0.39835 -0.540453,0.48136 -1.877538,0.68976 -2.691163,0.41945 -7.520941,0.62888 -10.174398,0.44117 -1.31208,-0.0928 -3.213602,-0.21245 -4.225602,-0.26584 -2.640154,-0.13932 -3.51511,-0.30495 -4.415603,-0.83589 -0.438056,-0.25829 -1.056515,-0.5184 -1.374354,-0.57802 -0.317838,-0.0596 -0.665939,-0.21451 -0.773557,-0.34419 -0.107614,-0.12966 -0.297861,-0.23576 -0.422765,-0.23576 -0.124904,0 -0.367323,-0.13064 -0.538708,-0.2903 -0.213912,-0.1993 -0.87649,-0.37368 -2.113309,-0.5562 -2.937026,-0.43342 -3.919114,-0.47032 -5.641701,-0.21193 -0.88,0.13198 -3.076,0.32365 -4.88,0.42589 -2.836818,0.1608 -3.32463,0.22689 -3.610312,0.48923 
-0.181672,0.16681 -0.422563,0.30331 -0.535314,0.30331 -0.11275,0 -0.39611,0.0999 -0.629688,0.22197 -0.233577,0.12208 -0.624132,0.3012 -0.867899,0.39803 -0.243766,0.0968 -0.505313,0.33894 -0.581216,0.53803 -0.116958,0.30677 -0.395352,0.41765 -1.825558,0.72711 -1.458335,0.31553 -1.792311,0.33956 -2.458784,0.17689 -0.920535,-0.22465 -1.171229,-0.39787 -1.171229,-0.80926 0,-0.16943 -0.193232,-0.48714 -0.429403,-0.706 -0.550967,-0.51063 -0.70364,-1.56125 -0.509591,-3.50677 0.112396,-1.12686 0.211263,-1.49219 0.454647,-1.68 0.171059,-0.132 0.404017,-0.528 0.517683,-0.88 0.12344,-0.38227 0.351637,-0.7036 0.566664,-0.79794 0.198,-0.0869 0.36,-0.21905 0.36,-0.29374 0,-0.29661 1.198315,-1.82742 1.61991,-2.06938 0.246732,-0.14161 0.490644,-0.42496 0.542024,-0.62969 0.05138,-0.20472 0.306618,-0.49458 0.567191,-0.64411 0.260573,-0.14956 0.543334,-0.4549 0.628361,-0.67853 0.08503,-0.22363 0.252447,-0.40661 0.372047,-0.40661 0.119598,0 0.544233,-0.34514 0.94363,-0.76698 0.399399,-0.42184 1.059327,-0.94372 1.466507,-1.15972 0.407181,-0.21602 0.74033,-0.47647 0.74033,-0.5788 0,-0.10232 0.231173,-0.38058 0.513718,-0.61832 0.502255,-0.42261 1.086282,-1.47213 1.086282,-1.95207 0,-0.13253 0.238528,-0.46667 0.530061,-0.74253 0.525169,-0.49696 0.714664,-0.95915 1.0912,-2.66158 0.107049,-0.484 0.32704,-1.10608 0.488865,-1.38242 0.419732,-0.7167 0.599647,-2.43051 0.60233,-5.73758 0.0029,-3.58661 -0.205211,-6.19294 -0.568931,-7.12481 -0.172877,-0.44292 -0.303091,-1.31052 -0.342043,-2.27898 -0.05378,-1.33715 -0.01778,-1.6386 0.248424,-2.08 0.292256,-0.4846 0.448665,-1.4733 0.916942,-5.79621 0.0572,-0.528 0.159405,-1.032 0.227131,-1.12 0.265983,-0.3456 0.644075,-3.73301 0.901867,-8.08 0.222023,-3.74385 0.249861,-5.87729 0.155536,-11.92 -0.203592,-13.04284 -0.40428,-17.63254 -0.797558,-18.24 -0.135587,-0.20943 -0.271862,-1.50863 -0.394392,-3.76 -0.20999,-3.858349 -0.06674,-6.178229 0.449186,-7.274379 0.184046,-0.39104 0.435003,-1.655081 0.636425,-3.205621 0.290786,-2.23845 0.312047,-2.8313 
0.169259,-4.72 -0.08981,-1.188 -0.230908,-3.40231 -0.313545,-4.9207 -0.08264,-1.51838 -0.224613,-2.89965 -0.315503,-3.06948 -0.09089,-0.16983 -0.165254,-0.45151 -0.165254,-0.62597 0,-0.17445 -0.07358,-0.34171 -0.163504,-0.37168 -0.350392,-0.1168 -0.491906,-2.04264 -0.574048,-7.812167 -0.05964,-4.189254 -0.01933,-7.03816 0.133574,-9.44 0.323008,-5.073798 -0.08867,-11.863782 -0.92644,-15.28 -0.343,-1.398675 -0.412467,-2.193906 -0.552089,-6.32 -0.137283,-4.056954 -0.123547,-5.000952 0.09778,-6.72 0.512008,-3.976706 0.442685,-6.059803 -0.233256,-7.009074 -0.318431,-0.447203 -1.759841,-1.310926 -2.18771,-1.310926 -0.118889,0 -0.310245,-0.129768 -0.425235,-0.288373 -0.182963,-0.25236 -0.879309,-0.4656 -2.529072,-0.774469 -1.062438,-0.198908 -1.6,-0.407611 -1.6,-0.621177 0,-0.127427 -0.234,-0.339307 -0.52,-0.470845 -0.956173,-0.439765 -1.561227,-0.843541 -1.88961,-1.261011 -0.179092,-0.22768 -0.510494,-0.506251 -0.736446,-0.619045 -0.478019,-0.238624 -0.768453,-0.980603 -0.907034,-2.317221 -0.141195,-1.361835 0.343932,-2.920038 0.91268,-2.931492 0.09923,-0.002 0.294247,-0.235999 0.433381,-0.52 0.158304,-0.32313 0.440269,-0.578477 0.753466,-0.682337 0.275272,-0.09128 0.665433,-0.319883 0.867029,-0.507998 0.285508,-0.26642 0.709988,-0.380049 1.920228,-0.51403 2.054674,-0.227463 8.044392,-0.219867 11.246306,0.01426 2.060832,0.150692 3.293154,0.143007 6.32,-0.03941 2.501347,-0.150749 6.143408,-0.212951 10.88,-0.185818 7.314414,0.0419 10.413752,-0.109958 10.889712,-0.533565 0.250366,-0.222828 1.150192,-0.337126 5.830288,-0.7405764 2.971763,-0.256183 10.731882,-0.234943 14.764398,0.04041 2.701808,0.184489 3.008298,0.235474 3.306708,0.5500784 0.309217,0.325998 1.153246,0.617664 2.248894,0.777134 0.264,0.03843 0.876,0.186904 1.36,0.329952 1.879128,0.555385 5.576279,0.928045 7.769529,0.78314 1.10414,-0.07295 1.38502,-0.03649 1.95363,0.253597 0.36525,0.186337 0.77243,0.338794 0.90484,0.338794 0.13241,0 0.32085,0.14969 0.41877,0.332645 0.0979,0.182954 0.37672,0.401911 
0.61958,0.48657 0.25072,0.0874 0.53924,0.367387 0.6676,0.647838 0.21264,0.464607 0.61264,0.693978 1.82605,1.047106 0.176,0.05122 0.54571,0.334487 0.82158,0.629484 0.27587,0.294997 0.64897,0.536357 0.8291,0.536357 0.1813,0 0.48013,0.214302 0.66932,0.48 0.18798,0.264 0.42689,0.480658 0.53089,0.481462 0.10401,8e-4 0.52898,0.166079 0.94438,0.367274 0.58542,0.283546 0.84936,0.544075 1.17369,1.158536 0.23014,0.436 0.58504,0.936728 0.78868,1.112728 0.20364,0.176 0.49402,0.518414 0.64531,0.76092 0.26778,0.429254 1.45559,1.078059 1.97739,1.080091 0.14281,4.8e-4 0.37807,0.20917 0.5228,0.463584 0.16832,0.295898 0.44824,0.503227 0.77686,0.575405 0.31817,0.06988 0.60936,0.279464 0.76495,0.550571 0.13818,0.240755 0.39521,0.503368 0.57118,0.583584 0.17597,0.08021 0.45839,0.415845 0.6276,0.745845 0.1692,0.33 0.38919,0.6 0.48886,0.6 0.0997,0 0.55399,0.154501 1.00958,0.343334 0.63923,0.264944 0.84698,0.438469 0.90993,0.76 0.22062,1.127008 0.43652,1.68904 0.79694,2.074528 0.26514,0.283591 0.51723,0.913581 0.76808,1.919504 0.30472,1.221941 0.43967,1.522564 0.73368,1.634343 0.21136,0.08036 0.39844,0.321163 0.45264,0.582632 0.23084,1.113643 0.50341,1.755806 0.90226,2.125659 0.23724,0.22 0.48052,0.598798 0.54062,0.841773 0.0601,0.242974 0.30919,0.584137 0.55356,0.758139 0.35896,0.255603 0.47657,0.508787 0.61236,1.318227 0.0924,0.551024 0.21287,1.217861 0.26762,1.481861 0.0548,0.264 0.1313,0.636965 0.17009,0.828811 0.0388,0.191847 0.16042,0.445165 0.27029,0.562931 0.39387,0.422178 0.71627,1.185031 0.91996,2.176772 0.11393,0.554683 0.35849,1.199889 0.54347,1.433793 0.18911,0.239125 0.41538,0.883229 0.51692,1.471487 0.41492,2.403811 0.57999,2.996137 1.00503,3.606206 0.24523,0.352 0.49339,0.885987 0.55146,1.186638 0.0581,0.300652 0.28703,0.78454 0.50881,1.075309 0.65607,0.860149 0.79216,1.987418 0.7893,6.538053 -0.003,4.99048 -0.21416,6.982606 -0.80193,7.570379 -0.2163,0.216291 -0.39326,0.462359 -0.39326,0.546815 0,0.08446 -0.11433,0.546824 -0.25406,1.027486 -0.18641,0.641238 
-0.35453,0.912128 -0.63139,1.017392 -0.25883,0.09841 -0.41001,0.317611 -0.48137,0.69799 -0.0572,0.304989 -0.29509,0.73844 -0.5286,0.963224 -0.23352,0.224784 -0.60307,0.590501 -0.82122,0.812706 -0.21814,0.222205 -0.52311,0.404008 -0.67769,0.404008 -0.18253,0 -0.44839,0.331478 -0.75838,0.945552 -0.26252,0.520053 -0.68605,1.094187 -0.94117,1.275847 -0.25512,0.18167 -0.55126,0.54131 -0.65809,0.79921 -0.10683,0.25791 -0.3233,0.50267 -0.48105,0.54393 -0.16565,0.0433 -0.38447,0.36792 -0.51794,0.76833 -0.1544,0.46321 -0.38675,0.77956 -0.70007,0.95317 -0.25793,0.14292 -0.4944,0.39803 -0.52549,0.56691 -0.0709,0.38524 -0.48905,0.86705 -0.75247,0.86705 -0.10967,0 -0.34874,0.25311 -0.53126,0.56246 -0.18252,0.30936 -0.50779,0.62935 -0.72281,0.7111 -0.21502,0.0818 -0.48679,0.35064 -0.60394,0.59754 -0.11715,0.2469 -0.43777,0.5569 -0.71248,0.6889 -0.27472,0.132 -0.61522,0.44122 -0.75667,0.68716 -0.14145,0.24594 -0.45669,0.51704 -0.70052,0.60245 -0.24383,0.0854 -0.56598,0.37018 -0.71589,0.63284 -0.15419,0.27016 -0.40625,0.47755 -0.58041,0.47755 -0.31999,0 -0.61639,0.27834 -0.90078,0.84591 -0.0914,0.18248 -0.36532,0.38486 -0.60862,0.44973 -0.46338,0.12356 -3.63763,3.16112 -3.63763,3.48099 0,0.10085 -0.13145,0.18337 -0.29211,0.18337 -0.16066,0 -0.46121,0.18 -0.66789,0.4 -0.20668,0.22 -0.51067,0.4 -0.67553,0.4 -0.16486,0 -0.45831,0.16976 -0.65211,0.37724 -0.42544,0.45547 -1.1108,0.57414 -4.23351,0.73305 -2.02918,0.10327 -2.32192,0.15439 -2.65031,0.4629 -0.20304,0.19075 -0.47612,0.34681 -0.60685,0.34681 -0.13073,0 -0.30609,0.0664 -0.38969,0.14748 -0.0836,0.0811 -0.3991,0.21042 -0.70112,0.28735 -0.40908,0.1042 -0.58797,0.26041 -0.70148,0.61252 -0.20374,0.632001 -0.41108,0.872651 -0.75187,0.872651 -0.43035,0 -0.49951,0.33632 -0.67455,3.28 -0.12766,2.147 -0.11552,3.03938 0.0576,4.23609 0.19551,1.351149 0.2629,1.537629 0.61938,1.714129 0.25344,0.12549 0.44078,0.39072 0.51131,0.72392 0.35673,1.68523 0.45789,1.88586 0.95087,1.88586 0.18757,0 0.40269,0.20905 0.56468,0.54875 0.15473,0.32447 
0.41869,0.58815 0.64585,0.64516 0.2113,0.053 0.53149,0.3461 0.71154,0.65126 0.22321,0.37831 0.44485,0.55483 0.69668,0.55483 0.53839,0 2.66603,2.17619 3.20532,3.27846 0.3386,0.69206 0.51121,0.88154 0.80308,0.88154 0.40791,0 0.55008,0.15824 0.79067,0.88 0.1068,0.32039 0.28421,0.49759 0.53356,0.5329 0.24895,0.0353 0.4498,0.2354 0.60214,0.6 0.12573,0.30091 0.34081,0.5471 0.47796,0.5471 0.38659,0 1.70258,0.82311 2.13639,1.33624 0.21453,0.25376 0.6643,0.75548 0.99949,1.11495 0.3352,0.35946 0.65568,0.801 0.71219,0.98119 0.0565,0.18019 0.27274,0.48618 0.48051,0.67998 0.20777,0.1938 0.37776,0.527 0.37776,0.74046 0,0.6075 0.42553,1.47468 0.86489,1.76257 0.23756,0.15565 0.44067,0.47816 0.49987,0.79371 0.0548,0.29234 0.34119,0.83667 0.63632,1.20963 0.29514,0.37296 0.69151,1.16032 0.88083,1.74969 0.18931,0.58937 0.48533,1.22341 0.65781,1.40898 0.17247,0.18557 0.39008,0.61757 0.48357,0.96 0.20873,0.76455 0.40779,1.0226 0.78881,1.0226 0.19358,0 0.40159,0.24566 0.61333,0.72432 0.17622,0.39838 0.88983,1.31892 1.5858,2.04563 0.69596,0.72672 1.316,1.48078 1.37786,1.67568 0.0619,0.1949 0.22947,0.35437 0.37247,0.35437 0.32903,0 1.93896,1.67694 2.19537,2.28674 0.10608,0.2523 0.36165,0.543 0.56792,0.64599 0.22977,0.11473 0.42579,0.40419 0.50605,0.74727 0.31144,1.33139 0.47478,1.76 0.67072,1.76 0.30322,0 0.73838,0.53329 0.73838,0.90488 0,0.17583 0.324,0.67259 0.72,1.10392 0.396,0.43132 0.72,0.83275 0.72,0.89206 0,0.0593 0.20605,0.17967 0.45789,0.26746 0.39261,0.13686 0.49902,0.31155 0.74632,1.22515 0.22845,0.84394 0.3783,1.11201 0.72056,1.289 0.35865,0.18546 0.45074,0.3675 0.54153,1.0705 0.16133,1.24911 0.61092,2.51415 0.93013,2.61714 0.15697,0.0506 0.35386,0.37888 0.45561,0.75954 0.0986,0.36869 0.43365,1.02574 0.74468,1.46011 0.31103,0.43437 0.62991,1.023 0.70862,1.30808 0.0957,0.3465 0.28016,0.564 0.55656,0.65613 0.33406,0.11135 0.48244,0.35173 0.77278,1.25192 0.23078,0.71553 0.47594,1.17359 0.68526,1.28036 0.564,0.28768 0.84006,0.58255 0.84006,0.8973 0,0.17826 0.16743,0.36803 
0.4041,0.45801 0.22226,0.0845 0.44478,0.28072 0.49451,0.43604 0.21988,0.68683 0.45762,0.99767 0.87407,1.14284 0.24574,0.0857 0.55722,0.36931 0.6922,0.63031 0.13672,0.2644 0.46052,0.54955 0.73118,0.6439 0.2735,0.0953 0.55122,0.34149 0.63555,0.56329 0.11249,0.29586 0.36736,0.45719 1.02371,0.648 0.48066,0.13973 0.98788,0.25406 1.12716,0.25406 0.13927,0 0.35845,0.25184 0.48706,0.55965 0.12861,0.30779 0.41399,0.64173 0.63418,0.74205 0.2202,0.10032 0.47103,0.38515 0.55741,0.63296 0.1094,0.3138 0.31232,0.49076 0.66864,0.5831 0.3929,0.10181 0.55317,0.26128 0.6909,0.68741 0.12674,0.39211 0.28871,0.57032 0.55229,0.60766 0.22262,0.0315 0.50452,0.27579 0.6993,0.60591 0.17948,0.3042 0.55902,0.65032 0.84343,0.76915 0.38294,0.16 0.54364,0.34875 0.61939,0.72749 0.0735,0.36764 0.23044,0.55979 0.55813,0.68348 0.25478,0.0962 0.52743,0.3617 0.61813,0.60197 0.11568,0.30648 0.31518,0.46048 0.69496,0.53643 0.59263,0.11853 1.1137,0.51668 1.1137,0.85096 0,0.12026 0.17653,0.29908 0.39229,0.39738 0.21576,0.0983 0.50714,0.42077 0.64751,0.71658 0.1418,0.29881 0.37631,0.53828 0.52771,0.53888 0.14987,0 0.48849,0.14409 0.75249,0.31894 0.264,0.17485 0.59772,0.31837 0.7416,0.31894 0.14388,0 0.37954,0.18106 0.52369,0.40106 0.16728,0.2553 0.41117,0.4 0.6742,0.4 0.24669,0 0.58294,0.18286 0.83763,0.4555 0.42947,0.45976 1.86006,1.3045 2.2092,1.3045 0.10387,0 0.27323,0.25562 0.37634,0.56805 0.10312,0.31243 0.3247,0.65376 0.49241,0.75849 0.16771,0.10474 0.30493,0.26765 0.30493,0.36202 0,0.0944 0.216,0.37851 0.48,0.63144 0.264,0.25293 0.48,0.5904 0.48,0.74994 0,0.15953 0.0955,0.29006 0.21211,0.29006 0.11666,0 0.38296,0.18186 0.59178,0.40413 0.89246,0.95 2.34227,1.32553 5.65206,1.46405 1.94954,0.0816 2.42333,0.14878 2.64,0.37438 0.38042,0.39613 1.16914,0.60496 2.95687,0.78294 0.86505,0.0861 1.66318,0.23159 1.77363,0.32325 0.23795,0.19749 0.48189,0.98327 0.74854,2.41125 0.10946,0.58611 0.32314,1.16491 0.48962,1.32624 0.16656,0.16139 0.29529,0.51029 0.29518,0.8 -1.1e-4,0.28256 0.0827,1.10674 0.18394,1.83149 
0.13849,0.99117 0.1388,1.76437 0.002,3.12 -0.16573,1.63315 -0.22053,1.82979 -0.58392,2.09561 -0.22056,0.16135 -0.40103,0.3925 -0.40103,0.51367 0,0.12117 -0.12129,0.61424 -0.26955,1.09571 l -0.26955,0.87538 -0.80674,0.11096 c -0.44369,0.061 -2.7164,0.0916 -5.05044,0.0679 -2.33404,-0.0236 -5.46772,0 -6.96372,0.0521 -1.496,0.0523 -2.828,0.0671 -2.96,0.0328 z"
+ id="path32" />
+ </g>
+</svg>
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 59d8166..96f2853 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -1,4 +1,4 @@
-// Copyright 2021 Nick White.
+// Copyright 2021-2022 Nick White.
// Use of this source code is governed by the GPLv3
// license that can be found in the LICENSE file.
@@ -12,9 +12,11 @@ package main
import (
"archive/zip"
"bytes"
- _ "embed"
+ "context"
"flag"
"fmt"
+ "image"
+ "image/draw"
"image/jpeg"
"image/png"
"io"
@@ -28,13 +30,14 @@ import (
"strings"
"time"
+ "golang.org/x/image/tiff"
"rescribe.xyz/bookpipeline"
"rescribe.xyz/bookpipeline/internal/pipeline"
"rescribe.xyz/pdf"
"rescribe.xyz/utils/pkg/hocr"
)
-const usage = `Usage: rescribe [-v] [-gui] [-systess] [-tesscmd] [-t training] bookdir/book.pdf [savedir]
+const usage = `Usage: rescribe [-v] [-gui] [-systess] [-tesscmd cmd] [-gbookcmd cmd] [-t training] bookdir/book.pdf [savedir]
Process and OCR a book using the Rescribe pipeline on a local machine.
@@ -42,9 +45,6 @@ OCR results are saved into the bookdir directory unless savedir is
specified.
`
-//go:embed tessdata.20211001.zip
-var tessdatazip []byte
-
const QueueTimeoutSecs = 2 * 60
const PauseBetweenChecks = 1 * time.Second
const LogSaveTime = 1 * time.Minute
@@ -73,6 +73,7 @@ type Clouder interface {
type Pipeliner interface {
Clouder
PreQueueId() string
+ PreNoWipeQueueId() string
WipeQueueId() string
OCRPageQueueId() string
AnalyseQueueId() string
@@ -93,7 +94,7 @@ func resetTimer(t *time.Timer, d time.Duration) {
}
}
-// unpackTessZip unpacks a byte array of a zip file into a directory
+// unpackZip unpacks a byte array of a zip file into a directory
func unpackZip(b []byte, dir string) error {
br := bytes.NewReader(b)
zr, err := zip.NewReader(br, br.Size())
@@ -138,22 +139,25 @@ func unpackZip(b []byte, dir string) error {
func main() {
deftesscmd := "tesseract"
+ defgbookcmd := "getgbook"
if runtime.GOOS == "windows" {
deftesscmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
+ defgbookcmd = "getgbook.exe"
}
verbose := flag.Bool("v", false, "verbose")
usegui := flag.Bool("gui", false, "Use graphical user interface")
systess := flag.Bool("systess", false, "Use the system installed Tesseract, rather than the copy embedded in rescribe.")
- training := flag.String("t", "rescribev8_fast.traineddata", `Path to the tesseract training file to use.
+ training := flag.String("t", "rescribev9_fast.traineddata", `Path to the tesseract training file to use.
These training files are included in rescribe, and are always available:
-- carolinemsv1_fast.traineddata (Caroline Miniscule)
-- eng.traineddata (Modern English)
-- lat.traineddata (Latin modern printing)
-- rescribefrav2_fast.traineddata (French historic printing)
-- rescribev8_fast.traineddata (Latin historic printing)
+- eng.traineddata (English, modern print)
+- lat.traineddata (Latin, modern print)
+- rescribev9_fast.traineddata (Latin/English/French, printed ca 1500-1800)
`)
+ gbookcmd := flag.String("gbookcmd", defgbookcmd, "The getgbook executable to run. You may need to set this to the full path of getgbook.exe if you're on Windows.")
tesscmd := flag.String("tesscmd", deftesscmd, "The Tesseract executable to run. You may need to set this to the full path of Tesseract.exe if you're on Windows.")
+ wipe := flag.Bool("wipe", false, "Use wiper tool to remove noise like gutters from page before processing.")
+ fullpdf := flag.Bool("fullpdf", false, "Use highest image quality for searchable PDF (requires lots of RAM).")
flag.Usage = func() {
fmt.Fprintf(flag.CommandLine.Output(), usage)
@@ -185,7 +189,7 @@ These training files are included in rescribe, and are always available:
log.Fatalln("Error setting up tesseract directory:", err)
}
- if !*systess {
+ if !*systess && len(tesszip) > 0 {
err = unpackZip(tesszip, tessdir)
if err != nil {
log.Fatalln("Error unpacking embedded Tesseract zip:", err)
@@ -200,30 +204,73 @@ These training files are included in rescribe, and are always available:
}
}
+ _, err = exec.LookPath(tessCommand)
+ if err != nil {
+ log.Fatalf("No tesseract executable found [tried %s], either set -tesscmd and -systess on the command line or use the official build which includes an embedded copy of Tesseract.", tessCommand)
+ }
+
+ gbookCommand := *gbookcmd
+ if len(gbookzip) > 0 {
+ err = unpackZip(gbookzip, tessdir)
+ if err != nil {
+ log.Fatalln("Error unpacking embedded getgbook zip:", err)
+ }
+ switch runtime.GOOS {
+ case "darwin":
+ gbookCommand = filepath.Join(tessdir, "getgbook")
+ case "linux":
+ gbookCommand = filepath.Join(tessdir, "getgbook")
+ case "windows":
+ gbookCommand = filepath.Join(tessdir, "getgbook.exe")
+ }
+ }
+
+ _, err = exec.LookPath(gbookCommand)
+ if err != nil {
+ log.Printf("No getgbook found [tried %s], google book downloading will be disabled, either set -gbookcmd on the command line or use the official build which includes an embedded getgbook.", gbookCommand)
+ gbookCommand = ""
+ }
+
tessdatadir := filepath.Join(tessdir, "tessdata")
err = os.MkdirAll(tessdatadir, 0755)
if err != nil {
log.Fatalln("Error setting up tessdata directory:", err)
}
- err = unpackZip(tessdatazip, tessdatadir)
- if err != nil {
- log.Fatalln("Error unpacking embedded tessdata zip:", err)
+ if len(tessdatazip) > 0 {
+ err = unpackZip(tessdatazip, tessdatadir)
+ if err != nil {
+ log.Fatalln("Error unpacking embedded tessdata zip:", err)
+ }
}
- // if trainingPath doesn't exist, set it to the embedded training instead
- _, err = os.Stat(trainingPath)
- if err != nil && !os.IsExist(err) {
- trainingPath = filepath.Base(trainingPath)
- trainingPath = filepath.Join(tessdatadir, trainingPath)
+	// copy the training file into the tessdir directory, so that we can keep
+	// that as a writeable space, which is needed for opening other trainings
+	// in sandboxes like flatpak
+ in, err := os.Open(trainingPath)
+ trainingPath = filepath.Join(tessdatadir, filepath.Base(trainingPath))
+ if err != nil {
+ in, err = os.Open(trainingPath)
+ if err != nil {
+ log.Fatalf("Error opening training file %s: %v", trainingPath, err)
+ }
}
-
- f, err := os.Open(trainingPath)
+ defer in.Close()
+ newPath := trainingPath + ".new"
+ out, err := os.Create(newPath)
if err != nil {
- fmt.Fprintf(os.Stderr, "Error: Training files %s or %s could not be opened.\n", *training, trainingPath)
- fmt.Fprintf(os.Stderr, "Set the `-t` flag with path to a tesseract .traineddata file.\n")
- os.Exit(1)
+ log.Fatalf("Error creating training file %s: %v", newPath, err)
+ }
+ defer out.Close()
+ _, err = io.Copy(out, in)
+ if err != nil {
+ log.Fatalf("Error copying training file to %s: %v", newPath, err)
+ }
+ in.Close()
+ out.Close()
+ err = os.Rename(newPath, trainingPath)
+ if err != nil {
+ log.Fatalf("Error moving new training file to %s: %v", trainingPath, err)
}
- f.Close()
abstraining, err := filepath.Abs(trainingPath)
if err != nil {
@@ -237,13 +284,26 @@ These training files are included in rescribe, and are always available:
}
if flag.NArg() < 1 || *usegui {
- err := startGui(*verboselog, tessCommand, trainingName, *systess, tessdir)
+ err := startGui(verboselog, tessCommand, gbookCommand, trainingName, tessdir)
+ err = os.RemoveAll(tessdir)
+ if err != nil {
+ log.Printf("Error removing tesseract directory %s: %v", tessdir, err)
+ }
+
if err != nil {
log.Fatalln("Error in gui:", err)
}
return
}
+ f, err := os.Open(trainingPath)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Error: Training files %s or %s could not be opened.\n", *training, trainingPath)
+ fmt.Fprintf(os.Stderr, "Set the `-t` flag with path to a tesseract .traineddata file.\n")
+ os.Exit(1)
+ }
+ f.Close()
+
bookdir := flag.Arg(0)
bookname := strings.ReplaceAll(filepath.Base(bookdir), " ", "_")
savedir := bookdir
@@ -258,27 +318,44 @@ These training files are included in rescribe, and are always available:
log.Fatalln("Error opening book file/dir:", err)
}
+ var ctx context.Context
+ ctx = context.Background()
+
+ // TODO: support google book downloading, as done with the GUI
+
// try opening as a PDF, and extracting
if !fi.IsDir() {
if flag.NArg() < 2 {
savedir = strings.TrimSuffix(bookdir, ".pdf")
}
- bookdir, err = extractPdfImgs(bookdir)
+ bookdir, err = extractPdfImgs(ctx, bookdir)
if err != nil {
log.Fatalln("Error opening file as PDF:", err)
}
+ // if this occurs then extractPdfImgs() will have recovered from
+ // a panic in the pdf package
+ if bookdir == "" {
+ log.Fatalln("Error opening file as PDF: image type not supported, you will need to extract images manually.")
+ }
bookname = strings.TrimSuffix(bookname, ".pdf")
ispdf = true
}
- err = startProcess(*verboselog, tessCommand, bookdir, bookname, trainingName, *systess, savedir, tessdir)
+ err = startProcess(ctx, verboselog, tessCommand, bookdir, bookname, trainingName, savedir, tessdir, !*wipe, *fullpdf)
if err != nil {
log.Fatalln(err)
}
+ if !*systess {
+ err = os.RemoveAll(tessdir)
+ if err != nil {
+ log.Printf("Error removing tesseract directory %s: %v", tessdir, err)
+ }
+ }
+
if ispdf {
os.RemoveAll(filepath.Clean(filepath.Join(bookdir, "..")))
}
@@ -286,7 +363,16 @@ These training files are included in rescribe, and are always available:
// extractPdfImgs extracts all images embedded in a PDF to a
// temporary directory, which is returned on success.
-func extractPdfImgs(path string) (string, error) {
+func extractPdfImgs(ctx context.Context, path string) (string, error) {
+ defer func() {
+ // unfortunately the pdf library will panic if it sees an encoding
+ // it can't decode, so recover from that and give a warning
+ r := recover()
+ if r != nil {
+ fmt.Fprintf(os.Stderr, "Warning: Error extracting from PDF: %v\n", r)
+ }
+ }()
+
p, err := pdf.Open(path)
if err != nil {
return "", err
@@ -305,9 +391,20 @@ func extractPdfImgs(path string) (string, error) {
}
for pgnum := 1; pgnum <= p.NumPage(); pgnum++ {
+ select {
+ case <-ctx.Done():
+ return "", ctx.Err()
+ default:
+ }
if p.Page(pgnum).V.IsNull() {
continue
}
+ var rotate int64
+ for v := p.Page(pgnum).V; !v.IsNull(); v = v.Key("Parent") {
+ if r := v.Key("Rotate"); !r.IsNull() {
+ rotate = r.Int64()
+ }
+ }
res := p.Page(pgnum).Resources()
if res.Kind() != pdf.Dict {
continue
@@ -323,7 +420,7 @@ func extractPdfImgs(path string) (string, error) {
continue
}
- fn := fmt.Sprintf("%s-%04d.jpg", k, pgnum)
+ fn := fmt.Sprintf("%04d-%s.jpg", pgnum, k)
path := filepath.Join(tempdir, fn)
w, err := os.Create(path)
defer w.Close()
@@ -343,16 +440,31 @@ func extractPdfImgs(path string) (string, error) {
if err != nil {
return tempdir, fmt.Errorf("Error removing extracted image %s from PDF: %v\n", fn, err)
}
+
+ if rotate != 0 {
+ err = rotateImage(path, rotate)
+ if err != nil {
+ return tempdir, fmt.Errorf("Error rotating extracted image %s from PDF: %v\n", fn, err)
+ }
+ }
}
}
// TODO: check for places where there are multiple images per page, and only keep largest ones where that's the case
+ select {
+ case <-ctx.Done():
+ return "", ctx.Err()
+ default:
+ }
+
return tempdir, nil
}
// rmIfNotImage attempts to decode a given file as an image. If it is
// decode-able as PNG, then rename file extension from .jpg to .png,
-// if it fails to be read as PNG or JPEG it will be deleted.
+// if it is decode-able as TIFF then convert to PNG and rename file
+// extension appropriately, if it fails to be read as PNG, TIFF or
+// JPEG it will just be deleted.
func rmIfNotImage(f string) error {
r, err := os.Open(f)
defer r.Close()
@@ -363,9 +475,9 @@ func rmIfNotImage(f string) error {
r.Close()
if err == nil {
b := strings.TrimSuffix(f, ".jpg")
- err = os.Rename(f, b + ".png")
+ err = os.Rename(f, b+".png")
if err != nil {
- return fmt.Errorf("Error renaming %s to %s: %v", f, b + ".png", err)
+ return fmt.Errorf("Error renaming %s to %s: %v", f, b+".png", err)
}
return nil
}
@@ -376,19 +488,134 @@ func rmIfNotImage(f string) error {
return fmt.Errorf("Failed to open image %s: %v\n", f, err)
}
_, err = jpeg.Decode(r)
+ r.Close()
+ if err == nil {
+ return nil
+ }
+
+ r, err = os.Open(f)
+ defer r.Close()
if err != nil {
+ return fmt.Errorf("Failed to open image %s: %v\n", f, err)
+ }
+ t, err := tiff.Decode(r)
+ if err == nil {
+ b := strings.TrimSuffix(f, ".jpg")
+ n, err := os.Create(b + ".png")
+ defer n.Close()
+ if err != nil {
+ return fmt.Errorf("Failed to create file to store new png %s from tiff %s: %v\n", b+".png", f, err)
+ }
+ err = png.Encode(n, t)
+ if err != nil {
+ return fmt.Errorf("Failed to encode tiff as png for %s: %v\n", f, err)
+ }
r.Close()
err = os.Remove(f)
if err != nil {
- return fmt.Errorf("Failed to remove invalid image %s: %v", f, err)
+ return fmt.Errorf("Failed to remove original tiff %s: %v\n", f, err)
+ }
+ return nil
+ }
+
+ r.Close()
+ err = os.Remove(f)
+ if err != nil {
+ return fmt.Errorf("Failed to remove invalid image %s: %v", f, err)
+ }
+
+ return nil
+}
+
+// rotateImage rotates the image at the given path clockwise by the
+// given angle, overwriting the file in place. Only angles of 90, 180
+// and 270 degrees are supported; any other angle returns an error.
+// The file is decoded as PNG, JPEG or TIFF (tried in that order), and
+// is written back as JPEG if path ends in ".jpg", otherwise as PNG.
+func rotateImage(path string, angle int64) error {
+	switch angle {
+	case 90:
+		// proceed with the rest of the function
+	case 180, 270:
+		// rotate the image again first, as many times as necessary.
+		// this is inefficient but easy.
+		err := rotateImage(path, angle-90)
+		if err != nil {
+			return fmt.Errorf("error with a rotation run: %w", err)
+		}
+	default:
+		return fmt.Errorf("Rotation angle of %d is not supported", angle)
+	}
+
+	// read the file once, so that each decoder can be tried against
+	// the same bytes without reopening the file
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return fmt.Errorf("Failed to open image: %w", err)
+	}
+	img, err := png.Decode(bytes.NewReader(data))
+	if err != nil {
+		img, err = jpeg.Decode(bytes.NewReader(data))
+	}
+	if err != nil {
+		img, err = tiff.Decode(bytes.NewReader(data))
+	}
+	if err != nil {
+		return fmt.Errorf("Failed to decode image as png, jpeg or tiff: %w", err)
+	}
+
+	b := img.Bounds()
+
+	orig := image.NewRGBA(b)
+	draw.Draw(orig, b, img, b.Min, draw.Src)
+
+	// a quarter turn swaps the width and height
+	rotated := image.NewRGBA(image.Rect(0, 0, b.Dy(), b.Dx()))
+
+	// source pixel (x, y) moves to column (height-1-y), row x, which is
+	// a clockwise quarter turn. y covers [Min.Y, Max.Y) so that every
+	// source row is copied and nothing is read outside the image, and
+	// offsets are taken relative to b.Min so that images whose bounds
+	// do not start at the origin are handled too.
+	for y := b.Min.Y; y < b.Max.Y; y++ {
+		destx := b.Max.Y - 1 - y
+		for x := b.Min.X; x < b.Max.X; x++ {
+			rotated.SetRGBA(destx, x-b.Min.X, orig.RGBAAt(x, y))
+		}
+	}
+
+	w, err := os.Create(path)
+	if err != nil {
+		return fmt.Errorf("Failed to create rotated image: %w", err)
+	}
+
+	// keep the encoding in line with the file extension
+	if strings.HasSuffix(path, ".jpg") {
+		err = jpeg.Encode(w, rotated, nil)
+	} else {
+		err = png.Encode(w, rotated)
+	}
+	if err != nil {
+		w.Close()
+		return fmt.Errorf("Failed to encode rotated image: %w", err)
+	}
+	// a failed close can mean the image was not fully written
+	err = w.Close()
+	if err != nil {
+		return fmt.Errorf("Failed to close rotated image: %w", err)
+	}
 	return nil
 }
-func startProcess(logger log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, systess bool, savedir string, tessdir string) error {
- _, err := exec.Command(tessCommand, "--help").Output()
+func startProcess(ctx context.Context, logger *log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, savedir string, tessdir string, nowipe bool, fullpdf bool) error {
+ cmd := exec.Command(tessCommand, "--help")
+ pipeline.HideCmd(cmd)
+ _, err := cmd.Output()
if err != nil {
errmsg := "Error, Can't run Tesseract\n"
errmsg += "Ensure that Tesseract is installed and available, or don't use the -systess flag.\n"
@@ -404,7 +631,7 @@ func startProcess(logger log.Logger, tessCommand string, bookdir string, booknam
}
var conn Pipeliner
- conn = &bookpipeline.LocalConn{Logger: &logger, TempDir: tempdir}
+ conn = &bookpipeline.LocalConn{Logger: logger, TempDir: tempdir}
conn.Log("Setting up session")
err = conn.Init()
@@ -415,14 +642,14 @@ func startProcess(logger log.Logger, tessCommand string, bookdir string, booknam
fmt.Printf("Copying book to pipeline\n")
- err = uploadbook(bookdir, bookname, conn)
+ err = uploadbook(ctx, bookdir, bookname, conn, nowipe)
if err != nil {
_ = os.RemoveAll(tempdir)
return fmt.Errorf("Error uploading book: %v", err)
}
fmt.Printf("Processing book\n")
- err = processbook(trainingName, tessCommand, conn)
+ err = processbook(ctx, trainingName, tessCommand, conn, fullpdf)
if err != nil {
_ = os.RemoveAll(tempdir)
return fmt.Errorf("Error processing book: %v", err)
@@ -444,18 +671,16 @@ func startProcess(logger log.Logger, tessCommand string, bookdir string, booknam
return fmt.Errorf("Error removing temporary directory %s: %v", tempdir, err)
}
- if !systess {
- err = os.RemoveAll(tessdir)
- if err != nil {
- return fmt.Errorf("Error removing tesseract directory %s: %v", tessdir, err)
- }
- }
-
hocrs, err := filepath.Glob(fmt.Sprintf("%s%s*.hocr", savedir, string(filepath.Separator)))
if err != nil {
return fmt.Errorf("Error looking for .hocr files: %v", err)
}
+ err = addFullTxt(hocrs, bookname)
+ if err != nil {
+ log.Fatalf("Error creating full txt version: %v", err)
+ }
+
for _, v := range hocrs {
err = addTxtVersion(v)
if err != nil {
@@ -471,11 +696,46 @@ func startProcess(logger log.Logger, tessCommand string, bookdir string, booknam
if err != nil {
log.Fatalf("Error moving hocr %s to hocr directory: %v", v, err)
}
+
+ pngname := strings.Replace(v, ".hocr", ".png", 1)
+ err = os.MkdirAll(filepath.Join(savedir, "png"), 0755)
+ if err != nil {
+ log.Fatalf("Error creating png directory: %v", err)
+ }
+
+ err = os.Rename(pngname, filepath.Join(savedir, "png", filepath.Base(pngname)))
+ if err != nil {
+ log.Fatalf("Error moving png %s to png directory: %v", pngname, err)
+ }
+
}
// For simplicity, remove .binarised.pdf and rename .colour.pdf to .pdf
- _ = os.Remove(filepath.Join(savedir, bookname+".binarised.pdf"))
- _ = os.Rename(filepath.Join(savedir, bookname+".colour.pdf"), filepath.Join(savedir, bookname+".pdf"))
+ // providing they both exist, otherwise just rename whichever exists
+ // to .pdf.
+ binpath := filepath.Join(savedir, bookname+".binarised.pdf")
+ colourpath := filepath.Join(savedir, bookname+".colour.pdf")
+ fullsizepath := filepath.Join(savedir, bookname+".original.pdf")
+ pdfpath := filepath.Join(savedir, bookname+" searchable.pdf")
+
+ // If full size pdf is requested, replace colour.pdf with it
+ if fullpdf {
+ _ = os.Rename(fullsizepath, colourpath)
+ }
+
+ _, err = os.Stat(binpath)
+ binexists := err == nil || os.IsExist(err)
+ _, err = os.Stat(colourpath)
+ colourexists := err == nil || os.IsExist(err)
+
+ if binexists && colourexists {
+ _ = os.Remove(binpath)
+ _ = os.Rename(colourpath, pdfpath)
+ } else if binexists {
+ _ = os.Rename(binpath, pdfpath)
+ } else if colourexists {
+ _ = os.Rename(colourpath, pdfpath)
+ }
return nil
}
@@ -506,21 +766,48 @@ func addTxtVersion(hocrfn string) error {
return nil
}
-func uploadbook(dir string, name string, conn Pipeliner) error {
+// addFullTxt concatenates the text of every hocr file in hocrs, in the
+// order given and separated by newlines, and writes the result to
+// bookname.txt in the directory of the first hocr file. It does
+// nothing if hocrs is empty.
+func addFullTxt(hocrs []string, bookname string) error {
+	if len(hocrs) == 0 {
+		return nil
+	}
+	// use a Builder rather than repeated string concatenation, which
+	// would recopy all of the accumulated text for every page
+	var full strings.Builder
+	for i, v := range hocrs {
+		t, err := hocr.GetText(v)
+		if err != nil {
+			return fmt.Errorf("Error getting text from hocr file %s: %v", v, err)
+		}
+		if i > 0 {
+			full.WriteString("\n")
+		}
+		full.WriteString(t)
+	}
+
+	dir := filepath.Dir(hocrs[0])
+	fn := filepath.Join(dir, bookname+".txt")
+	err := ioutil.WriteFile(fn, []byte(full.String()), 0644)
+	if err != nil {
+		return fmt.Errorf("Error creating text file %s: %v", fn, err)
+	}
+
+	return nil
+}
+
+func uploadbook(ctx context.Context, dir string, name string, conn Pipeliner, nowipe bool) error {
_, err := os.Stat(dir)
if err != nil && !os.IsExist(err) {
return fmt.Errorf("Error: directory %s not found", dir)
}
- err = pipeline.CheckImages(dir)
+ err = pipeline.CheckImages(ctx, dir)
if err != nil {
return fmt.Errorf("Error with images in %s: %v", dir, err)
}
- err = pipeline.UploadImages(dir, name, conn)
+ err = pipeline.UploadImages(ctx, dir, name, conn)
if err != nil {
return fmt.Errorf("Error saving images to process from %s: %v", dir, err)
}
- qid := pipeline.DetectQueueType(dir, conn)
+ qid := pipeline.DetectQueueType(dir, conn, nowipe)
+ fmt.Printf("Uploading to queue %s\n", qid)
err = conn.AddToQueue(qid, name)
if err != nil {
@@ -531,9 +818,14 @@ func uploadbook(dir string, name string, conn Pipeliner) error {
}
func downloadbook(dir string, name string, conn Pipeliner) error {
- err := pipeline.DownloadBestPages(dir, name, conn, false)
+ err := pipeline.DownloadBestPages(dir, name, conn)
if err != nil {
- return fmt.Errorf("Error downloading best pages: %v", err)
+ return fmt.Errorf("No images found")
+ }
+
+ err = pipeline.DownloadBestPngs(dir, name, conn)
+ if err != nil {
+ return fmt.Errorf("No images found")
}
err = pipeline.DownloadPdfs(dir, name, conn)
@@ -549,17 +841,19 @@ func downloadbook(dir string, name string, conn Pipeliner) error {
return nil
}
-func processbook(training string, tesscmd string, conn Pipeliner) error {
+func processbook(ctx context.Context, training string, tesscmd string, conn Pipeliner, fullpdf bool) error {
origPattern := regexp.MustCompile(`[0-9]{4}.(jpg|png)$`)
wipePattern := regexp.MustCompile(`[0-9]{4,6}(.bin)?.(jpg|png)$`)
ocredPattern := regexp.MustCompile(`.hocr$`)
var checkPreQueue <-chan time.Time
+ var checkPreNoWipeQueue <-chan time.Time
var checkWipeQueue <-chan time.Time
var checkOCRPageQueue <-chan time.Time
var checkAnalyseQueue <-chan time.Time
var stopIfQuiet *time.Timer
checkPreQueue = time.After(0)
+ checkPreNoWipeQueue = time.After(0)
checkWipeQueue = time.After(0)
checkOCRPageQueue = time.After(0)
checkAnalyseQueue = time.After(0)
@@ -571,6 +865,27 @@ func processbook(training string, tesscmd string, conn Pipeliner) error {
for {
select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-checkPreNoWipeQueue:
+ msg, err := conn.CheckQueue(conn.PreNoWipeQueueId(), QueueTimeoutSecs)
+ checkPreNoWipeQueue = time.After(PauseBetweenChecks)
+ if err != nil {
+ return fmt.Errorf("Error checking preprocess no wipe queue: %v", err)
+ }
+ if msg.Handle == "" {
+ conn.Log("No message received on preprocess no wipe queue, sleeping")
+ continue
+ }
+ stopTimer(stopIfQuiet)
+ conn.Log("Message received on preprocess no wipe queue, processing", msg.Body)
+ fmt.Printf(" Preprocessing book (binarising only, no wiping)\n")
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Preprocess(thresholds, true), origPattern, conn.PreNoWipeQueueId(), conn.OCRPageQueueId())
+ resetTimer(stopIfQuiet, quietTime)
+ if err != nil {
+ return fmt.Errorf("Error during preprocess (no wipe): %v", err)
+ }
+ fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output
case <-checkPreQueue:
msg, err := conn.CheckQueue(conn.PreQueueId(), QueueTimeoutSecs)
checkPreQueue = time.After(PauseBetweenChecks)
@@ -584,12 +899,12 @@ func processbook(training string, tesscmd string, conn Pipeliner) error {
stopTimer(stopIfQuiet)
conn.Log("Message received on preprocess queue, processing", msg.Body)
fmt.Printf(" Preprocessing book (binarising and wiping)\n")
- err = pipeline.ProcessBook(msg, conn, pipeline.Preprocess(thresholds), origPattern, conn.PreQueueId(), conn.OCRPageQueueId())
- fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Preprocess(thresholds, false), origPattern, conn.PreQueueId(), conn.OCRPageQueueId())
resetTimer(stopIfQuiet, quietTime)
if err != nil {
return fmt.Errorf("Error during preprocess: %v", err)
}
+ fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output
case <-checkWipeQueue:
msg, err := conn.CheckQueue(conn.WipeQueueId(), QueueTimeoutSecs)
checkWipeQueue = time.After(PauseBetweenChecks)
@@ -603,12 +918,12 @@ func processbook(training string, tesscmd string, conn Pipeliner) error {
stopTimer(stopIfQuiet)
conn.Log("Message received on wipeonly queue, processing", msg.Body)
fmt.Printf(" Preprocessing book (wiping only)\n")
- err = pipeline.ProcessBook(msg, conn, pipeline.Wipe, wipePattern, conn.WipeQueueId(), conn.OCRPageQueueId())
- fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Wipe, wipePattern, conn.WipeQueueId(), conn.OCRPageQueueId())
resetTimer(stopIfQuiet, quietTime)
if err != nil {
return fmt.Errorf("Error during wipe: %v", err)
}
+ fmt.Printf(" OCRing pages ") // this is expected to be added to with dots by OCRPage output
case <-checkOCRPageQueue:
msg, err := conn.CheckQueue(conn.OCRPageQueueId(), QueueTimeoutSecs)
checkOCRPageQueue = time.After(PauseBetweenChecks)
@@ -624,7 +939,7 @@ func processbook(training string, tesscmd string, conn Pipeliner) error {
stopTimer(stopIfQuiet)
conn.Log("Message received on OCR Page queue, processing", msg.Body)
fmt.Printf(".")
- err = pipeline.OcrPage(msg, conn, pipeline.Ocr(training, tesscmd), conn.OCRPageQueueId(), conn.AnalyseQueueId())
+ err = pipeline.OcrPage(ctx, msg, conn, pipeline.Ocr(training, tesscmd), conn.OCRPageQueueId(), conn.AnalyseQueueId())
resetTimer(stopIfQuiet, quietTime)
if err != nil {
return fmt.Errorf("\nError during OCR Page process: %v", err)
@@ -642,7 +957,7 @@ func processbook(training string, tesscmd string, conn Pipeliner) error {
stopTimer(stopIfQuiet)
conn.Log("Message received on analyse queue, processing", msg.Body)
fmt.Printf("\n Analysing OCR and compiling PDFs\n")
- err = pipeline.ProcessBook(msg, conn, pipeline.Analyse(conn), ocredPattern, conn.AnalyseQueueId(), "")
+ err = pipeline.ProcessBook(ctx, msg, conn, pipeline.Analyse(conn, fullpdf), ocredPattern, conn.AnalyseQueueId(), "")
resetTimer(stopIfQuiet, quietTime)
if err != nil {
return fmt.Errorf("Error during analysis: %v", err)
diff --git a/cmd/rescribe/makefile b/cmd/rescribe/makefile
index aee2114..4047ff0 100644
--- a/cmd/rescribe/makefile
+++ b/cmd/rescribe/makefile
@@ -1,19 +1,78 @@
# See LICENSE file for copyright and license details.
+#
+# This is a set of make(1) rules to cross compile rescribe
+# from Linux to other architectures - as we use Fyne, CGO
+# is required, so we have to do more to cross compile than
+# just rely on the standard go tools. It relies on osxcross
+# being set up for the Mac builds, and mingw-w64 being
+# installed for the Windows build.
+#
+# The standard go tools work perfectly for native builds on
+# all architectures - note that "go generate" needs to be
+# run before building to download the dependencies which are
+# embedded.
-all: dist/linux/rescribe dist/darwin_amd64/rescribe dist/darwin_arm64/rescribe dist/windows/rescribe.exe
+# For osxcross, there are many versions of the MacOS SDK
+# that are too old or too new to build Rescribe correctly.
+# SDK 11.3, as extracted from XCode 12.5.1, seems to work
+# perfectly for us.
+OSXCROSSBIN=$(HOME)/src/osxcross/target/bin
-dist/linux/rescribe:
+EMBEDS=embed_darwin.go embed_darwin_amd64.go embed_darwin_arm64.go embed_linux.go embed_windows.go embed_other.go
+GODEPS=gui.go main.go $(EMBEDS)
+VERSION=1.3.0
+
+all: dist/linux/rescribe dist/linux/wayland/rescribe dist/darwin/rescribe.zip dist/windows/rescribe.exe
+
+dist/linux/rescribe: $(GODEPS)
+ go generate
mkdir -p dist/linux
- GOOS=linux GOARCH=amd64 go build -o $@ .
+ GOOS=linux GOARCH=amd64 go build -tags embed -o $@ .
+
+dist/linux/wayland/rescribe: $(GODEPS)
+ go generate
+ mkdir -p dist/linux
+ GOOS=linux GOARCH=amd64 go build -tags embed,wayland -o $@ .
+
+build/darwin_amd64/rescribe: $(GODEPS)
+ go generate
+ mkdir -p build/darwin_amd64
+ PATH="$(PATH):$(OSXCROSSBIN)" CC="o64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -tags embed -o $@ .
-dist/darwin_amd64/rescribe:
- mkdir -p dist/darwin_amd64
- GOOS=darwin GOARCH=amd64 go build -o $@ .
+build/darwin_arm64/rescribe: $(GODEPS)
+ go generate
+ mkdir -p build/darwin_arm64
+ PATH="$(PATH):$(OSXCROSSBIN)" CC="oa64-clang" CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -tags embed -o $@ .
-dist/darwin_arm64/rescribe:
- mkdir -p dist/darwin_arm64
- GOOS=darwin GOARCH=arm64 go build -o $@ .
+build/darwin/rescribe: build/darwin_amd64/rescribe build/darwin_arm64/rescribe
+ mkdir -p build/darwin
+ PATH="$(PATH):$(OSXCROSSBIN)" lipo -create build/darwin_amd64/rescribe build/darwin_arm64/rescribe -output $@
-dist/windows/rescribe.exe:
+# Package the universal binary as a MacOS app bundle with fyne, then
+# sign it. Note that make variables need parentheses: $VERSION would
+# expand as $(V) followed by the literal text "ERSION".
+build/darwin/Rescribe.app: build/darwin/rescribe
+	go install fyne.io/fyne/v2/cmd/fyne@v2.1.2
+	fyne package --release --certificate Rescribe --id xyz.rescribe.rescribe --tags embed --name Rescribe --exe build/darwin/rescribe --os darwin --icon icon.png --appVersion $(VERSION)
+	codesign -s Rescribe Rescribe.app
+	mv Rescribe.app $@
+
+dist/darwin/rescribe.zip: build/darwin/Rescribe.app
+ mkdir -p dist/darwin
+ cd build/darwin; zip -r ../../dist/darwin/rescribe.zip Rescribe.app
+
+build/windows/rescribe-bin.exe: $(GODEPS)
+ go generate
+ mkdir -p build/windows
+ CC="x86_64-w64-mingw32-gcc" CGO_ENABLED=1 GOOS=windows GOARCH=amd64 go build -tags embed -o $@ .
+
+# Package the Windows binary with fyne, passing the icon and version.
+# Note that make variables need parentheses: $VERSION would expand as
+# $(V) followed by the literal text "ERSION".
+dist/windows/rescribe.exe: build/windows/rescribe-bin.exe
 	mkdir -p dist/windows
-	GOOS=windows GOARCH=386 go build -o $@ .
+	CC="x86_64-w64-mingw32-gcc" fyne package --tags embed --name Rescribe --exe build/windows/rescribe-bin.exe --os windows --icon icon.png --appVersion $(VERSION)
+	mv rescribe.exe $@
+
+# used for flatpak building
+modules.tar.xz: ../../go.mod
+ go mod vendor
+ cd ../.. && tar c vendor | xz > cmd/rescribe/$@
+
+clean:
+ rm -rf dist build
+ rm -rf ../../vendor
diff --git a/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/174f82f558636f2a b/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/174f82f558636f2a
new file mode 100644
index 0000000..1a7ed9c
--- /dev/null
+++ b/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/174f82f558636f2a
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("https://www0google\xf7/books/edition/_/")
diff --git a/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/60892155cf2f7963 b/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/60892155cf2f7963
new file mode 100644
index 0000000..b637539
--- /dev/null
+++ b/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/60892155cf2f7963
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("https://Books.google\xc1&id=")
diff --git a/cmd/rescribe/xyz.rescribe.rescribe.appdata.xml b/cmd/rescribe/xyz.rescribe.rescribe.appdata.xml
new file mode 100644
index 0000000..98916b5
--- /dev/null
+++ b/cmd/rescribe/xyz.rescribe.rescribe.appdata.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<component type="desktop-application">
+ <id>xyz.rescribe.rescribe</id>
+
+ <name>Rescribe</name>
+ <developer_name>Rescribe Ltd</developer_name>
+ <summary>High quality OCR for images, PDFs and Google Books.</summary>
+
+ <description><p>An easy-to-use desktop tool for OCR of images, PDFs and Google Books. It uses the Tesseract OCR engine, combined with modern and efficient preprocessing and analysis pipelines, to produce high quality output in plain text, hOCR and searchable PDF format. The tool has been built with a focus on OCR of historical printed works, but it includes modern language options and also works well on modern printed works.</p></description>
+
+ <screenshots>
+ <screenshot type="default"><image>https://rescribe.xyz/rescribe/screenshot-03.png</image></screenshot>
+ <screenshot><image>https://rescribe.xyz/rescribe/screenshot-04.png</image></screenshot>
+ </screenshots>
+
+ <url type="homepage">https://rescribe.xyz/rescribe</url>
+
+ <metadata_license>MIT</metadata_license>
+ <project_license>GPL-3.0</project_license>
+
+ <launchable type="desktop-id">xyz.rescribe.rescribe.desktop</launchable>
+
+ <content_rating type="oars-1.1" />
+
+ <releases>
+ <release version="1.2.0" date="2024-02-16" type="stable">
+ <description>
+ <p>Fixed bug with directories containing files with spaces causing the process to fail, added concatenated text output named bookname.txt, fixed selecting a custom training in flatpak build, fixed getgbook on arm64 MacOS, improved layout of log area to fill all available space in the window, improved readability of log area text.</p>
+ </description>
+ </release>
+ <release version="1.1.0" date="2023-02-13" type="stable">
+ <description>
+ <p>Improved PDF reading by adding support for embedded CCITT images. Improved PDF parsing to prevent a possible crash with bad PDF files. Improved error messages for unreadable PDFs. Improved GUI theme thanks to an update to Fyne.</p>
+ </description>
+ </release>
+ <release version="1.0.0" date="2022-03-22" type="stable">
+ <description>
+ <p>Thanks to our fabulous Kickstarter backers, lots of improvements! Added GUI, added PDF extractor, added Google Book downloader, created a single binary for OSX for M1 and amd64, added file renamer so page files no longer need a particular naming format, added option to disable page wiping, added option to create full size PDF.</p>
+ </description>
+ </release>
+ </releases>
+
+</component>
diff --git a/cmd/rescribe/xyz.rescribe.rescribe.desktop b/cmd/rescribe/xyz.rescribe.rescribe.desktop
new file mode 100644
index 0000000..331079f
--- /dev/null
+++ b/cmd/rescribe/xyz.rescribe.rescribe.desktop
@@ -0,0 +1,9 @@
+[Desktop Entry]
+Version=1.0
+Type=Application
+Name=Rescribe
+Comment=An easy-to-use desktop tool for performing OCR on image files, PDFs and Google Books.
+Categories=Office;Literature;OCR;Scanning;TextTools
+Icon=xyz.rescribe.rescribe
+Exec=rescribe
+Terminal=false
diff --git a/cmd/rescribe/xyz.rescribe.rescribe.yml b/cmd/rescribe/xyz.rescribe.rescribe.yml
new file mode 100644
index 0000000..bd8faf4
--- /dev/null
+++ b/cmd/rescribe/xyz.rescribe.rescribe.yml
@@ -0,0 +1,65 @@
+app-id: xyz.rescribe.rescribe
+runtime: org.freedesktop.Platform
+runtime-version: '23.08'
+sdk: org.freedesktop.Sdk
+sdk-extensions: org.freedesktop.Sdk.Extension.golang
+build-options:
+  append-path: /usr/lib/sdk/golang/bin
+finish-args:
+  - --socket=fallback-x11
+  - --share=ipc # needed for X11
+  - --socket=wayland
+  - --device=dri # OpenGL
+  - --share=network # Used for google book downloading
+  - --filesystem=home
+command: rescribe
+modules:
+  - name: rescribe
+    buildsystem: simple
+    build-commands:
+      - cd cmd/rescribe && go build .
+      - cd cmd/rescribe && go build -tags wayland -o rescribe-wayland .
+      - install -Dm00755 cmd/rescribe/rescribe $FLATPAK_DEST/bin/rescribe-bin
+      - install -Dm00755 cmd/rescribe/rescribe-wayland $FLATPAK_DEST/bin/rescribe-bin-wayland
+      - install -Dm00644 cmd/rescribe/icon.256.png $FLATPAK_DEST/share/icons/hicolor/256x256/apps/xyz.rescribe.rescribe.png
+      - install -Dm00644 cmd/rescribe/xyz.rescribe.rescribe.desktop $FLATPAK_DEST/share/applications/xyz.rescribe.rescribe.desktop
+      - install -Dm00644 cmd/rescribe/xyz.rescribe.rescribe.appdata.xml $FLATPAK_DEST/share/appdata/xyz.rescribe.rescribe.appdata.xml
+      - printf '#!/bin/sh\nexport TMPDIR=$XDG_RUNTIME_DIR\nbin=rescribe-bin\ntest -n "$WAYLAND_DISPLAY" && bin=rescribe-bin-wayland\n"$bin" -gbookcmd "/app/bin/getgbook" -tesscmd "/app/bin/tesseract" -t "/app/share/tessdata/rescribev9_fast.traineddata"\n' > $FLATPAK_DEST/bin/rescribe
+      - chmod 755 $FLATPAK_DEST/bin/rescribe
+      - mkdir -p $FLATPAK_DEST/share/tessdata
+      - cp -r tessdata/* $FLATPAK_DEST/share/tessdata/
+    sources:
+      - type: git
+        url: https://github.com/rescribe/bookpipeline
+        tag: v1.2.0
+        commit: bf6e4762191aee0c27242f1d9cbbc2b8972c12f9
+      - type: archive
+        url: https://rescribe.xyz/rescribe/modules-20240206-d2399a.tar.xz
+        sha256: 682820d4cb6129c564cf8df494dc12d35ab059ed99dba34c0b3d6260f7fc30fb
+        strip-components: 0
+      - type: archive
+        url: https://rescribe.xyz/rescribe/embeds/tessdata.20220322.zip
+        sha256: 725fd570a3c3dc0eba9463248ce47a8646db8bafb198d428d6bb8f0be18540ee
+        strip-components: 0
+        dest: tessdata
+  - name: leptonica
+    sources:
+      - type: git
+        url: https://github.com/DanBloomberg/leptonica
+        tag: 1.82.0
+        commit: f4138265b390f1921b9891d6669674d3157887d8
+  - name: tesseract-ocr
+    sources:
+      - type: git
+        url: https://github.com/tesseract-ocr/tesseract
+        tag: 5.2.0
+        commit: 5ad5325a0aa8effc47ca033625b6a51682f82767
+  - name: getxbook
+    buildsystem: simple
+    build-commands:
+      - make PREFIX=$FLATPAK_DEST install
+    sources:
+      - type: git
+        url: https://git.njw.name/getxbook.git
+        commit: c770a86cca74f3b6235000c77c2ab74487e2ac2a
+        disable-shallow-clone: true