summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Antonia Rescribe <antonia@rescribe.xyz> 2022-03-21 16:42:06 +0100
committer Nick White <git@njw.name> 2022-03-21 16:09:11 +0000
commit fd50c673c5dc4f42c9ad11a75120b86f1122446a (patch)
tree e3001918a2713b63523579c0d25648d5c57029df
parent 68e089d48d5629d4d2b69eb16e93342bb9eb7df1 (diff)
added support for new type of Google Books URLs
-rw-r--r-- cmd/rescribe/gui.go 63
1 files changed, 42 insertions, 21 deletions
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index 7bc3e7b..3de29e3 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -13,6 +13,7 @@ import (
"log"
"os"
"path/filepath"
+ "regexp"
"strings"
"fyne.io/fyne/v2"
@@ -28,18 +29,18 @@ import (
var progressPoints = map[float64]string{
0.11: "Downloading",
0.12: "Processing PDF",
- 0.2: "Preprocessing",
- 0.5: "OCRing",
- 0.9: "Analysing",
- 1.0: "Done",
+ 0.2: "Preprocessing",
+ 0.5: "OCRing",
+ 0.9: "Analysing",
+ 1.0: "Done",
}
var trainingNames = map[string]string{
- "carolinemsv1_fast": "Caroline Miniscule",
- "eng": "English (modern printing)",
- "lat": "Latin (modern printing)",
+ "carolinemsv1_fast": "Caroline Miniscule",
+ "eng": "English (modern printing)",
+ "lat": "Latin (modern printing)",
"rescribefrav2_fast": "French (early printing)",
- "rescribev8_fast": "Latin (early printing)",
+ "rescribev8_fast": "Latin (early printing)",
}
// getBookIdFromUrl returns a 12 character Google Book ID from
@@ -49,23 +50,43 @@ func getBookIdFromUrl(url string) (string, error) {
if len(url) == 12 && !strings.ContainsAny(url, "?/:") {
return url, nil
}
- if !strings.HasPrefix(lurl, "https://books.google") {
- return "", fmt.Errorf("Not a Google Books URL")
+
+ matchUrl, err := regexp.MatchString("https://www.google.[^\\/]*/books/", url)
+ if err != nil {
+ return "", err
}
- start := strings.Index(lurl, "?id=")
- if start == -1 {
- start = strings.Index(lurl, "&id=")
+ if matchUrl == false && !strings.HasPrefix(lurl, "https://books.google") {
+ return "", fmt.Errorf("Not a Google Books URL")
}
- if start >= 0 {
- start += 4
- if len(url[start:]) < 12 {
- return "", fmt.Errorf("Could not find book ID in URL")
+ if strings.HasPrefix(lurl, "https://books.google") {
+ start := strings.Index(lurl, "?id=")
+ if start == -1 {
+ start = strings.Index(lurl, "&id=")
}
- return url[start:start+12], nil
+
+ if start >= 0 {
+ start += 4
+ if len(url[start:]) < 12 {
+ return "", fmt.Errorf("Could not find book ID in URL")
+ }
+ return url[start : start+12], nil
+ }
+
+ return "", fmt.Errorf("Could not find book ID in URL")
}
+ if matchUrl == true {
+ start := strings.Index(lurl, "edition/_/")
+ if start >= 0 {
+ start += 10
+ if len(url[start:]) < 12 {
+ return "", fmt.Errorf("Could not find book ID in URL")
+ }
+ return url[start : start+12], nil
+ }
+ }
return "", fmt.Errorf("Could not find book ID in URL")
}
@@ -395,9 +416,9 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
logarea.CursorRow = strings.Count(logarea.Text, "\n")
lines := strings.Split(logarea.Text, "\n")
- lastline := lines[len(lines) - 1]
+ lastline := lines[len(lines)-1]
for i, v := range progressPoints {
- if strings.HasPrefix(lastline, " " + v) {
+ if strings.HasPrefix(lastline, " "+v) {
// OCRing has a number of dots after it showing how many pages have been processed,
// which we can use to update progress bar more often
// TODO: calculate number of pages we expect, so this can be set accurately
@@ -466,7 +487,7 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
if strings.HasPrefix(dir.Text, "Google Book: ") {
progressBar.SetValue(0.11)
start := len("Google Book: ")
- bookname = dir.Text[start:start+12]
+ bookname = dir.Text[start : start+12]
start = start + 12 + len(" Save to: ")
bookdir = dir.Text[start:]