From fd50c673c5dc4f42c9ad11a75120b86f1122446a Mon Sep 17 00:00:00 2001
From: Antonia Rescribe <antonia@rescribe.xyz>
Date: Mon, 21 Mar 2022 16:42:06 +0100
Subject: added support for new type of Google Books URLs

---
 cmd/rescribe/gui.go | 63 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 42 insertions(+), 21 deletions(-)

(limited to 'cmd')

diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index 7bc3e7b..3de29e3 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -13,6 +13,7 @@ import (
 	"log"
 	"os"
 	"path/filepath"
+	"regexp"
 	"strings"
 
 	"fyne.io/fyne/v2"
@@ -28,18 +29,18 @@ import (
 var progressPoints = map[float64]string{
 	0.11: "Downloading",
 	0.12: "Processing PDF",
-	0.2: "Preprocessing",
-	0.5: "OCRing",
-	0.9: "Analysing",
-	1.0: "Done",
+	0.2:  "Preprocessing",
+	0.5:  "OCRing",
+	0.9:  "Analysing",
+	1.0:  "Done",
 }
 
 var trainingNames = map[string]string{
-	"carolinemsv1_fast": "Caroline Miniscule",
-	"eng": "English (modern printing)",
-	"lat": "Latin (modern printing)",
+	"carolinemsv1_fast":  "Caroline Miniscule",
+	"eng":                "English (modern printing)",
+	"lat":                "Latin (modern printing)",
 	"rescribefrav2_fast": "French (early printing)",
-	"rescribev8_fast": "Latin (early printing)",
+	"rescribev8_fast":    "Latin (early printing)",
 }
 
 // getBookIdFromUrl returns a 12 character Google Book ID from
@@ -49,23 +50,43 @@ func getBookIdFromUrl(url string) (string, error) {
 	if len(url) == 12 && !strings.ContainsAny(url, "?/:") {
 		return url, nil
 	}
-	if !strings.HasPrefix(lurl, "https://books.google") {
-		return "", fmt.Errorf("Not a Google Books URL")
+
+	matchUrl, err := regexp.MatchString("https://www.google.[^\\/]*/books/", url)
+	if err != nil {
+		return "", err
 	}
 
-	start := strings.Index(lurl, "?id=")
-	if start == -1 {
-		start = strings.Index(lurl, "&id=")
+	if matchUrl == false && !strings.HasPrefix(lurl, "https://books.google") {
+		return "", fmt.Errorf("Not a Google Books URL")
 	}
 
-	if start >= 0 {
-		start += 4
-		if len(url[start:]) < 12 {
-			return "", fmt.Errorf("Could not find book ID in URL")
+	if strings.HasPrefix(lurl, "https://books.google") {
+		start := strings.Index(lurl, "?id=")
+		if start == -1 {
+			start = strings.Index(lurl, "&id=")
 		}
-		return url[start:start+12], nil
+
+		if start >= 0 {
+			start += 4
+			if len(url[start:]) < 12 {
+				return "", fmt.Errorf("Could not find book ID in URL")
+			}
+			return url[start : start+12], nil
+		}
+
+		return "", fmt.Errorf("Could not find book ID in URL")
 	}
+	if matchUrl == true {
+		start := strings.Index(lurl, "edition/_/")
 
+		if start >= 0 {
+			start += 10
+			if len(url[start:]) < 12 {
+				return "", fmt.Errorf("Could not find book ID in URL")
+			}
+			return url[start : start+12], nil
+		}
+	}
 	return "", fmt.Errorf("Could not find book ID in URL")
 }
 
@@ -395,9 +416,9 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
 				logarea.CursorRow = strings.Count(logarea.Text, "\n")
 
 				lines := strings.Split(logarea.Text, "\n")
-				lastline := lines[len(lines) - 1]
+				lastline := lines[len(lines)-1]
 				for i, v := range progressPoints {
-					if strings.HasPrefix(lastline, "  " + v) {
+					if strings.HasPrefix(lastline, "  "+v) {
 						// OCRing has a number of dots after it showing how many pages have been processed,
 						// which we can use to update progress bar more often
 						// TODO: calculate number of pages we expect, so this can be set accurately
@@ -466,7 +487,7 @@ func startGui(log log.Logger, cmd string, gbookcmd string, training string, tess
 			if strings.HasPrefix(dir.Text, "Google Book: ") {
 				progressBar.SetValue(0.11)
 				start := len("Google Book: ")
-				bookname = dir.Text[start:start+12]
+				bookname = dir.Text[start : start+12]
 
 				start = start + 12 + len(" Save to: ")
 				bookdir = dir.Text[start:]
-- 
cgit v1.2.1-24-ge1ad