// gbookIDLen is the length in bytes of a Google Books volume ID.
const gbookIDLen = 12

// gbookEditionURLRe matches the scheme, host and leading path of a
// "www.google.<tld>/books/" style URL. It is compiled once at package
// initialisation instead of on every call, and is case-insensitive so
// that it agrees with the case-insensitive books.google prefix check.
var gbookEditionURLRe = regexp.MustCompile(`(?i)https://www\.google\.[^/]*/books/`)

// getBookIdFromUrl returns a 12 character Google Book ID from
// a Google URL, or an error if one can't be found.
//
// If url is itself exactly 12 bytes long and contains none of "?/:"
// it is assumed to already be a bare ID and is returned unchanged.
//
// Example URLs:
//
//	https://books.google.it/books?id=QjQepCuN8JYC
//	https://www.google.it/books/edition/_/VJbr-Oe2au0C
//
// The ID is taken as the 12 bytes following "?id=" / "&id=" (for
// books.google URLs) or "edition/_/" (for www.google URLs); those
// bytes are not validated any further.
func getBookIdFromUrl(url string) (string, error) {
	if len(url) == gbookIDLen && !strings.ContainsAny(url, "?/:") {
		return url, nil
	}

	// All marker searches are done on the original string. Searching a
	// lowercased copy and reusing its offsets is unsafe, because
	// lowercasing can change the byte length of non-ASCII text.
	const booksPrefix = "https://books.google"
	if len(url) >= len(booksPrefix) && strings.EqualFold(url[:len(booksPrefix)], booksPrefix) {
		start := indexFoldASCII(url, "?id=")
		if start == -1 {
			start = indexFoldASCII(url, "&id=")
		}
		if start == -1 {
			return "", fmt.Errorf("Could not find book ID in URL")
		}
		return gbookIDAt(url, start+len("?id="))
	}

	if gbookEditionURLRe.MatchString(url) {
		const marker = "edition/_/"
		if start := indexFoldASCII(url, marker); start >= 0 {
			return gbookIDAt(url, start+len(marker))
		}
	}

	return "", fmt.Errorf("Could not find book ID in URL")
}

// gbookIDAt returns the gbookIDLen bytes of url beginning at start, or
// an error if fewer than gbookIDLen bytes remain.
func gbookIDAt(url string, start int) (string, error) {
	if len(url)-start < gbookIDLen {
		return "", fmt.Errorf("Could not find book ID in URL")
	}
	return url[start : start+gbookIDLen], nil
}

// indexFoldASCII returns the byte offset of the first case-insensitive
// occurrence of the ASCII string substr in s, or -1 if there is none.
// The offset always refers to s itself, so it is safe to slice s with it.
func indexFoldASCII(s, substr string) int {
	for i := 0; i+len(substr) <= len(s); i++ {
		if strings.EqualFold(s[i:i+len(substr)], substr) {
			return i
		}
	}
	return -1
}