diff options
| author | Nick White <git@njw.name> | 2022-02-21 14:54:42 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2022-02-21 14:54:42 +0000 | 
| commit | 2c29e91bd98a1354d0cc4772ca629e3f1fc327b5 (patch) | |
| tree | 12117de3210c795a4d407fb277f640221e327466 /cmd/rescribe | |
| parent | 69ac99da02988ad2ed675570ccaa5ff7777f0279 (diff) | |
rescribe: Add getgbook use to the GUI (not embedded yet)
Diffstat (limited to 'cmd/rescribe')
| -rw-r--r-- | cmd/rescribe/gbook.go | 209 | ||||
| -rw-r--r-- | cmd/rescribe/gui.go | 39 | 
2 files changed, 239 insertions, 9 deletions
| diff --git a/cmd/rescribe/gbook.go b/cmd/rescribe/gbook.go new file mode 100644 index 0000000..b1308cf --- /dev/null +++ b/cmd/rescribe/gbook.go @@ -0,0 +1,209 @@ +// Copyright 2022 Nick White. +// Use of this source code is governed by the GPLv3 +// license that can be found in the LICENSE file. + +package main + +import ( +	"context" +	"encoding/json" +	"fmt" +	"io" +	"io/ioutil" +	"net/http" +	"os" +	"os/exec" +	"path" +	"strings" +	"unicode" + +	"rescribe.xyz/bookpipeline/internal/pipeline" +) + +const maxPartLength = 48 + +// formatAuthors formats a list of authors by just selecting +// the first one listed, and returning the uppercased final +// name. +func formatAuthors(authors []string) string { +	if len(authors) == 0 { +		return "" +	} + +	s := authors[0] + +	parts := strings.Fields(s) +	if len(parts) > 1 { +		s = parts[len(parts)-1] +	} + +	s = strings.ToUpper(s) + +	if len(s) > maxPartLength { +		s = s[:maxPartLength] +	} + +	s = strings.Map(stripNonLetters, s) + +	return s +} + +// mapTitle is a function for strings.Map to strip out +// unwanted characters from the title. +func stripNonLetters(r rune) rune { +	if !unicode.IsLetter(r) { +		return -1 +	} +	return r +} + +// formatTitle formats a title to our preferences, notably +// by stripping spaces and punctuation characters. +func formatTitle(title string) string { +	s := strings.Map(stripNonLetters, title) +	if len(s) > maxPartLength { +		s = s[:maxPartLength] +	} +	return s +} + +// getMetadata queries Google Books for metadata we care about +// and returns it formatted as we need it. +func getMetadata(id string) (string, string, string, error) { +	var author, title, year string +	url := fmt.Sprintf("https://www.googleapis.com/books/v1/volumes/%s", id) + +	// designed to be unmarshalled by encoding/json's Unmarshal() +	type bookInfo struct { +		VolumeInfo struct { +			Title		 string +			Authors	   []string +			PublishedDate string +		} +	} + +	resp, err := http.Get(url) +	if err != nil { +		return author, title, year, fmt.Errorf("Error downloading metadata %s: %v", url, err) +	} +	defer resp.Body.Close() + +	if resp.StatusCode != http.StatusOK { +		return author, title, year, fmt.Errorf("Error downloading metadata %s: %v", url, err) +	} + +	b, err := ioutil.ReadAll(resp.Body) +	if err != nil { +		return author, title, year, fmt.Errorf("Error reading metadata %s: %v", url, err) +	} + +	v := bookInfo{} +	err = json.Unmarshal(b, &v) +	if err != nil { +		return author, title, year, fmt.Errorf("Error parsing metadata %s: %v", url, err) +	} + +	author = formatAuthors(v.VolumeInfo.Authors) +	title = formatTitle(v.VolumeInfo.Title) +	year = v.VolumeInfo.PublishedDate + +	return author, title, year, nil +} + +// moveFile just copies a file to the destination without +// using os.Rename, as that can fail if crossing filesystem +// boundaries +func moveFile(from string, to string) error { +	ffrom, err := os.Open(from) +	if err != nil { +		return err +	} +	defer ffrom.Close() + +	fto, err := os.Create(to) +	if err != nil { +		return err +	} +	defer fto.Close() + +	_, err = io.Copy(fto, ffrom) +	if err != nil { +		return err +	} + +	ffrom.Close() +	err = os.Remove(from) +	if err != nil { +		return err +	} + +	return nil +} + +// getGoogleBook downloads all images of a book to a directory +// named YEAR_AUTHORSURNAME_Title_bookid inside basedir, returning +// the directory path +func getGoogleBook(ctx context.Context, id string, basedir string) (string, error) { +	author, title, year, err := getMetadata(id) +	if err != nil { +		return "", err +	} + +	tmpdir, err := ioutil.TempDir("", "bookpipeline") +	if err != nil { +		return "", fmt.Errorf("Error setting up temporary directory: %v", err) +	} + +	// TODO: use embedded version if necessary +	cmd := exec.CommandContext(ctx, "getgbook", id) +	pipeline.HideCmd(cmd) +	cmd.Dir = tmpdir +	cmd.Stdout = os.Stdout +	cmd.Stderr = os.Stderr +	err = cmd.Run() +	if err != nil { +		return "", fmt.Errorf("Error running getgbook %s: %v", id, err) +	} + +	select { +	case <-ctx.Done(): +		_ = os.Remove(tmpdir) +		return "", ctx.Err() +	default: +	} + +	// getgbook downloads into id directory, so move files out of +	// there directly into dir +	tmpdir = path.Join(tmpdir, id) +	f, err := os.Open(tmpdir) +	if err != nil { +		return "", fmt.Errorf("Failed to open %s to move files: %v", tmpdir, err) +	} +	files, err := f.Readdir(0) +	if err != nil { +		return "", fmt.Errorf("Failed to readdir %s to move files: %v", tmpdir, err) +	} + +	d := fmt.Sprintf("%s_%s_%s_%s", year, author, title, id) +	dir := path.Join(basedir, d) +	err = os.MkdirAll(dir, 0755) +	if err != nil { +		return "", fmt.Errorf("Couldn't create directory %s: %v", dir, err) +	} + +	for _, v := range files { +		orig := path.Join(tmpdir, v.Name()) +		new := path.Join(dir, v.Name()) +		err = moveFile(orig, new) +		if err != nil { +			return dir, fmt.Errorf("Failed to move %s to %s: %v", orig, new, err) +		} +	} + +	err = os.Remove(tmpdir) +	if err != nil { +		return dir, fmt.Errorf("Failed to remove temporary directory %s: %v", tmpdir, err) +	} + +	return dir, nil +} diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go index b8449b5..fde660a 100644 --- a/cmd/rescribe/gui.go +++ b/cmd/rescribe/gui.go @@ -26,6 +26,8 @@ import (  )  var progressPoints = map[float64]string{ +	0.11: "Downloading", +	0.12: "Processing PDF",  	0.2: "Preprocessing",  	0.5: "OCRing",  	0.9: "Analysing", @@ -327,7 +329,7 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error  			if dirEntry.Text == "" {  				dirEntry.SetText(homeDir)  			} -			dir.SetText(fmt.Sprintf("Google Book: %s Savedir: %s", id, dirEntry.Text)) +			dir.SetText(fmt.Sprintf("Google Book: %s Save to: %s", id, dirEntry.Text))  			dirIcon.SetResource(theme.SearchIcon())  			myWindow.SetContent(fullContent)  			gobtn.Enable() @@ -452,18 +454,37 @@ func startGui(log log.Logger, cmd string, training string, tessdir string) error  			progressBar.SetValue(0.1) -			if strings.HasPrefix(dir.Text, "Google Book:") { -				// TODO -				dialog.ShowError(errors.New("Coming soon"), myWindow) -				progressBar.SetValue(0.0) -				for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} { -					v.Enable() +			if strings.HasPrefix(dir.Text, "Google Book: ") { +				progressBar.SetValue(0.11) +				start := len("Google Book: ") +				bookname = dir.Text[start:start+12] + +				start = start + 12 + len(" Save to: ") +				bookdir = dir.Text[start:] +				savedir = bookdir + +				fmt.Printf("Downloading Google Book\n") +				d, err := getGoogleBook(ctx, bookname, bookdir) +				if err != nil { +					if !strings.HasSuffix(err.Error(), "signal: killed") { +						msg := fmt.Sprintf("Error downloading Google Book %s: %v\n", bookname, err) +						dialog.ShowError(errors.New(msg), myWindow) +						fmt.Fprintf(os.Stderr, msg) +					} +					progressBar.SetValue(0.0) +					for _, v := range []fyne.Disableable{folderBtn, pdfBtn, gbookBtn, trainingOpts, gobtn} { +						v.Enable() +					} +					abortbtn.Disable() +					return  				} -				abortbtn.Disable() -				return +				bookdir = d +				savedir = d +				bookname = filepath.Base(d)  			}  			if strings.HasSuffix(dir.Text, ".pdf") && !f.IsDir() { +				progressBar.SetValue(0.12)  				bookdir, err = extractPdfImgs(ctx, bookdir)  				if err != nil {  					if !strings.HasSuffix(err.Error(), "context canceled") { | 
