diff options
-rw-r--r-- | cmd/rescribe/gbook.go | 12 | ||||
-rw-r--r-- | cmd/rescribe/gbook_test.go | 17 | ||||
-rw-r--r-- | cmd/rescribe/gui.go | 424 | ||||
-rw-r--r-- | cmd/rescribe/gui_test.go | 77 | ||||
-rw-r--r-- | cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/174f82f558636f2a | 2 | ||||
-rw-r--r-- | cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/60892155cf2f7963 | 2 |
6 files changed, 323 insertions, 211 deletions
diff --git a/cmd/rescribe/gbook.go b/cmd/rescribe/gbook.go index 320f574..a011181 100644 --- a/cmd/rescribe/gbook.go +++ b/cmd/rescribe/gbook.go @@ -41,7 +41,9 @@ func formatAuthors(authors []string) string { s = strings.ToUpper(s) if len(s) > maxPartLength { - s = s[:maxPartLength] + // truncate to maxPartLength + m := fmt.Sprintf("%%.%ds", maxPartLength) + s = fmt.Sprintf(m, s) } s = strings.Map(stripNonLetters, s) @@ -63,7 +65,9 @@ func stripNonLetters(r rune) rune { func formatTitle(title string) string { s := strings.Map(stripNonLetters, title) if len(s) > maxPartLength { - s = s[:maxPartLength] + // truncate to maxPartLength + m := fmt.Sprintf("%%.%ds", maxPartLength) + s = fmt.Sprintf(m, s) } return s } @@ -232,7 +236,7 @@ func getBookIdFromUrl(url string) (string, error) { if start >= 0 { start += 4 - if len(url[start:]) < 12 { + if len(url) - start < 12 { return "", fmt.Errorf("Could not find book ID in URL") } return url[start : start+12], nil @@ -245,7 +249,7 @@ func getBookIdFromUrl(url string) (string, error) { if start >= 0 { start += 10 - if len(url[start:]) < 12 { + if len(url) - start < 12 { return "", fmt.Errorf("Could not find book ID in URL") } return url[start : start+12], nil diff --git a/cmd/rescribe/gbook_test.go b/cmd/rescribe/gbook_test.go index 56b4b40..f7df595 100644 --- a/cmd/rescribe/gbook_test.go +++ b/cmd/rescribe/gbook_test.go @@ -8,7 +8,7 @@ import ( "testing" ) -func Test_getBookIdFromUrl(t *testing.T) { +func TestGetBookIdFromUrl(t *testing.T) { cases := []struct { url string id string @@ -29,3 +29,18 @@ func Test_getBookIdFromUrl(t *testing.T) { }) } } + +func FuzzGetBookIdFromUrl(f *testing.F) { + cases := []string { + "https://books.google.it/books?id=QjQepCuN8JYC", + "https://www.google.it/books/edition/_/VJbr-Oe2au0C", + } + + for _, c := range cases { + f.Add(c) + } + + f.Fuzz(func(t *testing.T, url string) { + getBookIdFromUrl(url) + }) +} diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go index f14f288..16e6bd8 100644 --- a/cmd/rescribe/gui.go +++ b/cmd/rescribe/gui.go @@ -78,7 +78,7 @@ func copyStdoutToChan() (chan rune, error) { // copyStderrToChan creates a pipe to copy anything written // to the file also to a rune channel. // TODO: would be nice to merge this with copyStdoutToChan, -// but a naive version using *os.File didn't work. +// but a naive version using *os.File didn't work. func copyStderrToChan() (chan rune, error) { c := make(chan rune) @@ -198,9 +198,9 @@ func mkTrainingSelect(extras []string, parent fyne.Window) *widget.Select { return s } -// formatProgressBarText uses the progressPoints map to set the text for the progress bar +// formatProgressBar uses the progressPoints map to set the text for the progress bar // appropriately -func formatProgressBarText(bar *widget.ProgressBar) func() string { +func formatProgressBar(bar *widget.ProgressBar) func() string { return func() string { for i, v := range progressPoints { if bar.Value == i { @@ -218,6 +218,218 @@ func formatProgressBarText(bar *widget.ProgressBar) func() string { } } +// updateProgress parses the last line of a log and updates a progress +// bar appropriately. +func updateProgress(log string, progressBar *widget.ProgressBar) { + lines := strings.Split(log, "\n") + lastline := lines[len(lines)-1] + for i, v := range progressPoints { + if strings.HasPrefix(lastline, " "+v) { + // OCRing has a number of dots after it showing how many pages have been processed, + // which we can use to update progress bar more often + // TODO: calculate number of pages we expect, so this can be set accurately + if v == "OCRing" { + if progressBar.Value < 0.5 { + progressBar.SetValue(0.5) + } + numdots := strings.Count(lastline, ".") + newval := float64(0.5) + (float64(numdots) * float64(0.01)) + if newval >= 0.9 { + newval = 0.89 + } + progressBar.SetValue(newval) + break + } + progressBar.SetValue(i) + } + } +} + +// start sets up the gui to start the core process, and if all is well +// it starts it +func start(ctx context.Context, log *log.Logger, cmd string, tessdir string, gbookcmd string, dir string, training string, win fyne.Window, logarea *widget.Entry, progressBar *widget.ProgressBar, abortbtn *widget.Button, wipe bool, bigpdf bool, disableWidgets []fyne.Disableable) { + if dir == "" { + return + } + + stdout, err := copyStdoutToChan() + if err != nil { + msg := fmt.Sprintf("Internal error\n\nError copying stdout to chan: %v\n", err) + dialog.ShowError(errors.New(msg), win) + fmt.Fprintf(os.Stderr, msg) + return + } + go func() { + for r := range stdout { + logarea.SetText(logarea.Text + string(r)) + logarea.CursorRow = strings.Count(logarea.Text, "\n") + updateProgress(logarea.Text, progressBar) + } + }() + + stderr, err := copyStderrToChan() + if err != nil { + msg := fmt.Sprintf("Internal error\n\nError copying stdout to chan: %v\n", err) + dialog.ShowError(errors.New(msg), win) + fmt.Fprintf(os.Stderr, msg) + return + } + go func() { + for r := range stderr { + logarea.SetText(logarea.Text + string(r)) + logarea.CursorRow = strings.Count(logarea.Text, "\n") + } + }() + + // Do this in a goroutine so the GUI remains responsive + go func() { + letsGo(ctx, log, cmd, tessdir, gbookcmd, dir, training, win, logarea, progressBar, abortbtn, wipe, bigpdf, disableWidgets) + }() +} + +// letsGo starts the core process +func letsGo(ctx context.Context, log *log.Logger, cmd string, tessdir string, gbookcmd string, dir string, training string, win fyne.Window, logarea *widget.Entry, progressBar *widget.ProgressBar, abortbtn *widget.Button, wipe bool, bigpdf bool, disableWidgets []fyne.Disableable) { + bookdir := dir + savedir := dir + bookname := strings.ReplaceAll(filepath.Base(dir), " ", "_") + + f, err := os.Stat(bookdir) + if err != nil && !strings.HasPrefix(bookdir, "Google Book: ") { + msg := fmt.Sprintf("Error opening %s: %v", bookdir, err) + dialog.ShowError(errors.New(msg), win) + fmt.Fprintf(os.Stderr, msg) + + progressBar.SetValue(0.0) + for _, v := range disableWidgets { + v.Enable() + } + abortbtn.Disable() + return + } + + for _, v := range disableWidgets { + v.Disable() + } + + abortbtn.Enable() + + progressBar.SetValue(0.1) + + if strings.HasPrefix(dir, "Google Book: ") { + if gbookcmd == "" { + msg := fmt.Sprintf("No getgbook found, can't download Google Book. Either set -gbookcmd on the command line, or use the official build which includes an embedded copy of getgbook.\n") + dialog.ShowError(errors.New(msg), win) + fmt.Fprintf(os.Stderr, msg) + progressBar.SetValue(0.0) + for _, v := range disableWidgets { + v.Enable() + } + abortbtn.Disable() + return + } + progressBar.SetValue(0.11) + start := len("Google Book: ") + bookname = dir[start : start+12] + + start = start + 12 + len(" Save to: ") + bookdir = dir[start:] + savedir = bookdir + + fmt.Printf("Downloading Google Book\n") + d, err := getGoogleBook(ctx, gbookcmd, bookname, bookdir) + if err != nil { + if !strings.HasSuffix(err.Error(), "signal: killed") { + msg := fmt.Sprintf("Error downloading Google Book %s\n", bookname) + dialog.ShowError(errors.New(msg), win) + fmt.Fprintf(os.Stderr, msg) + } + progressBar.SetValue(0.0) + for _, v := range disableWidgets { + v.Enable() + } + abortbtn.Disable() + return + } + bookdir = d + savedir = d + bookname = filepath.Base(d) + } + + if strings.HasSuffix(dir, ".pdf") && !f.IsDir() { + progressBar.SetValue(0.12) + bookdir, err = extractPdfImgs(ctx, bookdir) + if err != nil { + if !strings.HasSuffix(err.Error(), "context canceled") { + msg := fmt.Sprintf("Error opening PDF %s: %v\n", bookdir, err) + dialog.ShowError(errors.New(msg), win) + fmt.Fprintf(os.Stderr, msg) + } + + progressBar.SetValue(0.0) + for _, v := range disableWidgets { + v.Enable() + } + abortbtn.Disable() + return + } + + // happens if extractPdfImgs recovers from a PDF panic, + // which will occur if we encounter an image we can't decode + if bookdir == "" { + msg := fmt.Sprintf("Error opening PDF\nThe format of this PDF is not supported, extract the images to .jpg manually into a\nfolder first, using a tool like the PDF image extractor at https://pdfcandy.com/extract-images.html.\n") + dialog.ShowError(errors.New(msg), win) + fmt.Fprintf(os.Stderr, msg) + + progressBar.SetValue(0.0) + for _, v := range disableWidgets { + v.Enable() + } + abortbtn.Disable() + return + } + + savedir = strings.TrimSuffix(savedir, ".pdf") + bookname = strings.TrimSuffix(bookname, ".pdf") + } + + if strings.Contains(training, "[") { + start := strings.Index(training, "[") + 1 + end := strings.Index(training, "]") + training = training[start:end] + } + + err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, wipe, bigpdf) + if err != nil && strings.HasSuffix(err.Error(), "context canceled") { + progressBar.SetValue(0.0) + return + } + if err != nil { + msg := fmt.Sprintf("Error during processing: %v\n", err) + if strings.HasSuffix(err.Error(), "No images found") && strings.HasSuffix(dir.Text, ".pdf") && !f.IsDir() { + msg = fmt.Sprintf("Error opening PDF\nNo images found in the PDF. Most likely the format of this PDF is not supported,\nextract the images to .jpg manually into a folder first, using a tool like\nthe PDF image extractor at https://pdfcandy.com/extract-images.html.\n") + } + dialog.ShowError(errors.New(msg), win) + fmt.Fprintf(os.Stderr, msg) + + progressBar.SetValue(0.0) + for _, v := range disableWidgets { + v.Enable() + } + abortbtn.Disable() + return + } + + progressBar.SetValue(1.0) + + for _, v := range disableWidgets { + v.Enable() + } + abortbtn.Disable() + + msg := fmt.Sprintf("OCR process finished successfully.\n\nYour completed files have been saved in:\n%s", savedir) + dialog.ShowInformation("OCR Complete", msg, win) +} + // startGui starts the gui process func startGui(log *log.Logger, cmd string, gbookcmd string, training string, tessdir string) error { myApp := app.New() @@ -322,7 +534,7 @@ func startGui(log *log.Logger, cmd string, gbookcmd string, training string, tes trainingOpts := mkTrainingSelect([]string{training}, myWindow) progressBar := widget.NewProgressBar() - progressBar.TextFormatter = formatProgressBarText(progressBar) + progressBar.TextFormatter = formatProgressBar(progressBar) logarea := widget.NewMultiLineEntry() logarea.Disable() @@ -350,209 +562,9 @@ func startGui(log *log.Logger, cmd string, gbookcmd string, training string, tes abortbtn.Disable() gobtn.OnTapped = func() { - if dir.Text == "" { - return - } - - stdout, err := copyStdoutToChan() - if err != nil { - msg := fmt.Sprintf("Internal error\n\nError copying stdout to chan: %v\n", err) - dialog.ShowError(errors.New(msg), myWindow) - fmt.Fprintf(os.Stderr, msg) - return - } - - // update log area with stdout in a concurrent goroutine, and parse it to update the progress bar - go func() { - for r := range stdout { - logarea.SetText(logarea.Text + string(r)) - logarea.CursorRow = strings.Count(logarea.Text, "\n") - - lines := strings.Split(logarea.Text, "\n") - lastline := lines[len(lines)-1] - for i, v := range progressPoints { - if strings.HasPrefix(lastline, " "+v) { - // OCRing has a number of dots after it showing how many pages have been processed, - // which we can use to update progress bar more often - // TODO: calculate number of pages we expect, so this can be set accurately - if v == "OCRing" { - if progressBar.Value < 0.5 { - progressBar.SetValue(0.5) - } - numdots := strings.Count(lastline, ".") - newval := float64(0.5) + (float64(numdots) * float64(0.01)) - if newval >= 0.9 { - newval = 0.89 - } - progressBar.SetValue(newval) - break - } - progressBar.SetValue(i) - } - } - } - }() - - stderr, err := copyStderrToChan() - if err != nil { - msg := fmt.Sprintf("Internal error\n\nError copying stdout to chan: %v\n", err) - dialog.ShowError(errors.New(msg), myWindow) - fmt.Fprintf(os.Stderr, msg) - return - } - - // update log area with stderr in a concurrent goroutine - go func() { - for r := range stderr { - logarea.SetText(logarea.Text + string(r)) - logarea.CursorRow = strings.Count(logarea.Text, "\n") - } - }() - - bookdir := dir.Text - savedir := dir.Text - bookname := strings.ReplaceAll(filepath.Base(dir.Text), " ", "_") - - f, err := os.Stat(bookdir) - if err != nil && !strings.HasPrefix(bookdir, "Google Book: ") { - msg := fmt.Sprintf("Error opening %s: %v", bookdir, err) - dialog.ShowError(errors.New(msg), myWindow) - fmt.Fprintf(os.Stderr, msg) - - progressBar.SetValue(0.0) - for _, v := range disableWidgets { - v.Enable() - } - abortbtn.Disable() - return - } - - // Do this in a goroutine so the GUI remains responsive - go func() { - for _, v := range disableWidgets { - v.Disable() - } - - abortbtn.Enable() - - progressBar.SetValue(0.1) - - if strings.HasPrefix(dir.Text, "Google Book: ") { - if gbookcmd == "" { - msg := fmt.Sprintf("No getgbook found, can't download Google Book. Either set -gbookcmd on the command line, or use the official build which includes an embedded copy of getgbook.\n") - dialog.ShowError(errors.New(msg), myWindow) - fmt.Fprintf(os.Stderr, msg) - progressBar.SetValue(0.0) - for _, v := range disableWidgets { - v.Enable() - } - abortbtn.Disable() - return - } - progressBar.SetValue(0.11) - start := len("Google Book: ") - bookname = dir.Text[start : start+12] - - start = start + 12 + len(" Save to: ") - bookdir = dir.Text[start:] - savedir = bookdir - - fmt.Printf("Downloading Google Book\n") - d, err := getGoogleBook(ctx, gbookcmd, bookname, bookdir) - if err != nil { - if !strings.HasSuffix(err.Error(), "signal: killed") { - msg := fmt.Sprintf("Error downloading Google Book %s\n", bookname) - dialog.ShowError(errors.New(msg), myWindow) - fmt.Fprintf(os.Stderr, msg) - } - progressBar.SetValue(0.0) - for _, v := range disableWidgets { - v.Enable() - } - abortbtn.Disable() - return - } - bookdir = d - savedir = d - bookname = filepath.Base(d) - } - - if strings.HasSuffix(dir.Text, ".pdf") && !f.IsDir() { - progressBar.SetValue(0.12) - bookdir, err = extractPdfImgs(ctx, bookdir) - if err != nil { - if !strings.HasSuffix(err.Error(), "context canceled") { - msg := fmt.Sprintf("Error opening PDF %s: %v\n", bookdir, err) - dialog.ShowError(errors.New(msg), myWindow) - fmt.Fprintf(os.Stderr, msg) - } - - progressBar.SetValue(0.0) - for _, v := range disableWidgets { - v.Enable() - } - abortbtn.Disable() - return - } - - // happens if extractPdfImgs recovers from a PDF panic, - // which will occur if we encounter an image we can't decode - if bookdir == "" { - msg := fmt.Sprintf("Error opening PDF\nThe format of this PDF is not supported, extract the images to .jpg manually into a\nfolder first, using a tool like the PDF image extractor at https://pdfcandy.com/extract-images.html.\n") - dialog.ShowError(errors.New(msg), myWindow) - fmt.Fprintf(os.Stderr, msg) - - progressBar.SetValue(0.0) - for _, v := range disableWidgets { - v.Enable() - } - abortbtn.Disable() - return - } - - savedir = strings.TrimSuffix(savedir, ".pdf") - bookname = strings.TrimSuffix(bookname, ".pdf") - } - - training := trainingOpts.Selected - if strings.Contains(training, "[") { - start := strings.Index(training, "[") + 1 - end := strings.Index(training, "]") - training = training[start:end] - } - - err = startProcess(ctx, log, cmd, bookdir, bookname, training, savedir, tessdir, !wipe.Checked, bigpdf.Checked) - if err != nil && strings.HasSuffix(err.Error(), "context canceled") { - progressBar.SetValue(0.0) - return - } - if err != nil { - msg := fmt.Sprintf("Error during processing: %v\n", err) - if strings.HasSuffix(err.Error(), "No images found") && strings.HasSuffix(dir.Text, ".pdf") && !f.IsDir() { - msg = fmt.Sprintf("Error opening PDF\nNo images found in the PDF. Most likely the format of this PDF is not supported,\nextract the images to .jpg manually into a folder first, using a tool like\nthe PDF image extractor at https://pdfcandy.com/extract-images.html.\n") - } - dialog.ShowError(errors.New(msg), myWindow) - fmt.Fprintf(os.Stderr, msg) - - progressBar.SetValue(0.0) - for _, v := range disableWidgets { - v.Enable() - } - abortbtn.Disable() - return - } - - progressBar.SetValue(1.0) - - for _, v := range disableWidgets { - v.Enable() - } - abortbtn.Disable() - - msg := fmt.Sprintf("OCR process finished successfully.\n\nYour completed files have been saved in:\n%s", savedir) - dialog.ShowInformation("OCR Complete", msg, myWindow) - }() + start(ctx, log, cmd, tessdir, gbookcmd, dir.Text, trainingOpts.Selected, myWindow, logarea, progressBar, abortbtn, !wipe.Checked, bigpdf.Checked, disableWidgets) } + gobtn.Disable() choices := container.New(layout.NewGridLayout(3), folderBtn, pdfBtn, gbookBtn) diff --git a/cmd/rescribe/gui_test.go b/cmd/rescribe/gui_test.go new file mode 100644 index 0000000..99a924f --- /dev/null +++ b/cmd/rescribe/gui_test.go @@ -0,0 +1,77 @@ +// Copyright 2022 Nick White. +// Use of this source code is governed by the GPLv3 +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "strings" + "testing" + + "fyne.io/fyne/v2/app" + "fyne.io/fyne/v2/widget" +) + +func TestFormatProgressBar(t *testing.T) { + cases := []struct { + val float64 + str string + }{ + {0.0, ""}, + {0.01, "Processing"}, + {0.11, "Downloading"}, + {0.12, "Processing PDF"}, + {0.2, "Preprocessing"}, + {0.5, "OCRing"}, + {0.55, "OCRing"}, + {0.89, "OCRing"}, + {0.9, "Analysing"}, + {1.0, "Done"}, + {1.1, "Processing"}, + } + + _ = app.New() // shouldn't be needed for test but we get a panic without it + bar := widget.NewProgressBar() + + for _, c := range cases { + t.Run(fmt.Sprintf("%s_%.1f", c.str, c.val), func(t *testing.T) { + bar.Value = c.val + got := formatProgressBar(bar)() + if got != c.str { + t.Fatalf("Expected %s, got %s", c.str, got) + } + }) + } +} + +func TestUpdateProgress(t *testing.T) { + cases := []struct { + log string + val float64 + }{ + {"Downloading", 0.11}, + {"Preprocessing", 0.2}, + {"Preprocessing\nOCRing", 0.5}, + {"Preprocessing\nOCRing...", 0.53}, + {"OCRing........................................", 0.89}, + {"OCRing..\nAnalysing", 0.9}, + {"Done", 1.0}, + {"Weirdness", 0.0}, + } + + _ = app.New() // shouldn't be needed for test but we get a panic without it + bar := widget.NewProgressBar() + + for _, c := range cases { + t.Run(c.log, func(t *testing.T) { + l := strings.ReplaceAll(" "+c.log, "\n", "\n ") + bar.Value = 0.0 + updateProgress(l, bar) + got := bar.Value + if got != c.val { + t.Fatalf("Expected %f, got %f", c.val, got) + } + }) + } +} diff --git a/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/174f82f558636f2a b/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/174f82f558636f2a new file mode 100644 index 0000000..1a7ed9c --- /dev/null +++ b/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/174f82f558636f2a @@ -0,0 +1,2 @@ +go test fuzz v1 +string("https://www0google\xf7/books/edition/_/") diff --git a/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/60892155cf2f7963 b/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/60892155cf2f7963 new file mode 100644 index 0000000..b637539 --- /dev/null +++ b/cmd/rescribe/testdata/fuzz/FuzzGetBookIdFromUrl/60892155cf2f7963 @@ -0,0 +1,2 @@ +go test fuzz v1 +string("https://Books.google\xc1&id=") |