diff options
Diffstat (limited to 'cmd/rescribe')
| -rw-r--r-- | cmd/rescribe/main.go | 181 | 
1 files changed, 153 insertions, 28 deletions
| diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index 54338f3..6ff99e7 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -11,8 +11,10 @@ package main  import (  	"archive/zip" +	"bufio"  	"bytes"  	_ "embed" +	"errors"  	"flag"  	"fmt"  	"io" @@ -26,6 +28,14 @@ import (  	"strings"  	"time" +	"fyne.io/fyne/v2" +	"fyne.io/fyne/v2/app" +	"fyne.io/fyne/v2/container" +	"fyne.io/fyne/v2/dialog" +	"fyne.io/fyne/v2/layout" +	"fyne.io/fyne/v2/theme" +	"fyne.io/fyne/v2/widget" +  	"rescribe.xyz/bookpipeline"  	"rescribe.xyz/utils/pkg/hocr" @@ -158,21 +168,12 @@ These training files are included in rescribe, and are always available:  	}  	flag.Parse() -	if flag.NArg() < 1 || flag.NArg() > 2 { +	if flag.NArg() > 2 {  		flag.Usage()  		return  	}  	var err error -	bookdir := flag.Arg(0) -	bookname := filepath.Base(bookdir) -	savedir := bookdir -	tessdir := "" -	if flag.NArg() > 1 { -		savedir = flag.Arg(1) -	} -	trainingPath := *training -	tessCommand := *tesscmd  	var verboselog *log.Logger  	if *verbose { @@ -182,6 +183,10 @@ These training files are included in rescribe, and are always available:  		verboselog = log.New(n, "", 0)  	} +	tessdir := "" +	trainingPath := *training +	tessCommand := *tesscmd +  	tessdir, err = ioutil.TempDir("", "tesseract")  	if err != nil {  		log.Fatalln("Error setting up tesseract directory:", err) @@ -238,28 +243,146 @@ These training files are included in rescribe, and are always available:  		log.Fatalln("Error setting TESSDATA_PREFIX:", err)  	} -	_, err = exec.Command(tessCommand, "--help").Output() +	if flag.NArg() < 1 { +		myApp := app.New() +		myWindow := myApp.NewWindow("Rescribe OCR") + +		var gobtn *widget.Button + +		dir := widget.NewEntry() +		dir.SetPlaceHolder("Folder to process") +		dir.OnChanged = func(s string) { +			// TODO: also check if string is a directory, and only enable if so +			if dir.Text != "" { +				gobtn.Enable() +			} else { +				gobtn.Disable() +			} +		} + +		openbtn := widget.NewButtonWithIcon("Choose folder", theme.FolderOpenIcon(), func() { +			dialog.ShowFolderOpen(func(uri fyne.ListableURI, err error) { +				if err == nil && uri != nil { +					dir.SetText(uri.Path()) +				} +		}, myWindow)}) + +		progressBar := widget.NewProgressBar() + +		logarea := widget.NewMultiLineEntry() +		logarea.Disable() + + +		// TODO: have the button be pressed if enter is pressed +		gobtn = widget.NewButtonWithIcon("Process OCR", theme.UploadIcon(), func() { +			if dir.Text == "" { +				return +			} + +			gobtn.Disable() +			gobtn.SetText("Processing...") + +			progressBar.SetValue(0.5) + + +			// https://stackoverflow.com/questions/10473800/in-go-how-do-i-capture-stdout-of-a-function-into-a-string +			// https://eli.thegreenplace.net/2020/faking-stdin-and-stdout-in-go/ +			origStdout := os.Stdout +			r, w, err := os.Pipe() +			if err != nil { +				log.Fatalln("Error creating pipe for stdout redirection: ", err) +			} +			os.Stdout = w +			defer func() { +				w.Close() +				os.Stdout = origStdout +			}() + +			bufReader := bufio.NewReader(r) +			outC := make(chan rune) +			go func() { +				for { +					r, _, err := bufReader.ReadRune() +					if err != nil && err != io.EOF { +						log.Fatalf("Error reading stdout: %v", err) +						return +					} +					outC <- r +					if err == io.EOF { +						close(outC) +						return +					} +				} +			}() + +			// update log area with output from outC in a concurrent goroutine +			go func() { +				for r := range outC { +					logarea.SetText(logarea.Text + string(r)) +					logarea.CursorRow = strings.Count(logarea.Text, "\n") +					// TODO: set text on progress bar, or a label below it, to latest line printed, rather than just using a whole multiline entry like this +					// TODO: parse the stdout and set progressBar based on that +				} +			}() + +			err = startProcess(*verboselog, tessCommand, dir.Text, filepath.Base(dir.Text), trainingName, *systess, dir.Text, tessdir) +			if err != nil { +				log.Fatalln(err) +			} + +			progressBar.SetValue(1.0) +			gobtn.SetText("Process OCR") +			gobtn.Enable() +		}) +		gobtn.Disable() + +		diropener := container.New(layout.NewGridLayout(2), dir, openbtn) + +		content := container.NewVBox(diropener, gobtn, progressBar, logarea) + +		myWindow.SetContent(content) + +		myWindow.Show() +		myApp.Run() +		return +	} + +	bookdir := flag.Arg(0) +	bookname := filepath.Base(bookdir) +	savedir := bookdir +	if flag.NArg() > 1 { +		savedir = flag.Arg(1) +	} + +	err = startProcess(*verboselog, tessCommand, bookdir, bookname, trainingName, *systess, savedir, tessdir)  	if err != nil { -		fmt.Fprintf(os.Stderr, "Error: Can't run Tesseract.\n") -		fmt.Fprintf(os.Stderr, "Ensure that Tesseract is installed and available, or don't use the -systess flag.\n") -		fmt.Fprintf(os.Stderr, "You may need to -tesscmd to the full path of Tesseract.exe if you're on Windows, like this:\n") -		fmt.Fprintf(os.Stderr, "  rescribe -tesscmd 'C:\\Program Files\\Tesseract OCR (x86)\\tesseract.exe' ...\n") -		fmt.Fprintf(os.Stderr, "Error message: %v\n", err) -		os.Exit(1) +		log.Fatalln(err) +	} +} + +func startProcess(logger log.Logger, tessCommand string, bookdir string, bookname string, trainingName string, systess bool, savedir string, tessdir string) error { +	_, err := exec.Command(tessCommand, "--help").Output() +	if err != nil { +		errmsg := "Error, Can't run Tesseract\n" +		errmsg += "Ensure that Tesseract is installed and available, or don't use the -systess flag.\n" +		errmsg += "You may need to -tesscmd to the full path of Tesseract.exe if you're on Windows, like this:\n" +		errmsg += "  rescribe -tesscmd 'C:\\Program Files\\Tesseract OCR (x86)\\tesseract.exe' ...\n" +		errmsg += fmt.Sprintf("Error message: %v", err) +		return errors.New(errmsg)  	}  	tempdir, err := ioutil.TempDir("", "bookpipeline")  	if err != nil { -		log.Fatalln("Error setting up temporary directory:", err) +		return errors.New(fmt.Sprintf("Error setting up temporary directory: %v", err))  	}  	var conn Pipeliner -	conn = &bookpipeline.LocalConn{Logger: verboselog, TempDir: tempdir} +	conn = &bookpipeline.LocalConn{Logger: &logger, TempDir: tempdir}  	conn.Log("Setting up session")  	err = conn.Init()  	if err != nil { -		log.Fatalln("Error setting up connection:", err) +		return errors.New(fmt.Sprintf("Error setting up connection: %v", err))  	}  	conn.Log("Finished setting up session") @@ -268,42 +391,42 @@ These training files are included in rescribe, and are always available:  	err = uploadbook(bookdir, bookname, conn)  	if err != nil {  		_ = os.RemoveAll(tempdir) -		log.Fatalln(err) +		return errors.New(fmt.Sprintf("Error uploading book: %v", err))  	}  	fmt.Printf("Processing book\n")  	err = processbook(trainingName, tessCommand, conn)  	if err != nil {  		_ = os.RemoveAll(tempdir) -		log.Fatalln(err) +		return errors.New(fmt.Sprintf("Error processing book: %v", err))  	}  	fmt.Printf("Saving finished book to %s\n", savedir)  	err = os.MkdirAll(savedir, 0755)  	if err != nil { -		log.Fatalf("Error creating save directory %s: %v", savedir, err) +		return errors.New(fmt.Sprintf("Error creating save directory %s: %v", savedir, err))  	}  	err = downloadbook(savedir, bookname, conn)  	if err != nil {  		_ = os.RemoveAll(tempdir) -		log.Fatalln(err) +		return errors.New(fmt.Sprintf("Error saving book: %v", err))  	}  	err = os.RemoveAll(tempdir)  	if err != nil { -		log.Fatalf("Error removing temporary directory %s: %v", tempdir, err) +		return errors.New(fmt.Sprintf("Error removing temporary directory %s: %v", tempdir, err))  	} -	if !*systess { +	if !systess {  		err = os.RemoveAll(tessdir)  		if err != nil { -			log.Fatalf("Error removing tesseract directory %s: %v", tessdir, err) +			return errors.New(fmt.Sprintf("Error removing tesseract directory %s: %v", tessdir, err))  		}  	}  	hocrs, err := filepath.Glob(fmt.Sprintf("%s%s*.hocr", savedir, string(filepath.Separator)))  	if err != nil { -		log.Fatalf("Error looking for .hocr files: %v", err) +		return errors.New(fmt.Sprintf("Error looking for .hocr files: %v", err))  	}  	for _, v := range hocrs { @@ -326,6 +449,8 @@ These training files are included in rescribe, and are always available:  	// For simplicity, remove .binarised.pdf and rename .colour.pdf to .pdf  	_ = os.Remove(filepath.Join(savedir, bookname+".binarised.pdf"))  	_ = os.Rename(filepath.Join(savedir, bookname+".colour.pdf"), filepath.Join(savedir, bookname+".pdf")) + +	return nil  }  func addTxtVersion(hocrfn string) error { | 
