From 2e3bef288256fe25f982b9adeaacd2930cccd21e Mon Sep 17 00:00:00 2001 From: Antonia Rescribe Date: Mon, 20 Dec 2021 12:06:30 +0000 Subject: fixed -png flag and changed rescribe tool to save binarized png in separate folder --- cmd/getpipelinebook/main.go | 9 +++++++-- cmd/rescribe/gui.go | 4 +++- cmd/rescribe/main.go | 21 ++++++++++++++++++++- internal/pipeline/get.go | 24 +++++++++++++++++++----- 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go index ccedd72..965f9f7 100644 --- a/cmd/getpipelinebook/main.go +++ b/cmd/getpipelinebook/main.go @@ -40,7 +40,7 @@ func main() { binarisedpdf := flag.Bool("binarisedpdf", false, "Only download binarised PDF (can be used alongside -graph)") colourpdf := flag.Bool("colourpdf", false, "Only download colour PDF (can be used alongside -graph)") pdf := flag.Bool("pdf", false, "Only download PDFs (can be used alongside -graph)") - png := flag.Bool("png", false, "Only download best binarised png files") + png := flag.Bool("png", false, "Should only download best binarised png files") verbose := flag.Bool("v", false, "Verbose") flag.Usage = func() { fmt.Fprintf(flag.CommandLine.Output(), usage) @@ -124,13 +124,18 @@ func main() { verboselog.Println("Downloading PDFs") pipeline.DownloadPdfs(bookname, bookname, conn) } + + if *png { + verboselog.Println("Downloading best PNGs") + pipeline.DownloadBestPngs(bookname, bookname, conn) + } if *binarisedpdf || *colourpdf || *graph || *pdf { return } verboselog.Println("Downloading best pages") - err = pipeline.DownloadBestPages(bookname, bookname, conn, *png) + err = pipeline.DownloadBestPages(bookname, bookname, conn) if err != nil { log.Fatalln(err) } diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go index 5e9e1dc..36d9b3a 100644 --- a/cmd/rescribe/gui.go +++ b/cmd/rescribe/gui.go @@ -99,6 +99,8 @@ func startGui(log log.Logger, cmd string, training string, systess bool, tessdir myApp := app.New() myWindow := myApp.NewWindow("Rescribe OCR") + myWindow.Resize(fyne.NewSize(800, 400)) + var gobtn *widget.Button dir := widget.NewEntry() @@ -126,7 +128,7 @@ func startGui(log log.Logger, cmd string, training string, systess bool, tessdir logarea.Disable() // TODO: have the button be pressed if enter is pressed - gobtn = widget.NewButtonWithIcon("Process OCR", theme.UploadIcon(), func() { + gobtn = widget.NewButtonWithIcon("Start OCR", theme.UploadIcon(), func() { if dir.Text == "" { return } diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go index 59d8166..89eb590 100644 --- a/cmd/rescribe/main.go +++ b/cmd/rescribe/main.go @@ -471,7 +471,21 @@ func startProcess(logger log.Logger, tessCommand string, bookdir string, booknam if err != nil { log.Fatalf("Error moving hocr %s to hocr directory: %v", v, err) } + + pngname := strings.Replace(v, ".hocr", ".png", 1) + err = os.MkdirAll(filepath.Join(savedir, "png"), 0755) + if err != nil { + log.Fatalf("Error creating hocr directory: %v", err) + } + + err = os.Rename(pngname, filepath.Join(savedir, "png", filepath.Base(pngname))) + if err != nil { + log.Fatalf("Error moving png %s to hocr directory: %v", pngname, err) + } + } + + // For simplicity, remove .binarised.pdf and rename .colour.pdf to .pdf _ = os.Remove(filepath.Join(savedir, bookname+".binarised.pdf")) @@ -531,10 +545,15 @@ func uploadbook(dir string, name string, conn Pipeliner) error { } func downloadbook(dir string, name string, conn Pipeliner) error { - err := pipeline.DownloadBestPages(dir, name, conn, false) + err := pipeline.DownloadBestPages(dir, name, conn) if err != nil { return fmt.Errorf("Error downloading best pages: %v", err) } + + err1 := pipeline.DownloadBestPngs(dir, name, conn) + if err1 != nil { + return fmt.Errorf("Error downloading best pngs: %v", err) + } err = pipeline.DownloadPdfs(dir, name, conn) if err != nil { diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go index 960c8f7..2559857 100644 --- a/internal/pipeline/get.go +++ b/internal/pipeline/get.go @@ -12,7 +12,7 @@ import ( "strings" ) -func DownloadBestPages(dir string, name string, conn Downloader, pluspngs bool) error { +func DownloadBestPages(dir string, name string, conn Downloader) error { key := filepath.Join(name, "best") fn := filepath.Join(dir, "best") err := conn.Download(conn.WIPStorageId(), key, fn) @@ -35,12 +35,25 @@ func DownloadBestPages(dir string, name string, conn Downloader, pluspngs bool) return fmt.Errorf("Failed to download file %s: %v", key, err) } } + return nil +} - if !pluspngs { - return nil - } +func DownloadBestPngs(dir string, name string, conn Downloader) error { - s = bufio.NewScanner(f) + key := filepath.Join(name, "best") + fn := filepath.Join(dir, "best") + err := conn.Download(conn.WIPStorageId(), key, fn) + if err != nil { + return fmt.Errorf("Failed to download 'best' file: %v", err) + } + f, err := os.Open(fn) + if err != nil { + return fmt.Errorf("Failed to open best file: %v", err) + } + defer f.Close() + + + s := bufio.NewScanner(f) for s.Scan() { imgname := strings.Replace(s.Text(), ".hocr", ".png", 1) key = filepath.Join(name, imgname) @@ -54,6 +67,7 @@ func DownloadBestPages(dir string, name string, conn Downloader, pluspngs bool) return nil } + func DownloadPdfs(dir string, name string, conn Downloader) error { for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} { key := filepath.Join(name, name+suffix) -- cgit v1.2.1-24-ge1ad