summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAntonia Rescribe <antonia@rescribe.xyz>2021-12-20 12:06:30 +0000
committerNick White <git@njw.name>2021-12-20 12:27:56 +0000
commit2e3bef288256fe25f982b9adeaacd2930cccd21e (patch)
tree9b7612e82c2ff3c5b4885a55d0d2ce819ff10fc4
parent3e953e66d02416270356d0d7d3b6ea2df0260c77 (diff)
Fixed the -png flag and changed the rescribe tool to save binarised PNGs in a separate folder
-rw-r--r--cmd/getpipelinebook/main.go9
-rw-r--r--cmd/rescribe/gui.go4
-rw-r--r--cmd/rescribe/main.go21
-rw-r--r--internal/pipeline/get.go24
4 files changed, 49 insertions, 9 deletions
diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go
index ccedd72..965f9f7 100644
--- a/cmd/getpipelinebook/main.go
+++ b/cmd/getpipelinebook/main.go
@@ -40,7 +40,7 @@ func main() {
binarisedpdf := flag.Bool("binarisedpdf", false, "Only download binarised PDF (can be used alongside -graph)")
colourpdf := flag.Bool("colourpdf", false, "Only download colour PDF (can be used alongside -graph)")
pdf := flag.Bool("pdf", false, "Only download PDFs (can be used alongside -graph)")
- png := flag.Bool("png", false, "Only download best binarised png files")
+ png := flag.Bool("png", false, "Should only download best binarised png files")
verbose := flag.Bool("v", false, "Verbose")
flag.Usage = func() {
fmt.Fprintf(flag.CommandLine.Output(), usage)
@@ -124,13 +124,18 @@ func main() {
verboselog.Println("Downloading PDFs")
pipeline.DownloadPdfs(bookname, bookname, conn)
}
+
+ if *png {
+ verboselog.Println("Downloading best PNGs")
+ pipeline.DownloadBestPngs(bookname, bookname, conn)
+ }
if *binarisedpdf || *colourpdf || *graph || *pdf {
return
}
verboselog.Println("Downloading best pages")
- err = pipeline.DownloadBestPages(bookname, bookname, conn, *png)
+ err = pipeline.DownloadBestPages(bookname, bookname, conn)
if err != nil {
log.Fatalln(err)
}
diff --git a/cmd/rescribe/gui.go b/cmd/rescribe/gui.go
index 5e9e1dc..36d9b3a 100644
--- a/cmd/rescribe/gui.go
+++ b/cmd/rescribe/gui.go
@@ -99,6 +99,8 @@ func startGui(log log.Logger, cmd string, training string, systess bool, tessdir
myApp := app.New()
myWindow := myApp.NewWindow("Rescribe OCR")
+ myWindow.Resize(fyne.NewSize(800, 400))
+
var gobtn *widget.Button
dir := widget.NewEntry()
@@ -126,7 +128,7 @@ func startGui(log log.Logger, cmd string, training string, systess bool, tessdir
logarea.Disable()
// TODO: have the button be pressed if enter is pressed
- gobtn = widget.NewButtonWithIcon("Process OCR", theme.UploadIcon(), func() {
+ gobtn = widget.NewButtonWithIcon("Start OCR", theme.UploadIcon(), func() {
if dir.Text == "" {
return
}
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 59d8166..89eb590 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -471,7 +471,21 @@ func startProcess(logger log.Logger, tessCommand string, bookdir string, booknam
if err != nil {
log.Fatalf("Error moving hocr %s to hocr directory: %v", v, err)
}
+
+ pngname := strings.Replace(v, ".hocr", ".png", 1)
+ err = os.MkdirAll(filepath.Join(savedir, "png"), 0755)
+ if err != nil {
+ log.Fatalf("Error creating hocr directory: %v", err)
+ }
+
+ err = os.Rename(pngname, filepath.Join(savedir, "png", filepath.Base(pngname)))
+ if err != nil {
+ log.Fatalf("Error moving png %s to hocr directory: %v", pngname, err)
+ }
+
}
+
+
// For simplicity, remove .binarised.pdf and rename .colour.pdf to .pdf
_ = os.Remove(filepath.Join(savedir, bookname+".binarised.pdf"))
@@ -531,10 +545,15 @@ func uploadbook(dir string, name string, conn Pipeliner) error {
}
func downloadbook(dir string, name string, conn Pipeliner) error {
- err := pipeline.DownloadBestPages(dir, name, conn, false)
+ err := pipeline.DownloadBestPages(dir, name, conn)
if err != nil {
return fmt.Errorf("Error downloading best pages: %v", err)
}
+
+ err1 := pipeline.DownloadBestPngs(dir, name, conn)
+ if err1 != nil {
+ return fmt.Errorf("Error downloading best pngs: %v", err)
+ }
err = pipeline.DownloadPdfs(dir, name, conn)
if err != nil {
diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go
index 960c8f7..2559857 100644
--- a/internal/pipeline/get.go
+++ b/internal/pipeline/get.go
@@ -12,7 +12,7 @@ import (
"strings"
)
-func DownloadBestPages(dir string, name string, conn Downloader, pluspngs bool) error {
+func DownloadBestPages(dir string, name string, conn Downloader) error {
key := filepath.Join(name, "best")
fn := filepath.Join(dir, "best")
err := conn.Download(conn.WIPStorageId(), key, fn)
@@ -35,12 +35,25 @@ func DownloadBestPages(dir string, name string, conn Downloader, pluspngs bool)
return fmt.Errorf("Failed to download file %s: %v", key, err)
}
}
+ return nil
+}
- if !pluspngs {
- return nil
- }
+func DownloadBestPngs(dir string, name string, conn Downloader) error {
- s = bufio.NewScanner(f)
+ key := filepath.Join(name, "best")
+ fn := filepath.Join(dir, "best")
+ err := conn.Download(conn.WIPStorageId(), key, fn)
+ if err != nil {
+ return fmt.Errorf("Failed to download 'best' file: %v", err)
+ }
+ f, err := os.Open(fn)
+ if err != nil {
+ return fmt.Errorf("Failed to open best file: %v", err)
+ }
+ defer f.Close()
+
+
+ s := bufio.NewScanner(f)
for s.Scan() {
imgname := strings.Replace(s.Text(), ".hocr", ".png", 1)
key = filepath.Join(name, imgname)
@@ -54,6 +67,7 @@ func DownloadBestPages(dir string, name string, conn Downloader, pluspngs bool)
return nil
}
+
func DownloadPdfs(dir string, name string, conn Downloader) error {
for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} {
key := filepath.Join(name, name+suffix)