summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nick White <git@njw.name> 2020-12-07 16:53:58 +0000
committer Nick White <git@njw.name> 2020-12-07 16:53:58 +0000
commit 068ad0b666705a49ab22d7b48cd6a7d67b37f234 (patch)
tree 36a422929c7d53f49a0f549fc4781e99b1d9423d
parent 4fcbfba65689dc5e8ad46ba467343d3da376d92a (diff)
[rescribe] Allow saving of results to somewhere other than a directory named after the book being processed
-rw-r--r-- cmd/getpipelinebook/main.go 10
-rw-r--r-- cmd/rescribe/main.go 38
-rw-r--r-- internal/pipeline/get.go 53
3 files changed, 57 insertions, 44 deletions
diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go
index 5116414..ccedd72 100644
--- a/cmd/getpipelinebook/main.go
+++ b/cmd/getpipelinebook/main.go
@@ -87,7 +87,7 @@ func main() {
if *all {
verboselog.Println("Downloading all files for", bookname)
- err = pipeline.DownloadAll(bookname, conn)
+ err = pipeline.DownloadAll(bookname, bookname, conn)
if err != nil {
log.Fatalln(err)
}
@@ -122,7 +122,7 @@ func main() {
if *pdf {
verboselog.Println("Downloading PDFs")
- pipeline.DownloadPdfs(bookname, conn)
+ pipeline.DownloadPdfs(bookname, bookname, conn)
}
if *binarisedpdf || *colourpdf || *graph || *pdf {
@@ -130,19 +130,19 @@ func main() {
}
verboselog.Println("Downloading best pages")
- err = pipeline.DownloadBestPages(bookname, conn, *png)
+ err = pipeline.DownloadBestPages(bookname, bookname, conn, *png)
if err != nil {
log.Fatalln(err)
}
verboselog.Println("Downloading PDFs")
- pipeline.DownloadPdfs(bookname, conn)
+ pipeline.DownloadPdfs(bookname, bookname, conn)
if err != nil {
log.Fatalln(err)
}
verboselog.Println("Downloading analyses")
- err = pipeline.DownloadAnalyses(bookname, conn)
+ err = pipeline.DownloadAnalyses(bookname, bookname, conn)
if err != nil {
log.Fatalln(err)
}
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index 880bbc2..8414c53 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -29,6 +29,9 @@ import (
const usage = `Usage: rescribe [-v] [-t training] bookdir [savedir]
Process and OCR a book using the Rescribe pipeline on a local machine.
+
+OCR results are saved into the bookdir directory unless savedir is
+specified.
`
const QueueTimeoutSecs = 2 * 60
@@ -93,17 +96,16 @@ func main() {
}
flag.Parse()
- if flag.NArg() < 1 || flag.NArg() > 3 {
+ if flag.NArg() < 1 || flag.NArg() > 2 {
flag.Usage()
return
}
bookdir := flag.Arg(0)
- var bookname string
+ bookname := filepath.Base(bookdir)
+ savedir := bookdir
if flag.NArg() > 1 {
- bookname = flag.Arg(1)
- } else {
- bookname = filepath.Base(bookdir)
+ savedir = flag.Arg(1)
}
var verboselog *log.Logger
@@ -172,8 +174,12 @@ func main() {
log.Fatalln(err)
}
- fmt.Printf("Saving finished book to %s\n", bookname)
- err = downloadbook(bookname, conn)
+ fmt.Printf("Saving finished book to %s\n", savedir)
+ err = os.MkdirAll(savedir, 0755)
+ if err != nil {
+ log.Fatalf("Error creating save directory %s: %v", savedir, err)
+ }
+ err = downloadbook(savedir, bookname, conn)
if err != nil {
_ = os.RemoveAll(tempdir)
log.Fatalln(err)
@@ -184,7 +190,7 @@ func main() {
log.Fatalf("Error removing temporary directory %s: %v", tempdir, err)
}
- hocrs, err := filepath.Glob(fmt.Sprintf("%s%s*hocr", bookname, string(filepath.Separator)))
+ hocrs, err := filepath.Glob(fmt.Sprintf("%s%s*hocr", savedir, string(filepath.Separator)))
if err != nil {
log.Fatalf("Error looking for .hocr files: %v", err)
}
@@ -195,20 +201,20 @@ func main() {
log.Fatalf("Error creating txt version of %s: %v", v, err)
}
- err = os.MkdirAll(filepath.Join(bookname, "hocr"), 0755)
+ err = os.MkdirAll(filepath.Join(savedir, "hocr"), 0755)
if err != nil {
log.Fatalf("Error creating hocr directory: %v", err)
}
- err = os.Rename(v, filepath.Join(bookname, "hocr", filepath.Base(v)))
+ err = os.Rename(v, filepath.Join(savedir, "hocr", filepath.Base(v)))
if err != nil {
log.Fatalf("Error moving hocr %s to hocr directory: %v", v, err)
}
}
// For simplicity, remove .binarised.pdf and rename .colour.pdf to .pdf
- _ = os.Remove(filepath.Join(bookname, bookname + ".binarised.pdf"))
- _ = os.Rename(filepath.Join(bookname, bookname + ".colour.pdf"), filepath.Join(bookname, bookname + ".pdf"))
+ _ = os.Remove(filepath.Join(savedir, bookname + ".binarised.pdf"))
+ _ = os.Rename(filepath.Join(savedir, bookname + ".colour.pdf"), filepath.Join(savedir, bookname + ".pdf"))
}
func addTxtVersion(hocrfn string) error {
@@ -257,23 +263,23 @@ func uploadbook(dir string, name string, conn Pipeliner) error {
return nil
}
-func downloadbook(name string, conn Pipeliner) error {
+func downloadbook(dir string, name string, conn Pipeliner) error {
err := os.MkdirAll(name, 0755)
if err != nil {
log.Fatalln("Failed to create directory", name, err)
}
- err = pipeline.DownloadBestPages(name, conn, false)
+ err = pipeline.DownloadBestPages(dir, name, conn, false)
if err != nil {
return fmt.Errorf("Error downloading best pages: %v", err)
}
- err = pipeline.DownloadPdfs(name, conn)
+ err = pipeline.DownloadPdfs(dir, name, conn)
if err != nil {
return fmt.Errorf("Error downloading PDFs: %v", err)
}
- err = pipeline.DownloadAnalyses(name, conn)
+ err = pipeline.DownloadAnalyses(dir, name, conn)
if err != nil {
return fmt.Errorf("Error downloading analyses: %v", err)
}
diff --git a/internal/pipeline/get.go b/internal/pipeline/get.go
index 6949062..6c5b92c 100644
--- a/internal/pipeline/get.go
+++ b/internal/pipeline/get.go
@@ -12,9 +12,10 @@ import (
"strings"
)
-func DownloadBestPages(name string, conn Pipeliner, pluspngs bool) error {
- fn := filepath.Join(name, "best")
- err := conn.Download(conn.WIPStorageId(), fn, fn)
+func DownloadBestPages(dir string, name string, conn Pipeliner, pluspngs bool) error {
+ key := filepath.Join(name, "best")
+ fn := filepath.Join(dir, "best")
+ err := conn.Download(conn.WIPStorageId(), key, fn)
if err != nil {
return fmt.Errorf("Failed to download 'best' file: %v", err)
}
@@ -26,11 +27,12 @@ func DownloadBestPages(name string, conn Pipeliner, pluspngs bool) error {
s := bufio.NewScanner(f)
for s.Scan() {
- fn = filepath.Join(name, s.Text())
- conn.Log("Downloading file", fn)
- err = conn.Download(conn.WIPStorageId(), fn, fn)
+ key = filepath.Join(name, s.Text())
+ fn = filepath.Join(dir, s.Text())
+ conn.Log("Downloading file", key)
+ err = conn.Download(conn.WIPStorageId(), key, fn)
if err != nil {
- return fmt.Errorf("Failed to download file %s: %v", fn, err)
+ return fmt.Errorf("Failed to download file %s: %v", key, err)
}
}
@@ -40,49 +42,54 @@ func DownloadBestPages(name string, conn Pipeliner, pluspngs bool) error {
s = bufio.NewScanner(f)
for s.Scan() {
- txtfn := filepath.Join(name, s.Text())
- fn = strings.Replace(txtfn, ".hocr", ".png", 1)
- conn.Log("Downloading file", fn)
- err = conn.Download(conn.WIPStorageId(), fn, fn)
+ imgname := strings.Replace(s.Text(), ".hocr", ".png", 1)
+ key = filepath.Join(name, imgname)
+ fn = filepath.Join(dir, imgname)
+ conn.Log("Downloading file", key)
+ err = conn.Download(conn.WIPStorageId(), key, fn)
if err != nil {
- return fmt.Errorf("Failed to download file", fn, err)
+ return fmt.Errorf("Failed to download file %s: %v", key, err)
}
}
return nil
}
-func DownloadPdfs(name string, conn Pipeliner) error {
+func DownloadPdfs(dir string, name string, conn Pipeliner) error {
for _, suffix := range []string{".colour.pdf", ".binarised.pdf"} {
- fn := filepath.Join(name, name+suffix)
- err := conn.Download(conn.WIPStorageId(), fn, fn)
+ key := filepath.Join(name, name+suffix)
+ fn := filepath.Join(dir, name+suffix)
+ err := conn.Download(conn.WIPStorageId(), key, fn)
if err != nil {
- return fmt.Errorf("Failed to download PDF %s: %v", fn, err)
+ return fmt.Errorf("Failed to download PDF %s: %v", key, err)
}
}
return nil
}
-func DownloadAnalyses(name string, conn Pipeliner) error {
+func DownloadAnalyses(dir string, name string, conn Pipeliner) error {
for _, a := range []string{"conf", "graph.png"} {
- fn := filepath.Join(name, a)
- err := conn.Download(conn.WIPStorageId(), fn, fn)
+ key := filepath.Join(name, a)
+ fn := filepath.Join(dir, a)
+ err := conn.Download(conn.WIPStorageId(), key, fn)
if err != nil {
- return fmt.Errorf("Failed to download analysis file %s: %v", fn, err)
+ return fmt.Errorf("Failed to download analysis file %s: %v", key, err)
}
}
return nil
}
-func DownloadAll(name string, conn Pipeliner) error {
+func DownloadAll(dir string, name string, conn Pipeliner) error {
objs, err := conn.ListObjects(conn.WIPStorageId(), name)
if err != nil {
return fmt.Errorf("Failed to get list of files for book", name, err)
}
for _, i := range objs {
+ base := filepath.Base(i)
+ fn := filepath.Join(dir, base)
conn.Log("Downloading", i)
- err = conn.Download(conn.WIPStorageId(), i, i)
+ err = conn.Download(conn.WIPStorageId(), i, fn)
if err != nil {
- return fmt.Errorf("Failed to download file", i, err)
+ return fmt.Errorf("Failed to download file %s: %v", i, err)
}
}
return nil