summary | refs | log | tree | commit | diff
path: root/cmd
diff options
context:
space:
mode:
author: Nick White <git@njw.name> 2019-10-23 15:41:26 +0100
committer: Nick White <git@njw.name> 2019-10-23 15:41:26 +0100
commit: 01c99da2c2c7960d0cf6e0a49ede919948898a2c (patch)
tree: c264271b3bc8f03704db224de601b9977921b735 /cmd
parent: d9974362e406611b86a5a47b2c9f1771fb2a2719 (diff)
getpipelinebook: default to downloading corresponding page images, and add option to download the original page images too
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/getpipelinebook/main.go 41
1 files changed, 38 insertions, 3 deletions
diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go
index 9e900bf..d7d3e41 100644
--- a/cmd/getpipelinebook/main.go
+++ b/cmd/getpipelinebook/main.go
@@ -7,11 +7,19 @@ import (
"log"
"os"
"path/filepath"
+ "strings"
"rescribe.xyz/bookpipeline"
)
-const usage = "Usage: getpipelinebook [-a] [-v] bookname\n\nDownloads the pipeline results for a book.\n"
+const usage = `Usage: getpipelinebook [-a] [-c] [-v] bookname
+
+Downloads the pipeline results for a book.
+
+By default this downloads the best hOCR version for each page with
+the corresponding binarised image, and the best, conf and graph.png
+analysis files.
+`
// null writer to enable non-verbose logging to be discarded
type NullWriter bool
@@ -32,7 +40,8 @@ type Pipeliner interface {
}
func main() {
- all := flag.Bool("a", false, "Get all files for book, not just hOCR and analysis files")
+ all := flag.Bool("a", false, "Get all files for book")
+ colour := flag.Bool("c", false, "Also get the original page images")
verbose := flag.Bool("v", false, "Verbose")
flag.Usage = func() {
fmt.Fprintf(flag.CommandLine.Output(), usage)
@@ -98,7 +107,7 @@ func main() {
}
defer f.Close()
- verboselog.Println("Downloading HOCR files")
+ verboselog.Println("Downloading HOCR and corresponding image files")
s := bufio.NewScanner(f)
for s.Scan() {
fn = filepath.Join(bookname, s.Text())
@@ -107,6 +116,32 @@ func main() {
if err != nil {
log.Fatalln("Failed to download file", fn, err)
}
+ b := strings.TrimSuffix(s.Text(), ".hocr")
+ fn = filepath.Join(bookname, b + ".png")
+ verboselog.Println("Downloading file", fn)
+ err = conn.Download(conn.WIPStorageId(), fn, fn)
+ if err != nil {
+ log.Fatalln("Failed to download file", fn, err)
+ }
+ if *colour {
+ parts := strings.SplitN(s.Text(), "_bin", 2)
+ if len(parts) < 2 {
+ verboselog.Println("Can't find page number for original page image, skipping", b)
+ continue
+ }
+ num := parts[0]
+ fn = filepath.Join(bookname, num + ".jpg")
+ verboselog.Println("Downloading file", fn)
+ err = conn.Download(conn.WIPStorageId(), fn, fn)
+ if err != nil {
+ fn = filepath.Join(bookname, num + ".png")
+ verboselog.Println("Downloading file", fn)
+ err = conn.Download(conn.WIPStorageId(), fn, fn)
+ if err != nil {
+ log.Fatalln("Failed to download file", fn, err)
+ }
+ }
+ }
}
analyses := []string{"conf", "graph.png"}