diff options
author | Nick White <git@njw.name> | 2019-10-23 15:41:26 +0100 |
---|---|---|
committer | Nick White <git@njw.name> | 2019-10-23 15:41:26 +0100 |
commit | 01c99da2c2c7960d0cf6e0a49ede919948898a2c (patch) | |
tree | c264271b3bc8f03704db224de601b9977921b735 | |
parent | d9974362e406611b86a5a47b2c9f1771fb2a2719 (diff) |
getpipelinebook: default to downloading corresponding page images, and add option to download the original page images too
-rw-r--r-- | cmd/getpipelinebook/main.go | 41 |
1 file changed, 38 insertions, 3 deletions
```diff
diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go
index 9e900bf..d7d3e41 100644
--- a/cmd/getpipelinebook/main.go
+++ b/cmd/getpipelinebook/main.go
@@ -7,11 +7,19 @@ import (
 	"log"
 	"os"
 	"path/filepath"
+	"strings"
 
 	"rescribe.xyz/bookpipeline"
 )
 
-const usage = "Usage: getpipelinebook [-a] [-v] bookname\n\nDownloads the pipeline results for a book.\n"
+const usage = `Usage: getpipelinebook [-a] [-c] [-v] bookname
+
+Downloads the pipeline results for a book.
+
+By default this downloads the best hOCR version for each page with
+the corresponding binarised image, and the best, conf and graph.png
+analysis files.
+`
 
 // null writer to enable non-verbose logging to be discarded
 type NullWriter bool
@@ -32,7 +40,8 @@ type Pipeliner interface {
 }
 
 func main() {
-	all := flag.Bool("a", false, "Get all files for book, not just hOCR and analysis files")
+	all := flag.Bool("a", false, "Get all files for book")
+	colour := flag.Bool("c", false, "Also get the original page images")
 	verbose := flag.Bool("v", false, "Verbose")
 	flag.Usage = func() {
 		fmt.Fprintf(flag.CommandLine.Output(), usage)
@@ -98,7 +107,7 @@ func main() {
 	}
 	defer f.Close()
 
-	verboselog.Println("Downloading HOCR files")
+	verboselog.Println("Downloading HOCR and corresponding image files")
 	s := bufio.NewScanner(f)
 	for s.Scan() {
 		fn = filepath.Join(bookname, s.Text())
@@ -107,6 +116,32 @@ func main() {
 		if err != nil {
 			log.Fatalln("Failed to download file", fn, err)
 		}
+		b := strings.TrimSuffix(s.Text(), ".hocr")
+		fn = filepath.Join(bookname, b + ".png")
+		verboselog.Println("Downloading file", fn)
+		err = conn.Download(conn.WIPStorageId(), fn, fn)
+		if err != nil {
+			log.Fatalln("Failed to download file", fn, err)
+		}
+		if *colour {
+			parts := strings.SplitN(s.Text(), "_bin", 2)
+			if len(parts) < 2 {
+				verboselog.Println("Can't find page number for original page image, skipping", b)
+				continue
+			}
+			num := parts[0]
+			fn = filepath.Join(bookname, num + ".jpg")
+			verboselog.Println("Downloading file", fn)
+			err = conn.Download(conn.WIPStorageId(), fn, fn)
+			if err != nil {
+				fn = filepath.Join(bookname, num + ".png")
+				verboselog.Println("Downloading file", fn)
+				err = conn.Download(conn.WIPStorageId(), fn, fn)
+				if err != nil {
+					log.Fatalln("Failed to download file", fn, err)
+				}
+			}
+		}
 	}
 
 	analyses := []string{"conf", "graph.png"}
```