diff options
| author | Nick White <git@njw.name> | 2019-10-23 15:41:26 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-10-23 15:41:26 +0100 | 
| commit | 01c99da2c2c7960d0cf6e0a49ede919948898a2c (patch) | |
| tree | c264271b3bc8f03704db224de601b9977921b735 /cmd | |
| parent | d9974362e406611b86a5a47b2c9f1771fb2a2719 (diff) | |
getpipelinebook: default to downloading corresponding page images, and add option to download the original page images too
Diffstat (limited to 'cmd')
| -rw-r--r-- | cmd/getpipelinebook/main.go | 41 | 
1 file changed, 38 insertions, 3 deletions
| diff --git a/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go index 9e900bf..d7d3e41 100644 --- a/cmd/getpipelinebook/main.go +++ b/cmd/getpipelinebook/main.go @@ -7,11 +7,19 @@ import (  	"log"  	"os"  	"path/filepath" +	"strings"  	"rescribe.xyz/bookpipeline"  ) -const usage = "Usage: getpipelinebook [-a] [-v] bookname\n\nDownloads the pipeline results for a book.\n" +const usage = `Usage: getpipelinebook [-a] [-c] [-v] bookname + +Downloads the pipeline results for a book. + +By default this downloads the best hOCR version for each page with +the corresponding binarised image, and the best, conf and graph.png +analysis files. +`  // null writer to enable non-verbose logging to be discarded  type NullWriter bool @@ -32,7 +40,8 @@ type Pipeliner interface {  }  func main() { -	all := flag.Bool("a", false, "Get all files for book, not just hOCR and analysis files") +	all := flag.Bool("a", false, "Get all files for book") +	colour := flag.Bool("c", false, "Also get the original page images")  	verbose := flag.Bool("v", false, "Verbose")  	flag.Usage = func() {  		fmt.Fprintf(flag.CommandLine.Output(), usage) @@ -98,7 +107,7 @@ func main() {  	}  	defer f.Close() -	verboselog.Println("Downloading HOCR files") +	verboselog.Println("Downloading HOCR and corresponding image files")  	s := bufio.NewScanner(f)  	for s.Scan() {  		fn = filepath.Join(bookname, s.Text()) @@ -107,6 +116,32 @@ func main() {  		if err != nil {  			log.Fatalln("Failed to download file", fn, err)  		} +		b := strings.TrimSuffix(s.Text(), ".hocr") +		fn = filepath.Join(bookname, b + ".png") +		verboselog.Println("Downloading file", fn) +		err = conn.Download(conn.WIPStorageId(), fn, fn) +		if err != nil { +			log.Fatalln("Failed to download file", fn, err) +		} +		if *colour { +			parts := strings.SplitN(s.Text(), "_bin", 2) +			if len(parts) < 2 { +				verboselog.Println("Can't find page number for original page image, skipping", b) +				continue +			} +			num := parts[0] +			fn = 
filepath.Join(bookname, num + ".jpg") +			verboselog.Println("Downloading file", fn) +			err = conn.Download(conn.WIPStorageId(), fn, fn) +			if err != nil { +				fn = filepath.Join(bookname, num + ".png") +				verboselog.Println("Downloading file", fn) +				err = conn.Download(conn.WIPStorageId(), fn, fn) +				if err != nil { +					log.Fatalln("Failed to download file", fn, err) +				} +			} +		}  	}  	analyses := []string{"conf", "graph.png"} | 
