From 629e436f63da8f19fd1654a634edf8e4f1c2bdad Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 16 Jun 2020 16:57:34 +0100 Subject: [getallhocrs] Skip files which have already been downloaded --- cmd/bookpipeline/main.go | 9 ++++++++- cmd/getallhocrs/main.go | 7 +++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go index 16ce596..0afda16 100644 --- a/cmd/bookpipeline/main.go +++ b/cmd/bookpipeline/main.go @@ -667,7 +667,14 @@ func stopTimer(t *time.Timer) { // TODO: rather than relying on journald, would be nicer to save the logs // ourselves maybe, so that we weren't relying on a particular systemd -// setup. +// setup. this can be done by having the conn.Log also append line +// to a file (though that would mean everything would have to go through +// conn.Log, which we're not consistently doing yet). the correct thing +// to do then would be to implement a new interface that covers the part +// of log.Logger we use (e.g. 
Print and Printf), and then have an exported +// conn struct that implements those, so that we could pass a log.Logger +// or the new conn struct everywhere (in practice we wouldn't be passing a log.Logger; +// it's just good to be able to keep the compatibility). func savelogs(conn Pipeliner, starttime int64, hostname string) error { cmd := exec.Command("journalctl", "-u", "bookpipeline", "-n", "all") var stdout, stderr bytes.Buffer diff --git a/cmd/getallhocrs/main.go b/cmd/getallhocrs/main.go index 136f07e..696a5fc 100644 --- a/cmd/getallhocrs/main.go +++ b/cmd/getallhocrs/main.go @@ -68,6 +68,13 @@ func main() { if !strings.HasSuffix(o, ".hocr") { continue } + // skip already downloaded items + _, err = os.Stat(o) + if err == nil || os.IsExist(err) { + log.Println(" Skipping already complete download of", o) + continue + } + log.Println(" Downloading", o) err = conn.Download(conn.WIPStorageId(), o, o) if err != nil { log.Fatalln("Failed to download file", o, err) -- cgit v1.2.1-24-ge1ad