// present reports whether the file named by the last path element of url
// exists in the current working directory and has a SHA-256 digest equal
// to sum.
//
// sum is the expected digest encoded as a hexadecimal string. Any failure
// to read the file, including the file not existing, is reported as false.
// If the file can be read but sum is not valid hexadecimal, an error is
// written to stderr and the process exits with status 1.
func present(url string, sum string) bool {
	fn := path.Base(url)

	// No separate os.Stat check is needed: reading a missing or unreadable
	// file fails here and is treated as "not present".
	b, err := ioutil.ReadFile(fn)
	if err != nil {
		return false
	}

	expected, err := hex.DecodeString(sum)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error decoding checksum for %s: %v\n", url, err)
		os.Exit(1)
	}

	// Sum256 returns a [32]byte array; slice it so it can be compared with
	// the decoded checksum. A checksum of the wrong length never matches.
	actual := sha256.Sum256(b)
	return bytes.Equal(actual[:], expected)
}
{"https://rescribe.xyz/rescribe/embeds/tesseract-linux-v5.0.0-alpha.20210510.zip", "81cfba632b8aaf0a00180b1aa62d357d50f343b0e9bd51b941ee14c289ccd889"}, + {"https://rescribe.xyz/rescribe/embeds/tesseract-osx-v4.1.1.20191227.zip", "5f567b95f1dea9d0581ad42ada4d1f1160a38ea22ae338f9efe190015265636b"}, + {"https://rescribe.xyz/rescribe/embeds/tesseract-osx-m1-v4.1.1.20210802.zip", "c9a454633f7e5175e2d50dd939d30a6e5bdfb3b8c78590a08b5aa21edbf32ca4"}, + {"https://rescribe.xyz/rescribe/embeds/tesseract-w32-v5.0.0-alpha.20210506.zip", "96734f3db4bb7c3b9a241ab6d89ab3e8436cea43b1cbbcfb13999497982f63e3"}, } for _, v := range urls { - fmt.Printf("Downloading %s\n", v) - err := dl(v) + if present(v.url, v.sum) { + fmt.Printf("Skipping downloading of already present %s\n", path.Base(v.url)) + continue + } + + fmt.Printf("Downloading %s\n", v.url) + err := dl(v.url) if err != nil { - fmt.Printf("Error downloading %s: %v\n", v, err) + fmt.Fprintf(os.Stderr, "Error downloading %s: %v\n", v.url, err) + os.Exit(1) + } + + if !present(v.url, v.sum) { + fmt.Fprintf(os.Stderr, "Error: downloaded %s does not match expected checksum: %v\n", v.url, err) os.Exit(1) } } -- cgit v1.2.1-24-ge1ad