From 676b96f0f6c72a3b84de11521f82a337e59c829e Mon Sep 17 00:00:00 2001 From: Nick White Date: Wed, 19 Feb 2020 20:50:46 +0000 Subject: Split sampling functionality in pare-gt into a separate function that can be tested (coming soon) --- pare-gt/main.go | 69 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/pare-gt/main.go b/pare-gt/main.go index d07e141..1f397f8 100644 --- a/pare-gt/main.go +++ b/pare-gt/main.go @@ -60,45 +60,17 @@ func inStrSlice(sl []string, s string) bool { return false } -func main() { - flag.Usage = func() { - fmt.Fprintf(flag.CommandLine.Output(), usage) - flag.PrintDefaults() - } - numtopare := flag.Int("n", 10, "Percentage of the ground truth to pare away.") - flag.Parse() - if flag.NArg() != 2 { - flag.Usage() - os.Exit(1) - } - - for _, d := range flag.Args() { - info, err := os.Stat(d) - if err != nil || !info.IsDir() { - log.Fatalln("Error accessing directory", flag.Arg(0), err) - } - } - - var prefixes Prefixes - prefixes = make(Prefixes) - err := filepath.Walk(flag.Arg(0), walker(&prefixes)) - if err != nil { - log.Fatalln("Failed to walk", flag.Arg(0), err) - } - +// samplePrefixes selects random samples for each prefix, proportional +// to the number of entries with that prefix in the whole set, so that a +// total of perctosample% are sampled. 
+func samplePrefixes(perctosample int, prefixes Prefixes) (filestomove []string) { var total, sample int for _, v := range prefixes { total += len(v) - // fmt.Printf("\n%s:\n%s\n", i, v) } - sample = total / *numtopare - - // filestomove contains the names of files to move minus file extension - var filestomove []string + sample = total / perctosample - // select random samples for each prefix, proportional to - // the amount of that prefix there are in the whole set for _, prefix := range prefixes { len := len(prefix) if len == 1 { @@ -120,6 +92,37 @@ func main() { } } + return +} + +func main() { + flag.Usage = func() { + fmt.Fprintf(flag.CommandLine.Output(), usage) + flag.PrintDefaults() + } + numtopare := flag.Int("n", 10, "Percentage of the ground truth to pare away.") + flag.Parse() + if flag.NArg() != 2 { + flag.Usage() + os.Exit(1) + } + + for _, d := range flag.Args() { + info, err := os.Stat(d) + if err != nil || !info.IsDir() { + log.Fatalln("Error accessing directory", flag.Arg(0), err) + } + } + + var prefixes Prefixes + prefixes = make(Prefixes) + err := filepath.Walk(flag.Arg(0), walker(&prefixes)) + if err != nil { + log.Fatalln("Failed to walk", flag.Arg(0), err) + } + + filestomove := samplePrefixes(*numtopare, prefixes) + for _, f := range filestomove { fmt.Println("Moving ground truth", f) b := path.Base(f) -- cgit v1.2.1-24-ge1ad