summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick White <git@njw.name>2020-02-19 20:50:46 +0000
committerNick White <git@njw.name>2020-02-19 20:50:46 +0000
commit676b96f0f6c72a3b84de11521f82a337e59c829e (patch)
treedc728f8b58d34a751e6650a7641e8c6e2014e9f4
parent01d5636bbb33edcace579b0026c89a86536b741d (diff)
Split sampling functionality in pare-gt into a separate function that can be tested (coming soon)
-rw-r--r--pare-gt/main.go69
1 files changed, 36 insertions, 33 deletions
diff --git a/pare-gt/main.go b/pare-gt/main.go
index d07e141..1f397f8 100644
--- a/pare-gt/main.go
+++ b/pare-gt/main.go
@@ -60,45 +60,17 @@ func inStrSlice(sl []string, s string) bool {
return false
}
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(flag.CommandLine.Output(), usage)
- flag.PrintDefaults()
- }
- numtopare := flag.Int("n", 10, "Percentage of the ground truth to pare away.")
- flag.Parse()
- if flag.NArg() != 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- for _, d := range flag.Args() {
- info, err := os.Stat(d)
- if err != nil || !info.IsDir() {
- log.Fatalln("Error accessing directory", flag.Arg(0), err)
- }
- }
-
- var prefixes Prefixes
- prefixes = make(Prefixes)
- err := filepath.Walk(flag.Arg(0), walker(&prefixes))
- if err != nil {
- log.Fatalln("Failed to walk", flag.Arg(0), err)
- }
-
+// samplePrefixes selects random samples for each prefix, proportional
+// to the number of entries with that prefix in the whole set, so that
+// a total of perctosample% are sampled.
+func samplePrefixes(perctosample int, prefixes Prefixes) (filestomove []string) {
var total, sample int
for _, v := range prefixes {
total += len(v)
- // fmt.Printf("\n%s:\n%s\n", i, v)
}
- sample = total / *numtopare
-
- // filestomove contains the names of files to move minus file extension
- var filestomove []string
+ sample = total / perctosample
- // select random samples for each prefix, proportional to
- // the amount of that prefix there are in the whole set
for _, prefix := range prefixes {
len := len(prefix)
if len == 1 {
@@ -120,6 +92,37 @@ func main() {
}
}
+ return
+}
+
+func main() {
+ flag.Usage = func() {
+ fmt.Fprintf(flag.CommandLine.Output(), usage)
+ flag.PrintDefaults()
+ }
+ numtopare := flag.Int("n", 10, "Percentage of the ground truth to pare away.")
+ flag.Parse()
+ if flag.NArg() != 2 {
+ flag.Usage()
+ os.Exit(1)
+ }
+
+ for _, d := range flag.Args() {
+ info, err := os.Stat(d)
+ if err != nil || !info.IsDir() {
+ log.Fatalln("Error accessing directory", flag.Arg(0), err)
+ }
+ }
+
+ var prefixes Prefixes
+ prefixes = make(Prefixes)
+ err := filepath.Walk(flag.Arg(0), walker(&prefixes))
+ if err != nil {
+ log.Fatalln("Failed to walk", flag.Arg(0), err)
+ }
+
+ filestomove := samplePrefixes(*numtopare, prefixes)
+
for _, f := range filestomove {
fmt.Println("Moving ground truth", f)
b := path.Base(f)