diff options
| author | Nick White <git@njw.name> | 2020-02-19 20:50:46 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2020-02-19 20:50:46 +0000 | 
| commit | 676b96f0f6c72a3b84de11521f82a337e59c829e (patch) | |
| tree | dc728f8b58d34a751e6650a7641e8c6e2014e9f4 /pare-gt/main.go | |
| parent | 01d5636bbb33edcace579b0026c89a86536b741d (diff) | |
Split sampling functionality in pare-gt into a separate function that can be tested (tests coming soon)
Diffstat (limited to 'pare-gt/main.go')
| -rw-r--r-- | pare-gt/main.go | 69 | 
1 file changed, 36 insertions, 33 deletions
| diff --git a/pare-gt/main.go b/pare-gt/main.go index d07e141..1f397f8 100644 --- a/pare-gt/main.go +++ b/pare-gt/main.go @@ -60,45 +60,17 @@ func inStrSlice(sl []string, s string) bool {  	return false  } -func main() { -	flag.Usage = func() { -		fmt.Fprintf(flag.CommandLine.Output(), usage) -		flag.PrintDefaults() -	} -	numtopare := flag.Int("n", 10, "Percentage of the ground truth to pare away.") -	flag.Parse() -	if flag.NArg() != 2 { -		flag.Usage() -		os.Exit(1) -	} - -	for _, d := range flag.Args() { -		info, err := os.Stat(d) -		if err != nil || !info.IsDir() { -			log.Fatalln("Error accessing directory", flag.Arg(0), err) -		} -	} - -	var prefixes Prefixes -	prefixes = make(Prefixes) -	err := filepath.Walk(flag.Arg(0), walker(&prefixes)) -	if err != nil { -		log.Fatalln("Failed to walk", flag.Arg(0), err) -	} - +// samplePrefixes selects random samples for each prefix, proportional +// to the amount of that prefix there are in the whole set, so that a +// total of perctosample% are sampled. 
+func samplePrefixes(perctosample int, prefixes Prefixes) (filestomove []string) {  	var total, sample int  	for _, v := range prefixes {  		total += len(v) -	//	fmt.Printf("\n%s:\n%s\n", i, v)  	} -	sample = total / *numtopare - -	// filestomove contains the names of files to move minus file extension -	var filestomove []string +	sample = total / perctosample -	// select random samples for each prefix, proportional to -	// the amount of that prefix there are in the whole set  	for _, prefix := range prefixes {  		len := len(prefix)  		if len == 1 { @@ -120,6 +92,37 @@ func main() {  		}  	} +	return +} + +func main() { +	flag.Usage = func() { +		fmt.Fprintf(flag.CommandLine.Output(), usage) +		flag.PrintDefaults() +	} +	numtopare := flag.Int("n", 10, "Percentage of the ground truth to pare away.") +	flag.Parse() +	if flag.NArg() != 2 { +		flag.Usage() +		os.Exit(1) +	} + +	for _, d := range flag.Args() { +		info, err := os.Stat(d) +		if err != nil || !info.IsDir() { +			log.Fatalln("Error accessing directory", flag.Arg(0), err) +		} +	} + +	var prefixes Prefixes +	prefixes = make(Prefixes) +	err := filepath.Walk(flag.Arg(0), walker(&prefixes)) +	if err != nil { +		log.Fatalln("Failed to walk", flag.Arg(0), err) +	} + +	filestomove := samplePrefixes(*numtopare, prefixes) +  	for _, f := range filestomove {  		fmt.Println("Moving ground truth", f)  		b := path.Base(f) | 
