diff options
| author | Nick White <git@njw.name> | 2020-02-20 11:41:24 +0000 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2020-02-20 11:41:24 +0000 | 
| commit | 04f45ca1883161536d92c9600553b0611f0d21ff (patch) | |
| tree | 4c41fb349f1ae43125259aae837de1673c82624f /pare-gt | |
| parent | 62512ca0abb651780a01ba535bdc31f6930d4300 (diff) | |
[pare-gt] Fix sampling formula, make robust in the face of a 100% sample request, and fix up test output
Diffstat (limited to 'pare-gt')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | pare-gt/main.go | 5 |
| -rw-r--r-- | pare-gt/main_test.go | 8 |
2 files changed, 8 insertions, 5 deletions
```diff
diff --git a/pare-gt/main.go b/pare-gt/main.go
index 0a8b067..5e2daa1 100644
--- a/pare-gt/main.go
+++ b/pare-gt/main.go
@@ -74,7 +74,7 @@ func samplePrefixes(perctosample int, prefixes Prefixes) (filestomove []string)
 		keys = append(keys, i)
 	}
 
-	sample = total / perctosample
+	sample = (total * perctosample ) / 100
 
 	// This ensures the map is looped over deterministically
 	sort.Strings(keys)
@@ -85,6 +85,9 @@ func samplePrefixes(perctosample int, prefixes Prefixes) (filestomove []string)
 			continue
 		}
 		numtoget := int(float64(sample) / float64(total) * float64(len))
+		if numtoget >= len {
+			numtoget = len - 1
+		}
 		if numtoget < 1 {
 			numtoget = 1
 		}
diff --git a/pare-gt/main_test.go b/pare-gt/main_test.go
index f833609..c381a86 100644
--- a/pare-gt/main_test.go
+++ b/pare-gt/main_test.go
@@ -56,9 +56,9 @@ func TestSamplePrefixes(t *testing.T) {
 		perc     int
 		expected []string
 	}{
-		//{1, []string{""}}, // TODO: fix this; currently causes hang
-		{10, []string{"1471-Orthographia-Tortellius_000019.txt", "Kallimachos_1509-ShipOfFools-Barclay_00122.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_13_9033333333333333.txt"}},
-		{20, []string{"1471-Orthographia-Tortellius_00002.txt", "Kallimachos_1509-ShipOfFools-Barclay_00126.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt"}},
+		{1, []string{"1471-Orthographia-Tortellius_000019.txt", "Kallimachos_1509-ShipOfFools-Barclay_00122.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_13_9033333333333333.txt"}},
+		{10, []string{"1471-Orthographia-Tortellius_00002.txt", "Kallimachos_1509-ShipOfFools-Barclay_00126.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt"}},
+		{20, []string{"1471-Orthographia-Tortellius_00008.txt", "1471-Orthographia-Tortellius_000017.txt", "1471-Orthographia-Tortellius_00006.txt", "Kallimachos_1509-ShipOfFools-Barclay_00126.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_10_59125.txt"}},
 	}
 
 	for _, c := range cases {
@@ -70,7 +70,7 @@ func TestSamplePrefixes(t *testing.T) {
 			}
 			for i, v := range c.expected {
 				if actual[i] != v {
-					t.Fatalf("Difference in expected and actual files (at least in number %d of actual):\n\nExpected:\n%s\n\nActual:\n%s\n", i, c.expected, actual)
+					t.Fatalf("Difference in expected and actual files (first difference is in index %d of actual):\n\nExpected:\n%s\n\nActual:\n%s\n", i, c.expected, actual)
 				}
 			}
 		})
```
