From 04f45ca1883161536d92c9600553b0611f0d21ff Mon Sep 17 00:00:00 2001 From: Nick White Date: Thu, 20 Feb 2020 11:41:24 +0000 Subject: [pare-gt] Fix sampling formula, make robust in the face of a 100% sample request, and fix up test output --- pare-gt/main.go | 5 ++++- pare-gt/main_test.go | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pare-gt/main.go b/pare-gt/main.go index 0a8b067..5e2daa1 100644 --- a/pare-gt/main.go +++ b/pare-gt/main.go @@ -74,7 +74,7 @@ func samplePrefixes(perctosample int, prefixes Prefixes) (filestomove []string) keys = append(keys, i) } - sample = total / perctosample + sample = (total * perctosample ) / 100 // This ensures the map is looped over deterministically sort.Strings(keys) @@ -85,6 +85,9 @@ func samplePrefixes(perctosample int, prefixes Prefixes) (filestomove []string) continue } numtoget := int(float64(sample) / float64(total) * float64(len)) + if numtoget >= len { + numtoget = len - 1 + } if numtoget < 1 { numtoget = 1 } diff --git a/pare-gt/main_test.go b/pare-gt/main_test.go index f833609..c381a86 100644 --- a/pare-gt/main_test.go +++ b/pare-gt/main_test.go @@ -56,9 +56,9 @@ func TestSamplePrefixes(t *testing.T) { perc int expected []string }{ - //{1, []string{""}}, // TODO: fix this; currently causes hang - {10, []string{"1471-Orthographia-Tortellius_000019.txt", "Kallimachos_1509-ShipOfFools-Barclay_00122.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_13_9033333333333333.txt"}}, - {20, []string{"1471-Orthographia-Tortellius_00002.txt", "Kallimachos_1509-ShipOfFools-Barclay_00126.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt"}}, + {1, []string{"1471-Orthographia-Tortellius_000019.txt", "Kallimachos_1509-ShipOfFools-Barclay_00122.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_13_9033333333333333.txt"}}, + {10, []string{"1471-Orthographia-Tortellius_00002.txt", "Kallimachos_1509-ShipOfFools-Barclay_00126.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt"}}, + {20, []string{"1471-Orthographia-Tortellius_00008.txt", "1471-Orthographia-Tortellius_000017.txt", "1471-Orthographia-Tortellius_00006.txt", "Kallimachos_1509-ShipOfFools-Barclay_00126.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_10_59125.txt"}}, } for _, c := range cases { @@ -70,7 +70,7 @@ func TestSamplePrefixes(t *testing.T) { } for i, v := range c.expected { if actual[i] != v { - t.Fatalf("Difference in expected and actual files (at least in number %d of actual):\n\nExpected:\n%s\n\nActual:\n%s\n", i, c.expected, actual) + t.Fatalf("Difference in expected and actual files (first difference is in index %d of actual):\n\nExpected:\n%s\n\nActual:\n%s\n", i, c.expected, actual) } } }) -- cgit v1.2.1-24-ge1ad