From 62512ca0abb651780a01ba535bdc31f6930d4300 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Thu, 20 Feb 2020 11:25:05 +0000
Subject: [pare-gt] Add some tests, and make deterministic

These tests have uncovered at least 2 bugs that haven't yet been squashed:
- 1% selection hangs
- 20% selection only takes as many as 10%
---
 pare-gt/main.go      | 12 ++++++--
 pare-gt/main_test.go | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 2 deletions(-)
 create mode 100644 pare-gt/main_test.go

diff --git a/pare-gt/main.go b/pare-gt/main.go
index c645480..0a8b067 100644
--- a/pare-gt/main.go
+++ b/pare-gt/main.go
@@ -9,6 +9,7 @@ import (
 	"path"
 	"path/filepath"
 	"strings"
+	"sort"
 )
 
 const usage = `Usage: pare-gt [-n num] gtdir movedir
@@ -65,13 +66,20 @@ func inStrSlice(sl []string, s string) bool {
 // total of perctosample% are sampled.
 func samplePrefixes(perctosample int, prefixes Prefixes) (filestomove []string) {
 	var total, sample int
-	for _, v := range prefixes {
+	var keys []string
+	for i, v := range prefixes {
 		total += len(v)
+		// needed for determinism
+		sort.Strings(prefixes[i])
+		keys = append(keys, i)
 	}
 
 	sample = total / perctosample
 
-	for _, prefix := range prefixes {
+	// This ensures the map is looped over deterministically
+	sort.Strings(keys)
+	for _, key := range keys {
+		prefix := prefixes[key]
 		len := len(prefix)
 		if len == 1 {
 			continue
diff --git a/pare-gt/main_test.go b/pare-gt/main_test.go
new file mode 100644
index 0000000..f833609
--- /dev/null
+++ b/pare-gt/main_test.go
@@ -0,0 +1,78 @@
+package main
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestSamplePrefixes(t *testing.T) {
+	prefixes := Prefixes{
+		"1471-Orthographia": {
+			"1471-Orthographia-Tortellius_00001.txt",
+			"1471-Orthographia-Tortellius_00002.txt",
+			"1471-Orthographia-Tortellius_00003.txt",
+			"1471-Orthographia-Tortellius_00004.txt",
+			"1471-Orthographia-Tortellius_00005.txt",
+			"1471-Orthographia-Tortellius_00006.txt",
+			"1471-Orthographia-Tortellius_00007.txt",
+			"1471-Orthographia-Tortellius_00008.txt",
+			"1471-Orthographia-Tortellius_00009.txt",
+			"1471-Orthographia-Tortellius_000010.txt",
+			"1471-Orthographia-Tortellius_000011.txt",
+			"1471-Orthographia-Tortellius_000012.txt",
+			"1471-Orthographia-Tortellius_000013.txt",
+			"1471-Orthographia-Tortellius_000014.txt",
+			"1471-Orthographia-Tortellius_000015.txt",
+			"1471-Orthographia-Tortellius_000016.txt",
+			"1471-Orthographia-Tortellius_000017.txt",
+			"1471-Orthographia-Tortellius_000018.txt",
+			"1471-Orthographia-Tortellius_000019.txt",
+			"1471-Orthographia-Tortellius_000020.txt",
+		},
+		"Kallimachos_1509": {
+			"Kallimachos_1509-ShipOfFools-Barclay_00121.txt",
+			"Kallimachos_1509-ShipOfFools-Barclay_00122.txt",
+			"Kallimachos_1509-ShipOfFools-Barclay_00123.txt",
+			"Kallimachos_1509-ShipOfFools-Barclay_00124.txt",
+			"Kallimachos_1509-ShipOfFools-Barclay_00125.txt",
+			"Kallimachos_1509-ShipOfFools-Barclay_00126.txt",
+		},
+		"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4": {
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_10_59125.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_12_49.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_13_9033333333333333.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_1_415.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_14_6628571428571429.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_16_865.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_17_62.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_18_6366666666666666.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_19_7857142857142857.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_19_7857142857142857.txt",
+		},
+	}
+
+	cases := []struct {
+		perc     int
+		expected []string
+	}{
+		//{1, []string{""}}, // TODO: fix this; currently causes hang
+		{10, []string{"1471-Orthographia-Tortellius_000019.txt", "Kallimachos_1509-ShipOfFools-Barclay_00122.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_13_9033333333333333.txt"}},
+		{20, []string{"1471-Orthographia-Tortellius_00002.txt", "Kallimachos_1509-ShipOfFools-Barclay_00126.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt"}},
+	}
+
+	for _, c := range cases {
+		t.Run(fmt.Sprintf("%d%%", c.perc), func(t *testing.T) {
+			actual := samplePrefixes(c.perc, prefixes)
+			if len(c.expected) != len(actual) {
+				t.Fatalf("Number of files picked (%d) differs from expected (%d):\nExpected: %s\nActual: %s\n", len(actual), len(c.expected), c.expected, actual)
+				return
+			}
+			for i, v := range c.expected {
+				if actual[i] != v {
+					t.Fatalf("Difference in expected and actual files (at least in number %d of actual):\n\nExpected:\n%s\n\nActual:\n%s\n", i, c.expected, actual)
+				}
+			}
+		})
+	}
+}
-- 
cgit v1.2.1-24-ge1ad