From 62512ca0abb651780a01ba535bdc31f6930d4300 Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Thu, 20 Feb 2020 11:25:05 +0000
Subject: [pare-gt] Add some tests, and make samplePrefixes deterministic

These tests have uncovered at least two bugs that have not yet been fixed:
- selecting 1% hangs
- selecting 20% picks only as many files as selecting 10%
---
 pare-gt/main.go      | 12 ++++++--
 pare-gt/main_test.go | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 2 deletions(-)
 create mode 100644 pare-gt/main_test.go

(limited to 'pare-gt')

diff --git a/pare-gt/main.go b/pare-gt/main.go
index c645480..0a8b067 100644
--- a/pare-gt/main.go
+++ b/pare-gt/main.go
@@ -9,6 +9,7 @@ import (
 	"path"
 	"path/filepath"
 	"strings"
+	"sort"
 )
 
 const usage = `Usage: pare-gt [-n num] gtdir movedir
@@ -65,13 +66,20 @@ func inStrSlice(sl []string, s string) bool {
 // total of perctosample% are sampled.
 func samplePrefixes(perctosample int, prefixes Prefixes) (filestomove []string) {
 	var total, sample int
-	for _, v := range prefixes {
+	var keys []string
+	for i, v := range prefixes {
 		total += len(v)
+		// needed for determinism
+		sort.Strings(prefixes[i])
+		keys = append(keys, i)
 	}
 
 	sample = total / perctosample
 
-	for _, prefix := range prefixes {
+	// This ensures the map is looped over deterministically
+	sort.Strings(keys)
+	for _, key := range keys {
+		prefix := prefixes[key]
 		len := len(prefix)
 		if len == 1 {
 			continue
diff --git a/pare-gt/main_test.go b/pare-gt/main_test.go
new file mode 100644
index 0000000..f833609
--- /dev/null
+++ b/pare-gt/main_test.go
@@ -0,0 +1,78 @@
+package main
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestSamplePrefixes(t *testing.T) {
+	prefixes := Prefixes{
+		"1471-Orthographia": {
+			"1471-Orthographia-Tortellius_00001.txt",
+			"1471-Orthographia-Tortellius_00002.txt",
+			"1471-Orthographia-Tortellius_00003.txt",
+			"1471-Orthographia-Tortellius_00004.txt",
+			"1471-Orthographia-Tortellius_00005.txt",
+			"1471-Orthographia-Tortellius_00006.txt",
+			"1471-Orthographia-Tortellius_00007.txt",
+			"1471-Orthographia-Tortellius_00008.txt",
+			"1471-Orthographia-Tortellius_00009.txt",
+			"1471-Orthographia-Tortellius_000010.txt",
+			"1471-Orthographia-Tortellius_000011.txt",
+			"1471-Orthographia-Tortellius_000012.txt",
+			"1471-Orthographia-Tortellius_000013.txt",
+			"1471-Orthographia-Tortellius_000014.txt",
+			"1471-Orthographia-Tortellius_000015.txt",
+			"1471-Orthographia-Tortellius_000016.txt",
+			"1471-Orthographia-Tortellius_000017.txt",
+			"1471-Orthographia-Tortellius_000018.txt",
+			"1471-Orthographia-Tortellius_000019.txt",
+			"1471-Orthographia-Tortellius_000020.txt",
+		},
+		"Kallimachos_1509": {
+			"Kallimachos_1509-ShipOfFools-Barclay_00121.txt",
+			"Kallimachos_1509-ShipOfFools-Barclay_00122.txt",
+			"Kallimachos_1509-ShipOfFools-Barclay_00123.txt",
+			"Kallimachos_1509-ShipOfFools-Barclay_00124.txt",
+			"Kallimachos_1509-ShipOfFools-Barclay_00125.txt",
+			"Kallimachos_1509-ShipOfFools-Barclay_00126.txt",
+		},
+		"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4": {
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_10_59125.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_12_49.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_13_9033333333333333.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_1_415.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_14_6628571428571429.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_16_865.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_17_62.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_18_6366666666666666.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_19_7857142857142857.txt",
+			"buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_19_7857142857142857.txt",
+		},
+	}
+
+	cases := []struct {
+		perc     int
+		expected []string
+	}{
+		//{1, []string{""}}, // TODO: fix this; currently causes hang
+		{10, []string{"1471-Orthographia-Tortellius_000019.txt", "Kallimachos_1509-ShipOfFools-Barclay_00122.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_13_9033333333333333.txt"}},
+		{20, []string{"1471-Orthographia-Tortellius_00002.txt", "Kallimachos_1509-ShipOfFools-Barclay_00126.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt"}},
+	}
+
+	for _, c := range cases {
+		t.Run(fmt.Sprintf("%d%%", c.perc), func(t *testing.T) {
+			actual := samplePrefixes(c.perc, prefixes)
+			if len(c.expected) != len(actual) {
+				t.Fatalf("Number of files picked (%d) differs from expected (%d):\nExpected: %s\nActual: %s\n", len(actual), len(c.expected), c.expected, actual)
+				return
+			}
+			for i, v := range c.expected {
+				if actual[i] != v {
+					t.Fatalf("Difference in expected and actual files (at least in number %d of actual):\n\nExpected:\n%s\n\nActual:\n%s\n", i, c.expected, actual)
+				}
+			}
+		})
+	}
+}
-- 
cgit v1.2.1-24-ge1ad