summary refs log tree commit diff
path: root/pare-gt
diff options
context:
space:
mode:
Diffstat (limited to 'pare-gt')
-rw-r--r-- pare-gt/main.go 12
-rw-r--r-- pare-gt/main_test.go 78
2 files changed, 88 insertions, 2 deletions
diff --git a/pare-gt/main.go b/pare-gt/main.go
index c645480..0a8b067 100644
--- a/pare-gt/main.go
+++ b/pare-gt/main.go
@@ -9,6 +9,7 @@ import (
"path"
"path/filepath"
"strings"
+ "sort"
)
const usage = `Usage: pare-gt [-n num] gtdir movedir
@@ -65,13 +66,20 @@ func inStrSlice(sl []string, s string) bool {
// total of perctosample% are sampled.
func samplePrefixes(perctosample int, prefixes Prefixes) (filestomove []string) {
var total, sample int
- for _, v := range prefixes {
+ var keys []string
+ for i, v := range prefixes {
total += len(v)
+ // needed for determinism
+ sort.Strings(prefixes[i])
+ keys = append(keys, i)
}
sample = total / perctosample
- for _, prefix := range prefixes {
+ // This ensures the map is looped over deterministically
+ sort.Strings(keys)
+ for _, key := range keys {
+ prefix := prefixes[key]
len := len(prefix)
if len == 1 {
continue
diff --git a/pare-gt/main_test.go b/pare-gt/main_test.go
new file mode 100644
index 0000000..f833609
--- /dev/null
+++ b/pare-gt/main_test.go
@@ -0,0 +1,78 @@
+package main
+
+import (
+ "fmt"
+ "testing"
+)
+
+func TestSamplePrefixes(t *testing.T) {
+ prefixes := Prefixes{
+ "1471-Orthographia": {
+ "1471-Orthographia-Tortellius_00001.txt",
+ "1471-Orthographia-Tortellius_00002.txt",
+ "1471-Orthographia-Tortellius_00003.txt",
+ "1471-Orthographia-Tortellius_00004.txt",
+ "1471-Orthographia-Tortellius_00005.txt",
+ "1471-Orthographia-Tortellius_00006.txt",
+ "1471-Orthographia-Tortellius_00007.txt",
+ "1471-Orthographia-Tortellius_00008.txt",
+ "1471-Orthographia-Tortellius_00009.txt",
+ "1471-Orthographia-Tortellius_000010.txt",
+ "1471-Orthographia-Tortellius_000011.txt",
+ "1471-Orthographia-Tortellius_000012.txt",
+ "1471-Orthographia-Tortellius_000013.txt",
+ "1471-Orthographia-Tortellius_000014.txt",
+ "1471-Orthographia-Tortellius_000015.txt",
+ "1471-Orthographia-Tortellius_000016.txt",
+ "1471-Orthographia-Tortellius_000017.txt",
+ "1471-Orthographia-Tortellius_000018.txt",
+ "1471-Orthographia-Tortellius_000019.txt",
+ "1471-Orthographia-Tortellius_000020.txt",
+ },
+ "Kallimachos_1509": {
+ "Kallimachos_1509-ShipOfFools-Barclay_00121.txt",
+ "Kallimachos_1509-ShipOfFools-Barclay_00122.txt",
+ "Kallimachos_1509-ShipOfFools-Barclay_00123.txt",
+ "Kallimachos_1509-ShipOfFools-Barclay_00124.txt",
+ "Kallimachos_1509-ShipOfFools-Barclay_00125.txt",
+ "Kallimachos_1509-ShipOfFools-Barclay_00126.txt",
+ },
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4": {
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_10_59125.txt",
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt",
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_12_49.txt",
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_13_9033333333333333.txt",
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_1_415.txt",
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_14_6628571428571429.txt",
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_16_865.txt",
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_17_62.txt",
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_18_6366666666666666.txt",
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_19_7857142857142857.txt",
+ "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_19_7857142857142857.txt",
+ },
+ }
+
+ cases := []struct {
+ perc int
+ expected []string
+ }{
+ //{1, []string{""}}, // TODO: fix this; currently causes hang
+ {10, []string{"1471-Orthographia-Tortellius_000019.txt", "Kallimachos_1509-ShipOfFools-Barclay_00122.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_13_9033333333333333.txt"}},
+ {20, []string{"1471-Orthographia-Tortellius_00002.txt", "Kallimachos_1509-ShipOfFools-Barclay_00126.txt", "buckets_1678_DuHAMEL_PhilosophiaVetusEtNova_Vol4_0008_bin0.4-copy_line_1_11_27.txt"}},
+ }
+
+ for _, c := range cases {
+ t.Run(fmt.Sprintf("%d%%", c.perc), func(t *testing.T) {
+ actual := samplePrefixes(c.perc, prefixes)
+ if len(c.expected) != len(actual) {
+ t.Fatalf("Number of files picked (%d) differs from expected (%d):\nExpected: %s\nActual: %s\n", len(actual), len(c.expected), c.expected, actual)
+ return
+ }
+ for i, v := range c.expected {
+ if actual[i] != v {
+ t.Fatalf("Difference in expected and actual files (at least in number %d of actual):\n\nExpected:\n%s\n\nActual:\n%s\n", i, c.expected, actual)
+ }
+ }
+ })
+ }
+}