Skip to content

Commit 47d5f22

Browse files
committed
Add large, realistic tests and group folders that get too big during sort
1 parent 855fb4b commit 47d5f22

File tree

8 files changed

+478503
-100
lines changed

8 files changed

+478503
-100
lines changed

data/tosec-spectrum-sorted-100.txt

Lines changed: 119557 additions & 0 deletions
Large diffs are not rendered by default.

data/tosec-spectrum-sorted-1000.txt

Lines changed: 119557 additions & 0 deletions
Large diffs are not rendered by default.

data/tosec-spectrum-sorted-255.txt

Lines changed: 119557 additions & 0 deletions
Large diffs are not rendered by default.

data/tosec-spectrum.txt

Lines changed: 119557 additions & 0 deletions
Large diffs are not rendered by default.

group.go

Lines changed: 14 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,6 @@ import (
88
"path/filepath"
99
)
1010

11-
// Sort converts a list of paths to files into a mapping from source paths
12-
// to destination paths where no directory in the destinations
13-
// contains any more than size files
14-
func Sort(sources []string, size int) map[string]string {
15-
group := newGroup(sources)
16-
17-
groups := group.sort(size)
18-
19-
return groups.fileMap()
20-
}
21-
2211
type group struct {
2312
entries []entry
2413
prefixSize int
@@ -80,89 +69,6 @@ func (g group) name() string {
8069
return fmt.Sprintf("%s-%s", a, b)
8170
}
8271

83-
// split returns g split into sub-groups using the specific prefixSize.
84-
// the second return value indicates whether the group was able to meet the size constraint
85-
func (g group) split(prefixSize, size int) (groups, bool) {
86-
success := true
87-
88-
counts := make(map[string]int)
89-
prefixes := make([]string, 0)
90-
91-
// Fail if any individual prefix is too big
92-
for _, entry := range g.entries {
93-
prefix := entry.prefix(prefixSize)
94-
95-
if counts[prefix] == 0 {
96-
prefixes = append(prefixes, prefix)
97-
}
98-
99-
counts[prefix]++
100-
101-
if counts[prefix] > size {
102-
success = false
103-
}
104-
}
105-
106-
groups := make(groups, 0)
107-
cur := newGroup([]string{})
108-
cur.prefixSize = prefixSize
109-
cur.path = g.path
110-
pos := 0
111-
112-
// Consolidate
113-
for i, prefix := range prefixes {
114-
// Copy
115-
for j := 0; j < counts[prefix]; j++ {
116-
cur.entries = append(cur.entries, g.entries[pos])
117-
pos++
118-
}
119-
120-
// Check if we split here
121-
if i == len(prefixes)-1 || cur.Len()+counts[prefixes[i+1]] > size {
122-
groups = append(groups, cur)
123-
cur = newGroup([]string{})
124-
cur.prefixSize = prefixSize
125-
cur.path = g.path
126-
}
127-
}
128-
129-
if len(cur.entries) > 0 {
130-
groups = append(groups, cur)
131-
}
132-
133-
if len(groups) > size {
134-
success = false
135-
}
136-
137-
return groups, success
138-
}
139-
140-
func (g group) sort(size int) groups {
141-
if g.Len() <= size {
142-
return groups{g}
143-
}
144-
145-
gs, _ := g.split(g.prefixSize+1, size)
146-
147-
out := make(groups, 0)
148-
149-
for _, sub := range gs {
150-
if sub.Len() <= size {
151-
out = append(out, sub)
152-
} else {
153-
if len(gs) > 1 {
154-
sub.path = filepath.Join(sub.path, sub.name())
155-
}
156-
157-
for _, part := range sub.sort(size) {
158-
out = append(out, part)
159-
}
160-
}
161-
}
162-
163-
return out
164-
}
165-
16672
func (g group) String() string {
16773
path := filepath.Join(g.path, g.name())
16874

@@ -186,9 +92,23 @@ type groups []group
18692
func (gs groups) fileMap() map[string]string {
18793
out := make(map[string]string)
18894

95+
dedup := make(map[string]bool)
96+
18997
for _, g := range gs {
19098
for src, dst := range g.fileMap() {
99+
if dedup[dst] {
100+
dir, fn := filepath.Split(dst)
101+
ext := filepath.Ext(fn)
102+
for i := 2; ; i++ {
103+
dst = filepath.Join(dir, fmt.Sprintf("%s-%d%s", fn[:len(fn)-len(ext)], i, ext))
104+
if !dedup[dst] {
105+
break
106+
}
107+
}
108+
}
109+
191110
out[src] = dst
111+
dedup[dst] = true
192112
}
193113
}
194114

group_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,14 +135,14 @@ func TestGroupSortBasic(t *testing.T) {
135135
}
136136

137137
if d := cmp.Diff(map[string]string{
138-
"/does/not/matter/and a third": "a/and a third",
139-
"/somewhere/a 4th file": "a/a 4th file",
138+
"/does/not/matter/and a third": "a-f/a/and a third",
139+
"/somewhere/a 4th file": "a-f/a/a 4th file",
140140
}, groups[1].fileMap()); d != "" {
141141
t.Error(d)
142142
}
143143

144144
if d := cmp.Diff(map[string]string{
145-
"/some/path/first": "f/first",
145+
"/some/path/first": "a-f/f/first",
146146
}, groups[2].fileMap()); d != "" {
147147
t.Error(d)
148148
}
@@ -189,9 +189,9 @@ func TestGroups(t *testing.T) {
189189

190190
if d := cmp.Diff(map[string]string{
191191
"/another/path/2nd.file": "#/2nd.file",
192-
"/somewhere/a 4th file": "a/a 4th file",
193-
"/does/not/matter/and a third": "a/and a third",
194-
"/some/path/first": "f/first",
192+
"/somewhere/a 4th file": "a-f/a/a 4th file",
193+
"/does/not/matter/and a third": "a-f/a/and a third",
194+
"/some/path/first": "a-f/f/first",
195195
}, groups.fileMap()); d != "" {
196196
t.Error(d)
197197
}

sort.go

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
package retrosort
2+
3+
import (
4+
"math"
5+
"path/filepath"
6+
)
7+
8+
// Sort converts a list of paths to files into a mapping from source paths
9+
// to destination paths where no directory in the destinations
10+
// contains any more than size files
11+
func Sort(sources []string, size int) map[string]string {
12+
group := newGroup(sources)
13+
14+
groups := group.sort(size)
15+
16+
return groups.fileMap()
17+
}
18+
19+
// split returns g split into sub-groups using the specific prefixSize.
20+
// the second return value indicates whether the group was able to meet the size constraint
21+
func (g group) split(prefixSize, size int) (groups, bool) {
22+
success := true
23+
24+
counts := make(map[string]int)
25+
prefixes := make([]string, 0)
26+
27+
// Fail if any individual prefix is too big
28+
for _, entry := range g.entries {
29+
prefix := entry.prefix(prefixSize)
30+
31+
if counts[prefix] == 0 {
32+
prefixes = append(prefixes, prefix)
33+
}
34+
35+
counts[prefix]++
36+
37+
if counts[prefix] > size {
38+
success = false
39+
}
40+
}
41+
42+
groups := make(groups, 0)
43+
cur := newGroup([]string{})
44+
cur.prefixSize = prefixSize
45+
cur.path = g.path
46+
pos := 0
47+
48+
// Consolidate
49+
for i, prefix := range prefixes {
50+
// Copy
51+
for j := 0; j < counts[prefix]; j++ {
52+
cur.entries = append(cur.entries, g.entries[pos])
53+
pos++
54+
}
55+
56+
// Check if we split here
57+
if i == len(prefixes)-1 || cur.Len()+counts[prefixes[i+1]] > size {
58+
groups = append(groups, cur)
59+
cur = newGroup([]string{})
60+
cur.prefixSize = prefixSize
61+
cur.path = g.path
62+
}
63+
}
64+
65+
if len(cur.entries) > 0 {
66+
groups = append(groups, cur)
67+
}
68+
69+
if len(groups) > size {
70+
success = false
71+
}
72+
73+
// Group folders that got too big
74+
if len(groups) > size {
75+
parts := int(math.Ceil(float64(len(groups)) / float64(size)))
76+
77+
for i := 0; i < parts; i++ {
78+
start := i * len(groups) / parts
79+
end := (i+1)*len(groups)/parts - 1
80+
if end > len(groups)-1 {
81+
end = len(groups) - 1
82+
}
83+
84+
if end-start > 0 {
85+
a := groups[start].entries[0].prefix(prefixSize)
86+
b := groups[end].entries[len(groups[end].entries)-1].prefix(prefixSize)
87+
prefix := a + "-" + b
88+
89+
for j := start; j <= end; j++ {
90+
groups[j].path = filepath.Join(groups[j].path, prefix)
91+
}
92+
}
93+
}
94+
}
95+
96+
return groups, success
97+
}
98+
99+
func (g group) sort(size int) groups {
100+
if g.Len() <= size {
101+
return groups{g}
102+
}
103+
104+
gs, _ := g.split(g.prefixSize+1, size)
105+
106+
out := make(groups, 0)
107+
108+
for _, sub := range gs {
109+
if sub.Len() <= size {
110+
out = append(out, sub)
111+
} else {
112+
if len(gs) > 1 {
113+
sub.path = filepath.Join(sub.path, sub.name())
114+
}
115+
116+
for _, part := range sub.sort(size) {
117+
out = append(out, part)
118+
}
119+
}
120+
}
121+
122+
return out
123+
}

0 commit comments

Comments
 (0)