Skip to content

Commit b67e172

Browse files
committed
algorithms: Add bucketSort, for stable, linear-time sorting
Specifically the sort takes linear time so long as there aren't more than linearly-many buckets. For our immediate use case of ranking emoji-autocomplete results, we'll in fact stick to a constant number of buckets.
1 parent e4b5604 commit b67e172

File tree

2 files changed

+157
-0
lines changed

2 files changed

+157
-0
lines changed

lib/model/algorithms.dart

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,67 @@ QueueList<int> setUnion(Iterable<int> xs, Iterable<int> ys) {
116116
}
117117
return result;
118118
}
119+
120+
/// Sort the items by bucket, stably,
/// and if the buckets are few then in linear time.
///
/// The returned list will have the same elements as [xs], ordered by bucket,
/// and elements in each bucket will appear in the same order as in [xs].
/// In other words, the list is the result of a stable sort of [xs] by bucket.
/// (By contrast, Dart's [List.sort] is not guaranteed to be stable.)
///
/// The returned list is a new, growable list; [xs] is not modified.
/// If [xs] is empty, the result is an empty list
/// and neither [bucketOf] nor [numBuckets] is consulted.
///
/// If [xs] is not a [List] (for example a lazy iterable from [Iterable.map]),
/// it is copied into a list once up front, so that it is evaluated
/// exactly once and every step below sees the same elements.
///
/// For each element of [xs], the bucket identified by [bucketOf]
/// must be in the range `0 <= bucket < numBuckets`.
/// Repeated calls to [bucketOf] on the same element must return the same value.
///
/// Throws a [StateError] if [xs] is non-empty and [numBuckets] is not positive,
/// or if [bucketOf] returns a bucket outside the permitted range.
///
/// If [bucketOf] returns different answers when called twice for some element,
/// this function's behavior is undefined:
/// it may throw, or may return an arbitrary list.
///
/// The cost of this function is linear in `xs.length` plus [numBuckets].
/// In particular if [numBuckets] is a constant
/// (or more generally is at most a constant multiple of `xs.length`),
/// then this function sorts the items in linear time, O(n).
/// On the other hand if there are many more buckets than elements,
/// consider using a different sorting algorithm.
List<T> bucketSort<T>(Iterable<T> xs, int Function(T) bucketOf, {
  required int numBuckets,
}) {
  // Snapshot anything that isn't already a list.  A lazy iterable would
  // otherwise be re-evaluated by each of the length, first-element, and
  // iteration steps below, which is wasteful and, if the producer is
  // stateful, makes the two passes disagree about what the elements are.
  final items = xs is List<T> ? xs : xs.toList(growable: false);

  if (items.isEmpty) return [];
  if (numBuckets <= 0) throw StateError("bucketSort: non-positive numBuckets");

  final counts = List.filled(numBuckets, 0);
  for (final x in items) {
    final key = bucketOf(x);
    _checkBucket(key, numBuckets);
    counts[key]++;
  }
  // Now counts[k] is the number of values with key k.

  // Convert the counts to an exclusive prefix sum, in place.
  var partialSum = 0;
  for (var k = 0; k < numBuckets; k++) {
    final count = counts[k];
    counts[k] = partialSum;
    partialSum += count;
  }
  assert(partialSum == items.length);
  // Now counts[k] is the index where the first value with key k should go.

  // The first element serves only as a placeholder of type T;
  // every slot gets overwritten below when [bucketOf] is consistent.
  // The list is growable, matching what callers have always received.
  final result = List.filled(items.length, items.first, growable: true);
  for (final x in items) {
    // Each counts[k] is the index where the next value with key k should go.
    final key = bucketOf(x);
    _checkBucket(key, numBuckets);
    final index = counts[key]++;
    if (index >= result.length) {
      // Only reachable if the second pass put more elements in some bucket
      // than the first pass counted.
      throw StateError("bucketSort: bucketOf gave varying answers on same value");
    }
    result[index] = x;
  }
  return result;
}

/// Throws a [StateError] unless `0 <= key < numBuckets`.
void _checkBucket(int key, int numBuckets) {
  if (key < 0) throw StateError("bucketSort: negative bucket");
  if (key >= numBuckets) throw StateError("bucketSort: bucket out of range");
}

test/model/algorithms_test.dart

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import 'dart:math';
12

23
import 'package:checks/checks.dart';
4+
import 'package:collection/collection.dart';
35
import 'package:test/scaffolding.dart';
46
import 'package:zulip/model/algorithms.dart';
57

@@ -55,4 +57,95 @@ void main() {
5557
});
5658
}
5759
});
60+
61+
group('bucketSort', () {
62+
/// Same spec as [bucketSort], except slow: N * B time instead of N + B.
63+
List<T> simpleBucketSort<T>(Iterable<T> xs, int Function(T) bucketOf, {
64+
required int numBuckets,
65+
}) {
66+
return Iterable.generate(numBuckets,
67+
(k) => xs.where((s) => bucketOf(s) == k)).flattenedToList;
68+
}
69+
70+
void checkBucketSort<T>(Iterable<T> xs, {
71+
required int Function(T) bucketOf, required int numBuckets,
72+
}) {
73+
check(bucketSort(xs, bucketOf, numBuckets: numBuckets)).deepEquals(
74+
simpleBucketSort<T>(xs, bucketOf, numBuckets: numBuckets));
75+
}
76+
77+
int stringBucket(String s) => s.codeUnits.last - '0'.codeUnits.single;
78+
79+
test('explicit result, interleaved: 4 elements, 2 buckets', () {
80+
check(bucketSort(['a1', 'd0', 'c1', 'b0'], stringBucket, numBuckets: 2))
81+
.deepEquals(['d0', 'b0', 'a1', 'c1']);
82+
});
83+
84+
List<_SortablePair> generatePairs(Iterable<int> keys) {
85+
var token = 0;
86+
return keys.map((k) => _SortablePair(k, "${token++}")).toList();
87+
}
88+
89+
void checkSortPairs(int numBuckets, Iterable<int> keys) {
90+
checkBucketSort(numBuckets: numBuckets, bucketOf: (p) => p.key,
91+
generatePairs(keys));
92+
}
93+
94+
test('empty list, zero buckets', () {
95+
checkSortPairs(0, []);
96+
});
97+
98+
test('empty, some buckets', () {
99+
checkSortPairs(3, []);
100+
});
101+
102+
test('interleaved: 4 elements, 2 buckets', () {
103+
checkSortPairs(2, [1, 0, 1, 0]);
104+
});
105+
106+
test('some buckets empty: 10 elements in 3 of 10 buckets', () {
107+
checkSortPairs(10, [9, 9, 9, 5, 5, 5, 1, 1, 1, 1]);
108+
});
109+
110+
test('one big bucket', () {
111+
checkSortPairs(1, Iterable.generate(100, (_) => 0));
112+
});
113+
114+
const seed = 4321;
115+
116+
Iterable<int> randomKeys({required int numBuckets, required int length}) {
117+
final rand = Random(seed);
118+
return Iterable.generate(length, (_) => rand.nextInt(numBuckets));
119+
}
120+
121+
test('long random list, 1000 in 2 buckets', () {
122+
checkSortPairs(2, randomKeys(numBuckets: 2, length: 1000));
123+
});
124+
125+
test('long random list, 1000 in 1000 buckets', () {
126+
checkSortPairs(1000, randomKeys(numBuckets: 1000, length: 1000));
127+
});
128+
129+
test('sparse random list, 100 in 1000 buckets', () {
130+
checkSortPairs(1000, randomKeys(numBuckets: 1000, length: 100));
131+
});
132+
});
133+
}
134+
135+
/// A value pairing an integer [key] with a string [tag], for use in tests.
///
/// Two pairs are equal exactly when both their keys and their tags are equal,
/// so pairs that share a key can still be told apart by their tags.
class _SortablePair {
  _SortablePair(this.key, this.tag);

  /// The integer component of the pair.
  final int key;

  /// The string component of the pair.
  final String tag;

  @override
  bool operator ==(Object other) {
    if (other is! _SortablePair) return false;
    if (key != other.key) return false;
    return tag == other.tag;
  }

  @override
  int get hashCode => Object.hash(key, tag);

  /// Formats the pair as the tag, a colon, then the key.
  @override
  String toString() {
    return "$tag:$key";
  }
}

0 commit comments

Comments
 (0)