Skip to content

Commit 71a705f

Browse files
committed
Use top-k sorted list builder instead of full-list sorts in search indexes.
1 parent 2aadb0a commit 71a705f

File tree

4 files changed

+112
-33
lines changed

4 files changed

+112
-33
lines changed

app/lib/search/mem_index.dart

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import 'package:meta/meta.dart';
1212
import 'package:pub_dev/service/topics/models.dart';
1313
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
1414

15+
import '../shared/utils.dart' show TopKSortedListBuilder;
1516
import 'models.dart';
1617
import 'search_service.dart';
1718
import 'text_utils.dart';
@@ -263,6 +264,13 @@ class InMemoryPackageIndex {
263264
// extra item, that will be addressed after the ranking score is determined.
264265
var totalCount = packageScores?.positiveCount() ?? predicateFilterCount;
265266

267+
// Check whether it is worth calculating the sorted order, estimating the
268+
// total count by overcounting the best name matches.
269+
final maximumTotalCount = totalCount + (bestNameIndex != null ? 1 : 0);
270+
if (maximumTotalCount < query.offset) {
271+
return PackageSearchResult.empty();
272+
}
273+
266274
Iterable<IndexedPackageHit> indexedHits;
267275
switch (query.effectiveOrder) {
268276
case SearchOrder.top:
@@ -285,8 +293,8 @@ class InMemoryPackageIndex {
285293
}
286294
indexedHits = _rankWithValues(
287295
packageScores,
288-
requiredLengthThreshold: query.offset,
289296
bestNameIndex: bestNameIndex ?? -1,
297+
topK: query.offset + query.limit,
290298
);
291299
break;
292300
case SearchOrder.created:
@@ -512,33 +520,31 @@ class InMemoryPackageIndex {
512520
return _TextResults(topApiPages);
513521
}
514522

515-
List<IndexedPackageHit> _rankWithValues(
523+
Iterable<IndexedPackageHit> _rankWithValues(
516524
IndexedScore<String> score, {
517-
// if the item count is fewer than this threshold, an empty list will be returned
518-
required int requiredLengthThreshold,
519-
// When no best name match is applied, this parameter will be `-1`
525+
/// When no best name match is applied, this parameter will be `-1`
520526
required int bestNameIndex,
527+
528+
/// Return (and sort) only the top-k results.
529+
required int topK,
521530
}) {
522-
final list = <IndexedPackageHit>[];
531+
final builder = TopKSortedListBuilder<int>(topK, (aIndex, bIndex) {
532+
if (aIndex == bestNameIndex) return -1;
533+
if (bIndex == bestNameIndex) return 1;
534+
final aScore = score.getValue(aIndex);
535+
final bScore = score.getValue(bIndex);
536+
final scoreCompare = -aScore.compareTo(bScore);
537+
if (scoreCompare != 0) return scoreCompare;
538+
// if two packages got the same score, order by last updated
539+
return _compareUpdated(_documents[aIndex], _documents[bIndex]);
540+
});
523541
for (var i = 0; i < score.length; i++) {
524542
final value = score.getValue(i);
525543
if (value <= 0.0 && i != bestNameIndex) continue;
526-
list.add(IndexedPackageHit(
527-
i, PackageHit(package: score.keys[i], score: value)));
528-
}
529-
if (requiredLengthThreshold > list.length) {
530-
// There is no point to sort or even keep the results, as the search query offset ignores these anyway.
531-
return [];
544+
builder.add(i);
532545
}
533-
list.sort((a, b) {
534-
if (a.index == bestNameIndex) return -1;
535-
if (b.index == bestNameIndex) return 1;
536-
final scoreCompare = -a.hit.score!.compareTo(b.hit.score!);
537-
if (scoreCompare != 0) return scoreCompare;
538-
// if two packages got the same score, order by last updated
539-
return _compareUpdated(_documents[a.index], _documents[b.index]);
540-
});
541-
return list;
546+
return builder.getTopK().map((i) => IndexedPackageHit(
547+
i, PackageHit(package: score.keys[i], score: score.getValue(i))));
542548
}
543549

544550
List<IndexedPackageHit> _rankWithComparator(

app/lib/search/token_index.dart

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import 'dart:math' as math;
66

77
import 'package:meta/meta.dart';
8+
import 'package:pub_dev/shared/utils.dart';
89
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
910

1011
import 'text_utils.dart';
@@ -313,21 +314,16 @@ class IndexedScore<K> {
313314
}
314315

315316
Map<K, double> top(int count, {double? minValue}) {
316-
final list = <int>[];
317-
double? lastValue;
317+
minValue ??= 0.0;
318+
final builder = TopKSortedListBuilder<int>(
319+
count, (a, b) => -_values[a].compareTo(_values[b]));
318320
for (var i = 0; i < length; i++) {
319321
final v = _values[i];
320-
if (minValue != null && v < minValue) continue;
321-
if (list.length == count) {
322-
if (lastValue != null && lastValue >= v) continue;
323-
list[count - 1] = i;
324-
} else {
325-
list.add(i);
326-
}
327-
list.sort((a, b) => -_values[a].compareTo(_values[b]));
328-
lastValue = _values[list.last];
322+
if (v < minValue) continue;
323+
builder.add(i);
329324
}
330-
return Map.fromEntries(list.map((i) => MapEntry(_keys[i], _values[i])));
325+
return Map.fromEntries(
326+
builder.getTopK().map((i) => MapEntry(_keys[i], _values[i])));
331327
}
332328

333329
Map<K, double> toMap() {

app/lib/shared/utils.dart

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,50 @@ class DurationTracker extends LastNTracker<Duration> {
148148
};
149149
}
150150

151+
/// Builds a sorted list of the top-k items using the provided comparator.
///
/// The retained items are kept in a list ordered by the comparator, with the
/// item that sorts first at index 0. Each [add] finds the insertion point
/// with a binary search over that list, so processing N items needs
/// O(N * log(K)) comparisons; an accepted item additionally shifts the list
/// entries that sort after it.
///
/// Items that compare as equal keep their insertion order. Once k items are
/// retained, a new item that merely ties with the current last item is
/// dropped.
///
/// When k is zero or negative, no item is ever retained.
class TopKSortedListBuilder<T> {
  final int _k;
  final Comparator<T> _compare;
  final _list = <T>[];

  /// Creates a builder retaining at most [_k] items ordered by [_compare].
  TopKSortedListBuilder(this._k, this._compare);

  /// Offers every item of [items] to the builder, in iteration order.
  void addAll(Iterable<T> items) {
    for (final item in items) {
      add(item);
    }
  }

  /// Offers [item] to the builder.
  ///
  /// The item is retained only if fewer than k items are held, or if it
  /// sorts strictly before the current last item (which is then dropped).
  void add(T item) {
    // Nothing can be retained. This guard also prevents reading `_list.last`
    // on an empty list, which would throw a `StateError`.
    if (_k <= 0) {
      return;
    }
    final isFull = _list.length >= _k;
    if (isFull && _compare(_list.last, item) <= 0) {
      return;
    }
    // Binary search for the first position whose element sorts strictly
    // after `item`; equal elements stay ahead of the new item.
    var start = 0, end = _list.length;
    while (start < end) {
      final mid = (start + end) >> 1;
      if (_compare(_list[mid], item) <= 0) {
        start = mid + 1;
      } else {
        end = mid;
      }
    }
    if (!isFull) {
      _list.insert(start, item);
      return;
    }
    // The list is full: shift the tail one slot to the right, overwriting
    // (and thereby dropping) the last item, then place the new item.
    for (var i = _list.length - 1; i > start; i--) {
      _list[i] = _list[i - 1];
    }
    _list[start] = item;
  }

  /// Returns the retained items in comparator order.
  ///
  /// The returned iterable is the builder's internal list, not a copy, so it
  /// reflects any later [add] or [addAll] calls.
  Iterable<T> getTopK() {
    return _list;
  }
}
194+
151195
/// Returns the MIME content type based on the name of the file.
152196
String contentType(String name) {
153197
final ext = p.extension(name).replaceAll('.', '');

app/test/shared/utils_test.dart

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,37 @@ void main() {
7373
expect(tracker.getLatency().inMilliseconds, greaterThan(15000));
7474
});
7575
});
76+
77+
group('top-k sorted list', () {
78+
int compare(int a, int b) => -a.compareTo(b);
79+
80+
test('no items', () {
81+
final builder = TopKSortedListBuilder(5, compare);
82+
expect(builder.getTopK().toList(), []);
83+
});
84+
85+
test('single item', () {
86+
final builder = TopKSortedListBuilder(5, compare);
87+
builder.add(1);
88+
expect(builder.getTopK().toList(), [1]);
89+
});
90+
91+
test('three items ascending', () {
92+
final builder = TopKSortedListBuilder(5, compare);
93+
builder.addAll([1, 2, 3]);
94+
expect(builder.getTopK().toList(), [3, 2, 1]);
95+
});
96+
97+
test('three items descending', () {
98+
final builder = TopKSortedListBuilder(5, compare);
99+
builder.addAll([3, 2, 1]);
100+
expect(builder.getTopK().toList(), [3, 2, 1]);
101+
});
102+
103+
test('10 items + repeated', () {
104+
final builder = TopKSortedListBuilder(5, compare);
105+
builder.addAll([1, 10, 2, 9, 3, 8, 4, 7, 6, 5, 9]);
106+
expect(builder.getTopK().toList(), [10, 9, 9, 8, 7]);
107+
});
108+
});
76109
}

0 commit comments

Comments
 (0)