Skip to content

Commit b53cca5

Browse files
committed
Use top-k sorted list builder with min/max-heaps for search.
1 parent 3116ca4 commit b53cca5

File tree

4 files changed

+171
-27
lines changed

4 files changed

+171
-27
lines changed

app/lib/search/heap.dart

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
/// A binary heap stored in a flat list that maintains the "heap property"
/// relative to the provided comparator: no item compares greater than either
/// of its children, so the item that sorts first is always at index `0`.
///
/// The provided comparator decides which kind of heap is being built: a
/// natural-order comparator gives a min-heap, a reversed one gives a max-heap.
class _Heap<T> {
  final Comparator<T> _compare;
  final _items = <T>[];

  _Heap(this._compare);

  int get length => _items.length;

  /// Rearranges the current items in place so that the heap property holds.
  void _heapify() {
    // Nodes in the second half of the list have no children, so sifting
    // starts at the last parent and walks back to the root.
    for (var i = (_items.length >> 1) - 1; i >= 0; i--) {
      _pushDown(i);
    }
    assert(_isValidHeap());
  }

  /// Removes and returns the root item.
  ///
  /// The heap must be non-empty and must already satisfy the heap property.
  T _removeFirst() {
    final first = _items[0];
    // The last leaf takes the root's place and is sifted down; when the root
    // was the only item there is nothing left to reorder.
    final last = _items.removeLast();
    if (_items.isNotEmpty) {
      _items[0] = last;
      _pushDown(0);
    }
    return first;
  }

  /// Moves the item at [index] down the tree, swapping it with its
  /// first-sorting child, until neither child sorts before it.
  void _pushDown(int index) {
    final maxLength = _items.length;
    final item = _items[index];
    while (index < maxLength) {
      final leftIndex = (index << 1) + 1;
      if (leftIndex >= maxLength) {
        // No children: the item has reached a leaf position.
        return;
      }
      // Pick whichever child sorts first (the left one on ties).
      var childIndex = leftIndex;
      final rightIndex = leftIndex + 1;
      if (rightIndex < maxLength &&
          _compare(_items[leftIndex], _items[rightIndex]) > 0) {
        childIndex = rightIndex;
      }
      if (_compare(item, _items[childIndex]) <= 0) {
        return;
      }
      _items[index] = _items[childIndex];
      _items[childIndex] = item;
      index = childIndex;
    }
  }

  /// Whether every item sorts at or after its parent.
  ///
  /// Only used from `assert`, so it must stay free of side effects.
  bool _isValidHeap() {
    for (var i = 1; i < _items.length; i++) {
      final parentIndex = (i - 1) >> 1;
      if (_compare(_items[parentIndex], _items[i]) > 0) {
        return false;
      }
    }
    return true;
  }
}

/// Builds a sorted list of the top-k items using the provided comparator
/// (the item that compares lowest comes first).
///
/// Items are only buffered while they are added. The ordering work happens in
/// [getTopK], which arranges the buffer into a heap and then removes at most
/// `k` items from its root, one at a time.
class TopKSortedListBuilder<T> {
  final int _k;
  final _Heap<T> _heap;

  TopKSortedListBuilder(this._k, Comparator<T> compare)
      : _heap = _Heap<T>(compare);

  /// Buffers every item of [items].
  void addAll(Iterable<T> items) {
    for (final item in items) {
      add(item);
    }
  }

  /// Buffers a single [item].
  void add(T item) {
    _heap._items.add(item);
  }

  /// Gets and removes the top-k items from the current list.
  ///
  /// The returned iterable is lazy and every iteration consumes the items it
  /// yields: iterating it again (or calling [getTopK] again) continues with
  /// the items that are still buffered. Materialize it once, e.g. with
  /// `toList()`, when the result is needed more than once.
  Iterable<T> getTopK() sync* {
    if (_k <= 0 || _heap.length == 0) {
      return;
    }
    _heap._heapify();
    for (var remaining = _k; remaining > 0 && _heap.length > 0; remaining--) {
      // Detach the item *before* handing it out: a consumer that stops early
      // suspends this generator at the `yield`, and the item it already
      // received must not be returned again by a later call.
      final top = _heap._removeFirst();
      yield top;
    }
  }
}

app/lib/search/mem_index.dart

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import 'package:clock/clock.dart';
99
import 'package:collection/collection.dart';
1010
import 'package:logging/logging.dart';
1111
import 'package:meta/meta.dart';
12+
import 'package:pub_dev/search/heap.dart';
1213
import 'package:pub_dev/service/topics/models.dart';
1314
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
1415

@@ -292,8 +293,8 @@ class InMemoryPackageIndex {
292293
}
293294
indexedHits = _rankWithValues(
294295
packageScores,
295-
requiredLengthThreshold: query.offset,
296296
bestNameIndex: bestNameIndex ?? -1,
297+
topK: query.offset + query.limit,
297298
);
298299
break;
299300
case SearchOrder.created:
@@ -521,12 +522,13 @@ class InMemoryPackageIndex {
521522

522523
Iterable<IndexedPackageHit> _rankWithValues(
523524
IndexedScore<String> score, {
524-
// if the item count is fewer than this threshold, an empty list will be returned
525-
required int requiredLengthThreshold,
526-
// When no best name match is applied, this parameter will be `-1`
525+
/// When no best name match is applied, this parameter will be `-1`
527526
required int bestNameIndex,
527+
528+
/// Return (and sort) only the top-k results.
529+
required int topK,
528530
}) {
529-
int compare(int aIndex, int bIndex) {
531+
final builder = TopKSortedListBuilder<int>(topK, (aIndex, bIndex) {
530532
if (aIndex == bestNameIndex) return -1;
531533
if (bIndex == bestNameIndex) return 1;
532534
final aScore = score.getValue(aIndex);
@@ -535,20 +537,13 @@ class InMemoryPackageIndex {
535537
if (scoreCompare != 0) return scoreCompare;
536538
// if two packages got the same score, order by last updated
537539
return _compareUpdated(_documents[aIndex], _documents[bIndex]);
538-
}
539-
540-
final list = <int>[];
540+
});
541541
for (var i = 0; i < score.length; i++) {
542542
final value = score.getValue(i);
543543
if (value <= 0.0 && i != bestNameIndex) continue;
544-
list.add(i);
545-
}
546-
if (requiredLengthThreshold > list.length) {
547-
// There is no point to sort or even keep the results, as the search query offset ignores these anyway.
548-
return [];
544+
builder.add(i);
549545
}
550-
list.sort(compare);
551-
return list.map((i) => IndexedPackageHit(
546+
return builder.getTopK().map((i) => IndexedPackageHit(
552547
i, PackageHit(package: score.keys[i], score: score.getValue(i))));
553548
}
554549

app/lib/search/token_index.dart

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import 'dart:math' as math;
66

77
import 'package:meta/meta.dart';
8+
import 'package:pub_dev/search/heap.dart';
89
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
910

1011
import 'text_utils.dart';
@@ -313,21 +314,16 @@ class IndexedScore<K> {
313314
}
314315

315316
Map<K, double> top(int count, {double? minValue}) {
316-
final list = <int>[];
317-
double? lastValue;
317+
minValue ??= 0.0;
318+
final builder = TopKSortedListBuilder<int>(
319+
count, (a, b) => -_values[a].compareTo(_values[b]));
318320
for (var i = 0; i < length; i++) {
319321
final v = _values[i];
320-
if (minValue != null && v < minValue) continue;
321-
if (list.length == count) {
322-
if (lastValue != null && lastValue >= v) continue;
323-
list[count - 1] = i;
324-
} else {
325-
list.add(i);
326-
}
327-
list.sort((a, b) => -_values[a].compareTo(_values[b]));
328-
lastValue = _values[list.last];
322+
if (v < minValue) continue;
323+
builder.add(i);
329324
}
330-
return Map.fromEntries(list.map((i) => MapEntry(_keys[i], _values[i])));
325+
return Map.fromEntries(
326+
builder.getTopK().map((i) => MapEntry(_keys[i], _values[i])));
331327
}
332328

333329
Map<K, double> toMap() {

app/test/search/heap_test.dart

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:math';
6+
7+
import 'package:pub_dev/search/heap.dart';
8+
import 'package:test/test.dart';
9+
10+
/// Tests for [TopKSortedListBuilder].
void main() {
  group('top-k sorted list', () {
    // Reversed natural order: the largest value sorts first.
    int compare(int a, int b) => -a.compareTo(b);

    test('no items', () {
      final builder = TopKSortedListBuilder(5, compare);
      expect(builder.getTopK().toList(), []);
    });

    test('single item', () {
      final builder = TopKSortedListBuilder(5, compare);
      builder.add(1);
      expect(builder.getTopK().toList(), [1]);
    });

    test('three items ascending', () {
      final builder = TopKSortedListBuilder(5, compare);
      builder.addAll([1, 2, 3]);
      expect(builder.getTopK().toList(), [3, 2, 1]);
    });

    test('three items descending', () {
      final builder = TopKSortedListBuilder(5, compare);
      builder.addAll([3, 2, 1]);
      expect(builder.getTopK().toList(), [3, 2, 1]);
    });

    test('10 items + repeated', () {
      final builder = TopKSortedListBuilder(5, compare);
      builder.addAll([1, 10, 2, 9, 3, 8, 4, 7, 6, 5, 9]);
      expect(builder.getTopK().toList(), [10, 9, 9, 8, 7]);
    });

    test('zero k', () {
      final builder = TopKSortedListBuilder(0, compare);
      builder.addAll([1, 2, 3]);
      expect(builder.getTopK().toList(), []);
    });

    test('randomized verification', () {
      for (var seed = 0; seed < 1000; seed++) {
        final r = Random(seed);
        final length = 1000 + r.nextInt(1000);
        final k = 10 + r.nextInt(200);
        // Shuffle with the seeded generator so the input order really varies
        // between runs while every failure stays reproducible from its seed.
        final items = List.generate(length, (value) => value)..shuffle(r);
        final builder = TopKSortedListBuilder(k, compare);
        builder.addAll(items);
        final result = builder.getTopK().toList();
        // The items are 0..length-1, so the top-k are the k largest values
        // in descending order.
        expect(result, List.generate(k, (rank) => length - 1 - rank));
      }
    });
  });
}

0 commit comments

Comments
 (0)