Skip to content

Commit 998bd95

Browse files
committed
Use top-k sorted list builder with min/max-heaps for search.
1 parent 3116ca4 commit 998bd95

File tree

4 files changed

+170
-27
lines changed

4 files changed

+170
-27
lines changed

app/lib/search/heap.dart

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
/// A binary heap stored in a flat, growable list.
///
/// The heap property is maintained relative to the comparator passed to the
/// constructor: for every parent/child pair, `compare(parent, child) <= 0`.
/// With a natural ascending comparator this yields a min-heap (each node is
/// less than or equal to its children); with a negated comparator it yields
/// a max-heap (each node is greater than or equal to its children).
class Heap<T> {
  final Comparator<T> _compare;
  final _items = <T>[];

  Heap(this._compare);

  /// The number of items currently stored.
  int get length => _items.length;

  /// Appends [item] as the last leaf and sifts it up towards the root until
  /// its parent no longer compares greater than it.
  void insert(T item) {
    _items.add(item);
    _up(_items.length - 1);
  }

  /// Overwrites the root entry with [item] and sifts it down as needed.
  ///
  /// The heap must not be empty.
  ///
  /// WARNING: when [maxLength] is specified, only the entries at indexes
  /// below [maxLength] take part in the sift-down, so the heap property is
  /// only guaranteed for that prefix of the backing list.
  void replaceFirst(T item, [int? maxLength]) {
    assert(_items.isNotEmpty);
    _items[0] = item;
    _down(0, maxLength);
  }

  /// Moves the entry at [index] up, swapping it with its parent while the
  /// parent compares strictly greater than it.
  void _up(int index) {
    final moving = _items[index];
    for (var current = index; current > 0;) {
      final above = (current - 1) ~/ 2;
      final parent = _items[above];
      // The parent is already in order relative to the moving entry.
      if (_compare(parent, moving) <= 0) break;
      _items[above] = moving;
      _items[current] = parent;
      current = above;
    }
  }

  /// Moves the entry at [index] down, swapping it with its smaller child
  /// (per the comparator) while that child compares strictly smaller.
  ///
  /// Only indexes below [maxLength] (default: the full list) are considered.
  void _down(int index, [int? maxLength]) {
    final limit = maxLength ?? _items.length;
    final moving = _items[index];
    var current = index;
    while (true) {
      final left = 2 * current + 1;
      // No children inside the active range: the entry is a leaf.
      if (left >= limit) return;
      final right = left + 1;
      // Pick the right child only when it exists and sorts before the left.
      final smaller =
          (right < limit && _compare(_items[left], _items[right]) > 0)
              ? right
              : left;
      if (_compare(moving, _items[smaller]) <= 0) return;
      _items[current] = _items[smaller];
      _items[smaller] = moving;
      current = smaller;
    }
  }
}
72+
73+
/// Builds a sorted list of the top-k items using the provided comparator.
///
/// Items are ordered by the comparator passed to the constructor: an item
/// `a` ranks before `b` when `compare(a, b) < 0`. While items are added, a
/// heap ordered by the negated comparator keeps the worst retained item at
/// its root, so each [add] only has to compare against that root. [getTopK]
/// then heap-sorts the retained items in place, best item first.
class TopKSortedListBuilder<T> {
  final int _k;
  final Comparator<T> _compare;
  final Heap<T> _heap;

  /// Whether the heap's backing list currently holds the sorted output of
  /// [getTopK] (best item first) instead of a valid heap.
  bool _isSorted = false;

  TopKSortedListBuilder(this._k, this._compare)
      : _heap = Heap<T>((a, b) => -_compare(a, b));

  /// Offers every item of [items] to the builder, in iteration order.
  void addAll(Iterable<T> items) {
    for (final item in items) {
      add(item);
    }
  }

  /// Offers [item] to the builder.
  ///
  /// The item is retained when fewer than `k` items are stored, or when it
  /// ranks strictly before the worst retained item (which it then replaces).
  /// When `k` is zero or negative nothing is ever retained.
  void add(T item) {
    // Without this guard an empty heap would count as "at capacity" and the
    // root lookup below would throw on the empty list.
    if (_k <= 0) return;
    if (_isSorted) {
      _restoreHeap();
    }
    if (_heap.length < _k) {
      _heap.insert(item);
      return;
    }
    // The root is the worst retained item; skip anything not better than it.
    if (_compare(_heap._items.first, item) <= 0) return;
    _heap.replaceFirst(item);
  }

  /// Returns the retained items, sorted so that the best item comes first.
  ///
  /// The returned iterable is the builder's internal storage, not a copy:
  /// its contents are reordered by any later call to [add] or [addAll].
  /// Calling this method repeatedly without adding items returns the same
  /// ordering.
  Iterable<T> getTopK() {
    if (!_isSorted) {
      // In-place heap sort: move the current root (the worst remaining item)
      // to the end of the active range, then re-sift the displaced entry
      // within the shrunken range.
      for (var i = _heap.length - 1; i > 0; i--) {
        final item = _heap._items[i];
        _heap._items[i] = _heap._items[0];
        _heap.replaceFirst(item, i);
      }
      _isSorted = true;
    }
    return _heap._items;
  }

  /// Turns the sorted output back into a valid heap.
  ///
  /// The sorted list is best-first; reversing it makes it worst-first, which
  /// is ascending order for the heap's negated comparator, and a list sorted
  /// ascending satisfies the heap property for every parent/child pair.
  void _restoreHeap() {
    final items = _heap._items;
    for (var i = 0, j = items.length - 1; i < j; i++, j--) {
      final tmp = items[i];
      items[i] = items[j];
      items[j] = tmp;
    }
    _isSorted = false;
  }
}

app/lib/search/mem_index.dart

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import 'package:clock/clock.dart';
99
import 'package:collection/collection.dart';
1010
import 'package:logging/logging.dart';
1111
import 'package:meta/meta.dart';
12+
import 'package:pub_dev/search/heap.dart';
1213
import 'package:pub_dev/service/topics/models.dart';
1314
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
1415

@@ -292,8 +293,8 @@ class InMemoryPackageIndex {
292293
}
293294
indexedHits = _rankWithValues(
294295
packageScores,
295-
requiredLengthThreshold: query.offset,
296296
bestNameIndex: bestNameIndex ?? -1,
297+
topK: query.offset + query.limit,
297298
);
298299
break;
299300
case SearchOrder.created:
@@ -521,12 +522,13 @@ class InMemoryPackageIndex {
521522

522523
Iterable<IndexedPackageHit> _rankWithValues(
523524
IndexedScore<String> score, {
524-
// if the item count is fewer than this threshold, an empty list will be returned
525-
required int requiredLengthThreshold,
526-
// When no best name match is applied, this parameter will be `-1`
525+
/// When no best name match is applied, this parameter will be `-1`
527526
required int bestNameIndex,
527+
528+
/// Return (and sort) only the top-k results.
529+
required int topK,
528530
}) {
529-
int compare(int aIndex, int bIndex) {
531+
final builder = TopKSortedListBuilder<int>(topK, (aIndex, bIndex) {
530532
if (aIndex == bestNameIndex) return -1;
531533
if (bIndex == bestNameIndex) return 1;
532534
final aScore = score.getValue(aIndex);
@@ -535,20 +537,13 @@ class InMemoryPackageIndex {
535537
if (scoreCompare != 0) return scoreCompare;
536538
// if two packages got the same score, order by last updated
537539
return _compareUpdated(_documents[aIndex], _documents[bIndex]);
538-
}
539-
540-
final list = <int>[];
540+
});
541541
for (var i = 0; i < score.length; i++) {
542542
final value = score.getValue(i);
543543
if (value <= 0.0 && i != bestNameIndex) continue;
544-
list.add(i);
545-
}
546-
if (requiredLengthThreshold > list.length) {
547-
// There is no point to sort or even keep the results, as the search query offset ignores these anyway.
548-
return [];
544+
builder.add(i);
549545
}
550-
list.sort(compare);
551-
return list.map((i) => IndexedPackageHit(
546+
return builder.getTopK().map((i) => IndexedPackageHit(
552547
i, PackageHit(package: score.keys[i], score: score.getValue(i))));
553548
}
554549

app/lib/search/token_index.dart

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import 'dart:math' as math;
66

77
import 'package:meta/meta.dart';
8+
import 'package:pub_dev/search/heap.dart';
89
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
910

1011
import 'text_utils.dart';
@@ -313,21 +314,16 @@ class IndexedScore<K> {
313314
}
314315

315316
Map<K, double> top(int count, {double? minValue}) {
316-
final list = <int>[];
317-
double? lastValue;
317+
minValue ??= 0.0;
318+
final builder = TopKSortedListBuilder<int>(
319+
count, (a, b) => -_values[a].compareTo(_values[b]));
318320
for (var i = 0; i < length; i++) {
319321
final v = _values[i];
320-
if (minValue != null && v < minValue) continue;
321-
if (list.length == count) {
322-
if (lastValue != null && lastValue >= v) continue;
323-
list[count - 1] = i;
324-
} else {
325-
list.add(i);
326-
}
327-
list.sort((a, b) => -_values[a].compareTo(_values[b]));
328-
lastValue = _values[list.last];
322+
if (v < minValue) continue;
323+
builder.add(i);
329324
}
330-
return Map.fromEntries(list.map((i) => MapEntry(_keys[i], _values[i])));
325+
return Map.fromEntries(
326+
builder.getTopK().map((i) => MapEntry(_keys[i], _values[i])));
331327
}
332328

333329
Map<K, double> toMap() {

app/test/search/heap_test.dart

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'package:pub_dev/search/heap.dart';
6+
import 'package:test/test.dart';
7+
8+
void main() {
  group('top-k sorted list', () {
    // Ranks larger integers first.
    int compare(int a, int b) => b.compareTo(a);

    // Every case uses a fresh builder that keeps at most five items.
    TopKSortedListBuilder<int> newBuilder() =>
        TopKSortedListBuilder<int>(5, compare);

    test('no items', () {
      final result = newBuilder().getTopK().toList();
      expect(result, []);
    });

    test('single item', () {
      final result = (newBuilder()..add(1)).getTopK().toList();
      expect(result, [1]);
    });

    test('three items ascending', () {
      final result = (newBuilder()..addAll([1, 2, 3])).getTopK().toList();
      expect(result, [3, 2, 1]);
    });

    test('three items descending', () {
      final result = (newBuilder()..addAll([3, 2, 1])).getTopK().toList();
      expect(result, [3, 2, 1]);
    });

    test('10 items + repeated', () {
      // Eleven inputs: only the five largest survive, duplicates included.
      final result = (newBuilder()
            ..addAll([1, 10, 2, 9, 3, 8, 4, 7, 6, 5, 9]))
          .getTopK()
          .toList();
      expect(result, [10, 9, 9, 8, 7]);
    });
  });
}

0 commit comments

Comments
 (0)