Skip to content

Commit e61f0a9

Browse files
committed
Use top-k sorted list builder with min/max-heaps for search.
1 parent 3116ca4 commit e61f0a9

File tree

5 files changed

+162
-31
lines changed

5 files changed

+162
-31
lines changed

app/lib/search/heap.dart

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
/// A binary heap, stored in a flat list, that maintains the "heap property"
/// with respect to the provided comparator: no item compares greater than
/// either of its children, so the first item never compares greater than any
/// other item in the heap.
///
/// The provided comparator decides which kind of heap is being built: a
/// natural-order comparator yields a min-heap, a reversed one a max-heap.
class _Heap<T> {
  final Comparator<T> _compare;
  final _items = <T>[];

  _Heap(this._compare);

  int get length => _items.length;

  /// Sifts the item at [index] down until neither of its children orders
  /// before it.
  ///
  /// Both subtrees below [index] must already satisfy the heap property.
  /// Only the path below [index] is touched: the ancestors of [index] are
  /// deliberately left alone, because during bottom-up heap construction they
  /// have not been processed yet and swapping with them would push an
  /// unchecked item into an already finished subtree.
  void _down(int index) {
    final maxLength = _items.length;
    final item = _items[index];
    while (true) {
      final leftIndex = (index << 1) + 1;
      if (leftIndex >= maxLength) {
        break;
      }
      // Pick the child that orders first; ties keep the left child.
      var childIndex = leftIndex;
      final rightIndex = leftIndex + 1;
      if (rightIndex < maxLength &&
          _compare(_items[leftIndex], _items[rightIndex]) > 0) {
        childIndex = rightIndex;
      }
      final child = _items[childIndex];
      if (_compare(item, child) <= 0) {
        break;
      }
      // Move the child up into the hole and continue from its old position.
      _items[index] = child;
      index = childIndex;
    }
    _items[index] = item;
  }
}

/// Builds a sorted list of the top-k items using the provided comparator.
///
/// Added items are only collected; [getTopK] arranges them into a heap and
/// then repeatedly extracts the first item, so the items that the comparator
/// orders first are returned first (e.g. a descending comparator returns the
/// largest items, in descending order).
class TopKSortedListBuilder<T> {
  final int _k;
  final _Heap<T> _heap;

  TopKSortedListBuilder(this._k, Comparator<T> compare)
      : _heap = _Heap<T>(compare);

  /// Adds every item of [items] to the candidate list.
  void addAll(Iterable<T> items) {
    for (final item in items) {
      add(item);
    }
  }

  /// Adds a single [item] to the candidate list.
  void add(T item) {
    _heap._items.add(item);
  }

  /// Gets and removes the top-k items from the current list.
  ///
  /// The returned iterable is lazy: the heap is built and the items are
  /// removed only while it is being iterated.
  Iterable<T> getTopK() sync* {
    final items = _heap._items;
    if (items.isEmpty) {
      return;
    }
    // Bottom-up heap construction, starting at the last item with a child.
    for (var i = (items.length >> 1) - 1; i >= 0; i--) {
      _heap._down(i);
    }
    var count = _k;
    while (count > 0 && items.isNotEmpty) {
      yield items[0];
      count--;
      // Replace the emitted root with the last item and restore the heap.
      final last = items.removeLast();
      if (items.isEmpty) {
        break;
      }
      items[0] = last;
      _heap._down(0);
    }
  }
}

app/lib/search/mem_index.dart

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import 'package:clock/clock.dart';
99
import 'package:collection/collection.dart';
1010
import 'package:logging/logging.dart';
1111
import 'package:meta/meta.dart';
12+
import 'package:pub_dev/search/heap.dart';
1213
import 'package:pub_dev/service/topics/models.dart';
1314
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
1415

@@ -292,8 +293,8 @@ class InMemoryPackageIndex {
292293
}
293294
indexedHits = _rankWithValues(
294295
packageScores,
295-
requiredLengthThreshold: query.offset,
296296
bestNameIndex: bestNameIndex ?? -1,
297+
topK: query.offset + query.limit,
297298
);
298299
break;
299300
case SearchOrder.created:
@@ -521,12 +522,13 @@ class InMemoryPackageIndex {
521522

522523
Iterable<IndexedPackageHit> _rankWithValues(
523524
IndexedScore<String> score, {
524-
// if the item count is fewer than this threshold, an empty list will be returned
525-
required int requiredLengthThreshold,
526-
// When no best name match is applied, this parameter will be `-1`
525+
/// When no best name match is applied, this parameter will be `-1`
527526
required int bestNameIndex,
527+
528+
/// Return (and sort) only the top-k results.
529+
required int topK,
528530
}) {
529-
int compare(int aIndex, int bIndex) {
531+
final builder = TopKSortedListBuilder<int>(topK, (aIndex, bIndex) {
530532
if (aIndex == bestNameIndex) return -1;
531533
if (bIndex == bestNameIndex) return 1;
532534
final aScore = score.getValue(aIndex);
@@ -535,20 +537,13 @@ class InMemoryPackageIndex {
535537
if (scoreCompare != 0) return scoreCompare;
536538
// if two packages got the same score, order by last updated
537539
return _compareUpdated(_documents[aIndex], _documents[bIndex]);
538-
}
539-
540-
final list = <int>[];
540+
});
541541
for (var i = 0; i < score.length; i++) {
542542
final value = score.getValue(i);
543543
if (value <= 0.0 && i != bestNameIndex) continue;
544-
list.add(i);
545-
}
546-
if (requiredLengthThreshold > list.length) {
547-
// There is no point to sort or even keep the results, as the search query offset ignores these anyway.
548-
return [];
544+
builder.add(i);
549545
}
550-
list.sort(compare);
551-
return list.map((i) => IndexedPackageHit(
546+
return builder.getTopK().map((i) => IndexedPackageHit(
552547
i, PackageHit(package: score.keys[i], score: score.getValue(i))));
553548
}
554549

app/lib/search/token_index.dart

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import 'dart:math' as math;
66

77
import 'package:meta/meta.dart';
8+
import 'package:pub_dev/search/heap.dart';
89
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
910

1011
import 'text_utils.dart';
@@ -313,21 +314,16 @@ class IndexedScore<K> {
313314
}
314315

315316
Map<K, double> top(int count, {double? minValue}) {
316-
final list = <int>[];
317-
double? lastValue;
317+
minValue ??= 0.0;
318+
final builder = TopKSortedListBuilder<int>(
319+
count, (a, b) => -_values[a].compareTo(_values[b]));
318320
for (var i = 0; i < length; i++) {
319321
final v = _values[i];
320-
if (minValue != null && v < minValue) continue;
321-
if (list.length == count) {
322-
if (lastValue != null && lastValue >= v) continue;
323-
list[count - 1] = i;
324-
} else {
325-
list.add(i);
326-
}
327-
list.sort((a, b) => -_values[a].compareTo(_values[b]));
328-
lastValue = _values[list.last];
322+
if (v < minValue) continue;
323+
builder.add(i);
329324
}
330-
return Map.fromEntries(list.map((i) => MapEntry(_keys[i], _values[i])));
325+
return Map.fromEntries(
326+
builder.getTopK().map((i) => MapEntry(_keys[i], _values[i])));
331327
}
332328

333329
Map<K, double> toMap() {

app/test/search/dartdoc_index_parsing_test.dart

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,14 @@ void main() {
8080
'score': closeTo(0.98, 0.01),
8181
'apiPages': [
8282
{
83-
'path': 'widgets/StatelessWidget-class.html',
83+
'path': 'widgets/StatelessWidget/StatelessWidget.html',
8484
'url':
85-
'https://api.flutter.dev/flutter/widgets/StatelessWidget-class.html'
85+
'https://api.flutter.dev/flutter/widgets/StatelessWidget/StatelessWidget.html'
8686
},
8787
{
88-
'path': 'widgets/StatelessWidget/StatelessWidget.html',
88+
'path': 'widgets/StatelessWidget-class.html',
8989
'url':
90-
'https://api.flutter.dev/flutter/widgets/StatelessWidget/StatelessWidget.html'
90+
'https://api.flutter.dev/flutter/widgets/StatelessWidget-class.html'
9191
},
9292
{
9393
'path': 'widgets/StatelessWidget/build.html',

app/test/search/heap_test.dart

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'package:pub_dev/search/heap.dart';
6+
import 'package:test/test.dart';
7+
8+
void main() {
  group('top-k sorted list', () {
    // Reversed natural order, so larger values rank first.
    int compare(int a, int b) => -a.compareTo(b);

    // Feeds [values] into a fresh builder limited to five items and returns
    // everything it yields.
    List<int> topFiveOf(Iterable<int> values) {
      final builder = TopKSortedListBuilder<int>(5, compare)..addAll(values);
      return builder.getTopK().toList();
    }

    test('no items', () {
      final builder = TopKSortedListBuilder<int>(5, compare);
      expect(builder.getTopK().toList(), []);
    });

    test('single item', () {
      final builder = TopKSortedListBuilder<int>(5, compare)..add(1);
      expect(builder.getTopK().toList(), [1]);
    });

    test('three items ascending', () {
      expect(topFiveOf([1, 2, 3]), [3, 2, 1]);
    });

    test('three items descending', () {
      expect(topFiveOf([3, 2, 1]), [3, 2, 1]);
    });

    test('10 items + repeated', () {
      expect(
        topFiveOf([1, 10, 2, 9, 3, 8, 4, 7, 6, 5, 9]),
        [10, 9, 9, 8, 7],
      );
    });
  });
}

0 commit comments

Comments
 (0)