Skip to content

Commit 25c2a5a

Browse files
authored
Use top-k sorted list builder with heap for search. (#8826)
1 parent c3d64d9 commit 25c2a5a

File tree

4 files changed

+190
-27
lines changed

4 files changed

+190
-27
lines changed

app/lib/search/heap.dart

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
/// A data structure that satisfies the "heap property". This property
6+
/// dictates that in a max-heap, each node's value is greater than or equal
7+
/// to its children's values, and in a min-heap, each node's value is less
8+
/// than or equal to its children's values.
9+
///
10+
/// The provided comparator decides which kind of heap is being built.
11+
class Heap<T> {
  /// Orders the items: the item that this comparator sorts first is the one
  /// kept at the root of the heap.
  final Comparator<T> _compare;

  /// Backing storage, interpreted as an implicit binary tree: the children of
  /// the item at `index` are stored at `2 * index + 1` and `2 * index + 2`.
  final _items = <T>[];

  /// Whether [_items] is known to satisfy the heap property.
  bool _isValid = true;

  Heap(this._compare);

  /// The number of items currently stored in the heap.
  int get length => _items.length;

  /// Collects [item] and adds it to the end of the internal list, marks the
  /// [Heap] as non-valid.
  ///
  /// A separate operation may trigger the restoration of the heap property.
  void collect(T item) {
    _items.add(item);
    _isValid = false;
  }

  /// Collects [items] and adds them to the end of the internal list, marks the
  /// [Heap] as non-valid.
  ///
  /// A separate operation may trigger the restoration of the heap property.
  void collectAll(Iterable<T> items) {
    _items.addAll(items);
    _isValid = false;
  }

  /// Ensures that the tree structure below the [index] is a valid heap.
  ///
  /// Sifts the item at [index] down: it is swapped with whichever child the
  /// comparator orders first, until neither child is ordered before it.
  void _heapify(int index) {
    final maxLength = _items.length;
    final item = _items[index];
    while (index < maxLength) {
      final leftIndex = (index << 1) + 1;
      if (leftIndex >= maxLength) {
        // No children: the item has reached a leaf position.
        return;
      }
      // Select the child that the comparator orders first.
      var childIndex = leftIndex;
      final rightIndex = leftIndex + 1;
      if (rightIndex < maxLength &&
          _compare(_items[leftIndex], _items[rightIndex]) > 0) {
        childIndex = rightIndex;
      }
      if (_compare(item, _items[childIndex]) <= 0) {
        // The item is not ordered after its children: the property holds.
        return;
      }
      _items[index] = _items[childIndex];
      _items[childIndex] = item;
      index = childIndex;
    }
  }

  /// (Re-)builds the heap property if needed.
  void _buildHeapIfNeeded() {
    if (_isValid) {
      assert(_isValidHeap());
      return;
    }

    if (_items.isEmpty) {
      // Nothing to order, and `_heapify(0)` would read past the end.
      _isValid = true;
      return;
    }
    // Bottom-up construction: sift down every position from the middle of the
    // list back to the root.
    for (var i = (_items.length >> 1); i >= 0; i--) {
      _heapify(i);
    }

    assert(_isValidHeap());
    _isValid = true;
  }

  /// Verifies the heap property is true for all items.
  ///
  /// The property holds when no parent is ordered after any of its children
  /// by the comparator.
  bool _isValidHeap() {
    for (var i = 1; i < _items.length; i++) {
      final parentIndex = (i - 1) >> 1;
      if (_compare(_items[parentIndex], _items[i]) > 0) {
        return false;
      }
    }
    return true;
  }

  /// Creates a sorted list of the top-k items and removes them from the [Heap].
  ///
  /// "Top" means first in the order defined by the comparator passed to the
  /// constructor. Returns fewer than [k] items when the heap holds fewer, and
  /// an empty result when [k] is zero or negative.
  ///
  /// The algorithm builds the heap in `O(N)` steps on the already collected
  /// items, and then selects the top-k items by repeatedly removing the root
  /// item from the [Heap] and restoring the heap property again, in
  /// `O(k * log(N))` steps.
  ///
  /// The removal happens eagerly, before this method returns: the result is a
  /// materialized list, so iterating it partially or more than once never
  /// changes which items have been removed from the [Heap].
  Iterable<T> getAndRemoveTopK(int k) {
    _buildHeapIfNeeded();
    final top = <T>[];
    while (top.length < k && _items.isNotEmpty) {
      top.add(_items.first);
      // Move the last item to the root and sift it down to its place.
      final last = _items.removeLast();
      if (_items.isEmpty) {
        break;
      }
      _items[0] = last;
      _heapify(0);
    }
    assert(_isValidHeap());
    return top;
  }
}

app/lib/search/mem_index.dart

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import 'package:clock/clock.dart';
99
import 'package:collection/collection.dart';
1010
import 'package:logging/logging.dart';
1111
import 'package:meta/meta.dart';
12+
import 'package:pub_dev/search/heap.dart';
1213
import 'package:pub_dev/service/topics/models.dart';
1314
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
1415

@@ -292,8 +293,8 @@ class InMemoryPackageIndex {
292293
}
293294
indexedHits = _rankWithValues(
294295
packageScores,
295-
requiredLengthThreshold: query.offset,
296296
bestNameIndex: bestNameIndex ?? -1,
297+
topK: query.offset + query.limit,
297298
);
298299
break;
299300
case SearchOrder.created:
@@ -521,12 +522,13 @@ class InMemoryPackageIndex {
521522

522523
Iterable<IndexedPackageHit> _rankWithValues(
523524
IndexedScore<String> score, {
524-
// if the item count is fewer than this threshold, an empty list will be returned
525-
required int requiredLengthThreshold,
526-
// When no best name match is applied, this parameter will be `-1`
525+
/// When no best name match is applied, this parameter will be `-1`
527526
required int bestNameIndex,
527+
528+
/// Return (and sort) only the top-k results.
529+
required int topK,
528530
}) {
529-
int compare(int aIndex, int bIndex) {
531+
final heap = Heap<int>((aIndex, bIndex) {
530532
if (aIndex == bestNameIndex) return -1;
531533
if (bIndex == bestNameIndex) return 1;
532534
final aScore = score.getValue(aIndex);
@@ -535,20 +537,13 @@ class InMemoryPackageIndex {
535537
if (scoreCompare != 0) return scoreCompare;
536538
// if two packages got the same score, order by last updated
537539
return _compareUpdated(_documents[aIndex], _documents[bIndex]);
538-
}
539-
540-
final list = <int>[];
540+
});
541541
for (var i = 0; i < score.length; i++) {
542542
final value = score.getValue(i);
543543
if (value <= 0.0 && i != bestNameIndex) continue;
544-
list.add(i);
545-
}
546-
if (requiredLengthThreshold > list.length) {
547-
// There is no point to sort or even keep the results, as the search query offset ignores these anyway.
548-
return [];
544+
heap.collect(i);
549545
}
550-
list.sort(compare);
551-
return list.map((i) => IndexedPackageHit(
546+
return heap.getAndRemoveTopK(topK).map((i) => IndexedPackageHit(
552547
i, PackageHit(package: score.keys[i], score: score.getValue(i))));
553548
}
554549

app/lib/search/token_index.dart

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import 'dart:math' as math;
66

77
import 'package:meta/meta.dart';
8+
import 'package:pub_dev/search/heap.dart';
89
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
910

1011
import 'text_utils.dart';
@@ -313,21 +314,16 @@ class IndexedScore<K> {
313314
}
314315

315316
Map<K, double> top(int count, {double? minValue}) {
316-
final list = <int>[];
317-
double? lastValue;
317+
minValue ??= 0.0;
318+
final heap = Heap<int>((a, b) => -_values[a].compareTo(_values[b]));
318319
for (var i = 0; i < length; i++) {
319320
final v = _values[i];
320-
if (minValue != null && v < minValue) continue;
321-
if (list.length == count) {
322-
if (lastValue != null && lastValue >= v) continue;
323-
list[count - 1] = i;
324-
} else {
325-
list.add(i);
326-
}
327-
list.sort((a, b) => -_values[a].compareTo(_values[b]));
328-
lastValue = _values[list.last];
321+
if (v < minValue) continue;
322+
heap.collect(i);
329323
}
330-
return Map.fromEntries(list.map((i) => MapEntry(_keys[i], _values[i])));
324+
return Map.fromEntries(heap
325+
.getAndRemoveTopK(count)
326+
.map((i) => MapEntry(_keys[i], _values[i])));
331327
}
332328

333329
Map<K, double> toMap() {

app/test/search/heap_test.dart

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:math';
6+
7+
import 'package:pub_dev/search/heap.dart';
8+
import 'package:test/test.dart';
9+
10+
void main() {
  group('top-k sorted list', () {
    // Reversed natural ordering, so the largest integers leave the heap first.
    int descending(int a, int b) => b.compareTo(a);

    test('no items', () {
      final emptyHeap = Heap(descending);
      final drained = emptyHeap.getAndRemoveTopK(5).toList();
      expect(drained, []);
    });

    test('single item', () {
      final singleton = Heap(descending);
      singleton.collect(1);
      final drained = singleton.getAndRemoveTopK(5).toList();
      expect(drained, [1]);
    });

    test('three items ascending', () {
      final ascending = Heap(descending)..collectAll([1, 2, 3]);
      final drained = ascending.getAndRemoveTopK(5).toList();
      expect(drained, [3, 2, 1]);
    });

    test('three items descending', () {
      final sorted = Heap(descending)..collectAll([3, 2, 1]);
      final drained = sorted.getAndRemoveTopK(5).toList();
      expect(drained, [3, 2, 1]);
    });

    test('10 items + repeated', () {
      final mixed = Heap(descending)
        ..collectAll([1, 10, 2, 9, 3, 8, 4, 7, 6, 5, 9]);
      final drained = mixed.getAndRemoveTopK(5).toList();
      expect(drained, [10, 9, 9, 8, 7]);
    });

    test('randomized verification', () {
      for (var seed = 0; seed < 1000; seed++) {
        // A fixed seed per round keeps every round reproducible.
        final random = Random(seed);
        final length = 1000 + random.nextInt(1000);
        final k = 10 + random.nextInt(200);
        final items = [for (var n = 0; n < length; n++) n];

        // With the values 0..length-1 collected in ascending order, the
        // result must count down from `length - 1`.
        final fromOrdered = (Heap(descending)..collectAll(items))
            .getAndRemoveTopK(k)
            .toList();
        expect(fromOrdered, [for (var n = 0; n < k; n++) length - 1 - n]);

        // The same values in shuffled insertion order must give the same
        // result.
        items.shuffle(random);
        final fromShuffled = (Heap(descending)..collectAll(items))
            .getAndRemoveTopK(k)
            .toList();
        expect(fromShuffled, fromOrdered);
      }
    });
  });
}

0 commit comments

Comments
 (0)