Skip to content

Commit 9899f0a

Browse files
committed
Reduce the memory allocation during search with mutable IndexedScore.
1 parent a92e6fd commit 9899f0a

File tree

4 files changed

+114
-97
lines changed

4 files changed

+114
-97
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ Important changes to data models, configuration, and migrations between each
22
AppEngine version, listed here to ease deployment and troubleshooting.
33

44
## Next Release (replace with git tag when deployed)
5+
* `search` uses the `IndexedScore` to reduce memory allocations.
56

67
## `20241031t095600-all`
78
* Bumped runtimeVersion to `2024.10.29`.

app/lib/search/mem_index.dart

Lines changed: 37 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -311,32 +311,28 @@ class InMemoryPackageIndex {
311311
// We cannot update the main `packages` variable yet, as the dartdoc API
312312
// symbols are added on top of the core results, and `packages` is used
313313
// there too.
314-
final coreScores = <Score>[];
315-
var wordScopedPackages = packages;
314+
final coreScores = IndexedScore(_packageNameIndex._packageNames);
315+
for (var i = 0; i < _documents.length; i++) {
316+
if (packages.contains(_documents[i].package)) {
317+
coreScores.setValue(i, 1.0);
318+
}
319+
}
320+
316321
for (final word in words) {
317-
final nameScore = _packageNameIndex.searchWord(word,
318-
filterOnPackages: wordScopedPackages);
319322
if (includeNameMatches && _documentsByName.containsKey(word)) {
320323
nameMatches ??= <String>{};
321324
nameMatches.add(word);
322325
}
323326

324-
final descr = _descrIndex
325-
.searchWords([word], weight: 0.90, limitToIds: wordScopedPackages);
326-
final readme = _readmeIndex
327-
.searchWords([word], weight: 0.75, limitToIds: wordScopedPackages);
328-
final score = Score.max([nameScore, descr, readme]);
329-
coreScores.add(score);
330-
// don't update if the query is single-word
331-
if (words.length > 1) {
332-
wordScopedPackages = score.keys.toSet();
333-
if (wordScopedPackages.isEmpty) {
334-
break;
335-
}
336-
}
327+
final wordScore =
328+
_packageNameIndex.searchWord(word, filterOnNonZeros: coreScores);
329+
_descrIndex.searchAndAccumulate(word,
330+
weight: 0.90.toDouble(), score: wordScore);
331+
_readmeIndex.searchAndAccumulate(word,
332+
weight: 0.75.toDouble(), score: wordScore);
333+
coreScores.multiplyAllFrom(wordScore);
337334
}
338-
339-
final core = Score.multiply(coreScores);
335+
final core = coreScores.toScore();
340336

341337
var symbolPages = Score.empty;
342338
if (!checkAborted()) {
@@ -502,16 +498,13 @@ class _TextResults {
502498
@visibleForTesting
503499
class PackageNameIndex {
504500
final List<String> _packageNames;
505-
late final Map<String, _PkgNameData> _data;
501+
late final List<_PkgNameData> _data;
506502

507503
PackageNameIndex(this._packageNames) {
508-
_data = Map.fromEntries(_packageNames.map((package) {
504+
_data = _packageNames.map((package) {
509505
final collapsed = _collapseName(package);
510-
return MapEntry(
511-
package,
512-
_PkgNameData(collapsed, trigrams(collapsed).toSet()),
513-
);
514-
}));
506+
return _PkgNameData(collapsed, trigrams(collapsed).toSet());
507+
}).toList();
515508
}
516509

517510
/// Maps package name to a reduced form of the name:
@@ -522,45 +515,43 @@ class PackageNameIndex {
522515
/// Search [text] and return the matching packages with scores.
523516
@visibleForTesting
524517
Score search(String text) {
525-
Score? score;
518+
IndexedScore? score;
526519
for (final w in splitForQuery(text)) {
527-
final s = searchWord(w, filterOnPackages: score?.keys);
520+
final s = searchWord(w, filterOnNonZeros: score);
528521
if (score == null) {
529522
score = s;
530523
} else {
531-
// Note: on one hand, it is inefficient to multiply the [Score] on each
532-
// iteration. However, (1) this is only happening in test, (2) it may be
533-
// better for the next iteration to work on a more limited `filterOnPackages`,
534-
// and (3) it will be updated to a more efficient in-place update (#8225).
535-
score = Score.multiply([score, s]);
524+
score.multiplyAllFrom(s);
536525
}
537526
}
538-
return score ?? Score.empty;
527+
return score?.toScore() ?? Score.empty;
528+
539529
}
540530

541531
/// Search using the parsed [word] and return the matching packages with scores
542-
/// as a new [Score] instance.
532+
/// as a new [IndexedScore] instance.
543533
///
544-
/// When [filterOnPackages] is present, only the names present are evaluated.
545-
Score searchWord(
534+
/// When [filterOnNonZeros] is present, only the indexes with an already
535+
/// non-zero value are evaluated.
536+
IndexedScore searchWord(
546537
String word, {
547-
Iterable<String>? filterOnPackages,
538+
IndexedScore? filterOnNonZeros,
548539
}) {
549-
final pkgNamesToCheck = filterOnPackages ?? _packageNames;
550-
final values = <String, double>{};
540+
final score = IndexedScore(_packageNames);
551541
final singularWord = word.length <= 3 || !word.endsWith('s')
552542
? word
553543
: word.substring(0, word.length - 1);
554544
final collapsedWord = _collapseName(singularWord);
555545
final parts =
556546
collapsedWord.length <= 3 ? [collapsedWord] : trigrams(collapsedWord);
557-
for (final pkg in pkgNamesToCheck) {
558-
final entry = _data[pkg];
559-
if (entry == null) {
547+
for (var i = 0; i < _data.length; i++) {
548+
if (filterOnNonZeros?.isNotPositive(i) ?? false) {
560549
continue;
561550
}
551+
552+
final entry = _data[i];
562553
if (entry.collapsed.contains(collapsedWord)) {
563-
values[pkg] = 1.0;
554+
score.setValue(i, 1.0);
564555
continue;
565556
}
566557
var matched = 0;
@@ -574,11 +565,11 @@ class PackageNameIndex {
574565
if (matched > 0) {
575566
final v = matched / parts.length;
576567
if (v >= 0.5) {
577-
values[pkg] = v;
568+
score.setValue(i, v);
578569
}
579570
}
580571
}
581-
return Score(values);
572+
return score;
582573
}
583574
}
584575

app/lib/search/token_index.dart

Lines changed: 74 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -206,39 +206,6 @@ class TokenIndex {
206206
return tokenMatch;
207207
}
208208

209-
/// Returns an {id: score} map of the documents stored in the [TokenIndex].
210-
/// The tokens in [tokenMatch] will be used to calculate a weighted sum of scores.
211-
///
212-
/// When [limitToIds] is specified, the result will contain only the set of
213-
/// identifiers in it.
214-
Map<String, double> _scoreDocs(TokenMatch tokenMatch,
215-
{double weight = 1.0, Set<String>? limitToIds}) {
216-
// Summarize the scores for the documents.
217-
final docScores = List<double>.filled(_length, 0.0);
218-
for (final token in tokenMatch.tokens) {
219-
final docWeights = _inverseIds[token]!;
220-
for (final e in docWeights.entries) {
221-
final i = e.key;
222-
docScores[i] = math.max(docScores[i], tokenMatch[token]! * e.value);
223-
}
224-
}
225-
226-
final result = <String, double>{};
227-
// post-process match weights
228-
for (var i = 0; i < _length; i++) {
229-
final id = _ids[i];
230-
final w = docScores[i];
231-
if (w <= 0.0) {
232-
continue;
233-
}
234-
if (limitToIds != null && !limitToIds.contains(id)) {
235-
continue;
236-
}
237-
result[id] = w * weight;
238-
}
239-
return result;
240-
}
241-
242209
/// Search the index for [text], with a (term-match / document coverage percent)
243210
/// scoring.
244211
@visibleForTesting
@@ -248,24 +215,82 @@ class TokenIndex {
248215

249216
/// Search the index for [words], with a (term-match / document coverage percent)
250217
/// scoring.
251-
Score searchWords(List<String> words,
252-
{double weight = 1.0, Set<String>? limitToIds}) {
253-
if (limitToIds != null && limitToIds.isEmpty) {
254-
return Score.empty;
255-
}
256-
final scores = <Score>[];
218+
Score searchWords(List<String> words, {double weight = 1.0}) {
219+
IndexedScore? score;
257220
for (final w in words) {
258-
final tokens = lookupTokens(w);
259-
final values = _scoreDocs(
260-
tokens,
261-
weight: weight,
262-
limitToIds: limitToIds,
263-
);
264-
if (values.isEmpty) {
265-
return Score.empty;
221+
final s = IndexedScore(_ids);
222+
searchAndAccumulate(w, score: s, weight: weight);
223+
if (score == null) {
224+
score = s;
225+
// reset weight
226+
weight = 1.0;
227+
} else {
228+
score.multiplyAllFrom(s);
229+
}
230+
}
231+
return score?.toScore() ?? Score.empty;
232+
}
233+
234+
/// Searches the index with [word] and stores the results in [score], using
235+
/// a max-accumulation operation on the already existing values.
236+
void searchAndAccumulate(
237+
String word, {
238+
double weight = 1.0,
239+
required IndexedScore score,
240+
}) {
241+
assert(score.length == _length);
242+
final tokenMatch = lookupTokens(word);
243+
for (final token in tokenMatch.tokens) {
244+
final matchWeight = tokenMatch[token]!;
245+
final tokenWeight = _inverseIds[token]!;
246+
for (final e in tokenWeight.entries) {
247+
score.setValueMaxOf(e.key, matchWeight * e.value * weight);
248+
}
249+
}
250+
}
251+
}
252+
253+
/// Mutable score list that can be accessed via integer index.
254+
class IndexedScore {
255+
final List<String> _keys;
256+
final List<double> _values;
257+
258+
IndexedScore._(this._keys, this._values);
259+
260+
factory IndexedScore(List<String> keys) =>
261+
IndexedScore._(keys, List<double>.filled(keys.length, 0.0));
262+
263+
late final length = _values.length;
264+
265+
bool isNotPositive(int index) {
266+
return _values[index] <= 0.0;
267+
}
268+
269+
void setValue(int index, double value) {
270+
_values[index] = value;
271+
}
272+
273+
void setValueMaxOf(int index, double value) {
274+
_values[index] = math.max(_values[index], value);
275+
}
276+
277+
void multiplyAllFrom(IndexedScore other) {
278+
assert(other._values.length == _values.length);
279+
for (var i = 0; i < _values.length; i++) {
280+
if (_values[i] == 0.0) continue;
281+
final v = other._values[i];
282+
_values[i] = v == 0.0 ? 0.0 : _values[i] * v;
283+
}
284+
}
285+
286+
Score toScore() {
287+
final map = <String, double>{};
288+
for (var i = 0; i < _values.length; i++) {
289+
final v = _values[i];
290+
if (v > 0.0) {
291+
map[_keys[i]] = v;
266292
}
267-
scores.add(Score(values));
268293
}
269-
return Score.multiply(scores);
294+
return Score._(map);
270295
}
271296
}

app/test/search/api_doc_page_test.dart

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ void main() {
9999
'packageHits': [
100100
{
101101
'package': 'foo',
102-
'score': closeTo(0.18, 0.01), // find WebPageGenerator
102+
'score': closeTo(0.26, 0.01), // find WebPageGenerator
103103
'apiPages': [
104104
{'path': 'generator.html'},
105105
],
@@ -119,7 +119,7 @@ void main() {
119119
'packageHits': [
120120
{
121121
'package': 'foo',
122-
'score': closeTo(0.11, 0.01), // find WebPageGenerator
122+
'score': closeTo(0.15, 0.01), // find WebPageGenerator
123123
'apiPages': [
124124
{'path': 'generator.html'},
125125
],

0 commit comments

Comments
 (0)