Skip to content

Commit 2ebde53

Browse files
authored
Use IndexedScore to accumulate name, description and readme scores. (#8237)
1 parent 984b234 commit 2ebde53

File tree

3 files changed

+78
-45
lines changed

3 files changed

+78
-45
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ Important changes to data models, configuration, and migrations between each
22
AppEngine version, listed here to ease deployment and troubleshooting.
33

44
## Next Release (replace with git tag when deployed)
5+
* `search` uses the `IndexedScore` to reduce memory allocations.
56

67
## `20241031t095600-all`
78
* Bumped runtimeVersion to `2024.10.29`.

app/lib/search/mem_index.dart

Lines changed: 35 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -304,32 +304,28 @@ class InMemoryPackageIndex {
304304
// We cannot update the main `packages` variable yet, as the dartdoc API
305305
// symbols are added on top of the core results, and `packages` is used
306306
// there too.
307-
final coreScores = <Score>[];
308-
var wordScopedPackages = packages;
307+
final coreScores = IndexedScore(_packageNameIndex._packageNames);
308+
for (var i = 0; i < _documents.length; i++) {
309+
if (packages.contains(_documents[i].package)) {
310+
coreScores.setValue(i, 1.0);
311+
}
312+
}
309313
for (final word in words) {
310-
final nameScore = _packageNameIndex.searchWord(word,
311-
filterOnPackages: wordScopedPackages);
312314
if (includeNameMatches && _documentsByName.containsKey(word)) {
313315
nameMatches ??= <String>{};
314316
nameMatches.add(word);
315317
}
316318

317-
final descr = _descrIndex
318-
.searchWords([word], weight: 0.90, limitToIds: wordScopedPackages);
319-
final readme = _readmeIndex
320-
.searchWords([word], weight: 0.75, limitToIds: wordScopedPackages);
321-
final score = Score.max([nameScore, descr, readme]);
322-
coreScores.add(score);
323-
// don't update if the query is single-word
324-
if (words.length > 1) {
325-
wordScopedPackages = score.keys.toSet();
326-
if (wordScopedPackages.isEmpty) {
327-
break;
328-
}
329-
}
319+
final wordScore =
320+
_packageNameIndex.searchWord(word, filterOnNonZeros: coreScores);
321+
_descrIndex.searchAndAccumulate(word,
322+
weight: 0.90.toDouble(), score: wordScore);
323+
_readmeIndex.searchAndAccumulate(word,
324+
weight: 0.75.toDouble(), score: wordScore);
325+
coreScores.multiplyAllFrom(wordScore);
330326
}
331327

332-
final core = Score.multiply(coreScores);
328+
final core = coreScores.toScore();
333329

334330
var symbolPages = Score.empty;
335331
if (!checkAborted()) {
@@ -495,16 +491,13 @@ class _TextResults {
495491
@visibleForTesting
496492
class PackageNameIndex {
497493
final List<String> _packageNames;
498-
late final Map<String, _PkgNameData> _data;
494+
late final List<_PkgNameData> _data;
499495

500496
PackageNameIndex(this._packageNames) {
501-
_data = Map.fromEntries(_packageNames.map((package) {
497+
_data = _packageNames.map((package) {
502498
final collapsed = _collapseName(package);
503-
return MapEntry(
504-
package,
505-
_PkgNameData(collapsed, trigrams(collapsed).toSet()),
506-
);
507-
}));
499+
return _PkgNameData(collapsed, trigrams(collapsed).toSet());
500+
}).toList();
508501
}
509502

510503
/// Maps package name to a reduced form of the name:
@@ -515,45 +508,42 @@ class PackageNameIndex {
515508
/// Search [text] and return the matching packages with scores.
516509
@visibleForTesting
517510
Score search(String text) {
518-
Score? score;
511+
IndexedScore? score;
519512
for (final w in splitForQuery(text)) {
520-
final s = searchWord(w, filterOnPackages: score?.keys);
513+
final s = searchWord(w, filterOnNonZeros: score);
521514
if (score == null) {
522515
score = s;
523516
} else {
524-
// Note: on one hand, it is inefficient to multiply the [Score] on each
525-
// iteration. However, (1) this is only happening in test, (2) it may be
526-
// better for the next iteration to work on a more limited `filterOnPackages`,
527-
// and (3) it will be updated to a more efficient in-place update (#8225).
528-
score = Score.multiply([score, s]);
517+
score.multiplyAllFrom(s);
529518
}
530519
}
531-
return score ?? Score.empty;
520+
return score?.toScore() ?? Score.empty;
532521
}
533522

534523
/// Search using the parsed [word] and return the matching packages with scores
535-
/// as a new [Score] instance.
524+
/// as a new [IndexedScore] instance.
536525
///
537-
/// When [filterOnPackages] is present, only the names present are evaluated.
538-
Score searchWord(
526+
/// When [filterOnNonZeros] is present, only the indexes with an already
527+
/// positive value are evaluated.
528+
IndexedScore searchWord(
539529
String word, {
540-
Iterable<String>? filterOnPackages,
530+
IndexedScore? filterOnNonZeros,
541531
}) {
542-
final pkgNamesToCheck = filterOnPackages ?? _packageNames;
543-
final values = <String, double>{};
532+
final score = IndexedScore(_packageNames);
544533
final singularWord = word.length <= 3 || !word.endsWith('s')
545534
? word
546535
: word.substring(0, word.length - 1);
547536
final collapsedWord = _collapseName(singularWord);
548537
final parts =
549538
collapsedWord.length <= 3 ? [collapsedWord] : trigrams(collapsedWord);
550-
for (final pkg in pkgNamesToCheck) {
551-
final entry = _data[pkg];
552-
if (entry == null) {
539+
for (var i = 0; i < _data.length; i++) {
540+
if (filterOnNonZeros?.isNotPositive(i) ?? false) {
553541
continue;
554542
}
543+
544+
final entry = _data[i];
555545
if (entry.collapsed.contains(collapsedWord)) {
556-
values[pkg] = 1.0;
546+
score.setValue(i, 1.0);
557547
continue;
558548
}
559549
var matched = 0;
@@ -567,11 +557,11 @@ class PackageNameIndex {
567557
if (matched > 0) {
568558
final v = matched / parts.length;
569559
if (v >= 0.5) {
570-
values[pkg] = v;
560+
score.setValue(i, v);
571561
}
572562
}
573563
}
574-
return Score(values);
564+
return score;
575565
}
576566
}
577567

app/lib/search/token_index.dart

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,24 @@ class TokenIndex {
267267
}
268268
return Score.multiply(scores);
269269
}
270+
271+
/// Searches the index with [word] and stores the results in [score], using
272+
/// the maximum of the new and the already existing values.
273+
void searchAndAccumulate(
274+
String word, {
275+
double weight = 1.0,
276+
required IndexedScore score,
277+
}) {
278+
assert(score.length == _length);
279+
final tokenMatch = lookupTokens(word);
280+
for (final token in tokenMatch.tokens) {
281+
final matchWeight = tokenMatch[token]!;
282+
final tokenWeight = _inverseIds[token]!;
283+
for (final e in tokenWeight.entries) {
284+
score.setValueMaxOf(e.key, matchWeight * e.value * weight);
285+
}
286+
}
287+
}
270288
}
271289

272290
/// Mutable score list that can be accessed via integer index.
@@ -285,6 +303,10 @@ class IndexedScore {
285303
return _values[index] <= 0.0;
286304
}
287305

306+
void setValue(int index, double value) {
307+
_values[index] = value;
308+
}
309+
288310
void setValueMaxOf(int index, double value) {
289311
_values[index] = math.max(_values[index], value);
290312
}
@@ -307,6 +329,15 @@ class IndexedScore {
307329
}
308330
}
309331

332+
void multiplyAllFrom(IndexedScore other) {
333+
assert(other._values.length == _values.length);
334+
for (var i = 0; i < _values.length; i++) {
335+
if (_values[i] == 0.0) continue;
336+
final v = other._values[i];
337+
_values[i] = v == 0.0 ? 0.0 : _values[i] * v;
338+
}
339+
}
340+
310341
Set<String> toKeySet() {
311342
final set = <String>{};
312343
for (var i = 0; i < _values.length; i++) {
@@ -317,4 +348,15 @@ class IndexedScore {
317348
}
318349
return set;
319350
}
351+
352+
Score toScore() {
353+
final map = <String, double>{};
354+
for (var i = 0; i < _values.length; i++) {
355+
final v = _values[i];
356+
if (v > 0.0) {
357+
map[_keys[i]] = v;
358+
}
359+
}
360+
return Score._(map);
361+
}
320362
}

0 commit comments

Comments
 (0)