Skip to content

Commit 082d73f

Browse files
committed
Reduce the memory allocation during search with mutable IndexedScore.
1 parent 12f2c66 commit 082d73f

File tree

4 files changed

+131
-90
lines changed

4 files changed

+131
-90
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ Important changes to data models, configuration, and migrations between each
22
AppEngine version, listed here to ease deployment and troubleshooting.
33

44
## Next Release (replace with git tag when deployed)
5+
* `search` uses the `IndexedScore` to reduce memory allocations.
56

67
## `20241031t095600-all`
78
* Bumped runtimeVersion to `2024.10.29`.

app/lib/search/mem_index.dart

Lines changed: 51 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -311,32 +311,28 @@ class InMemoryPackageIndex {
311311
// We cannot update the main `packages` variable yet, as the dartdoc API
312312
// symbols are added on top of the core results, and `packages` is used
313313
// there too.
314-
final coreScores = <Score>[];
315-
var wordScopedPackages = packages;
314+
final coreScores = IndexedScore(_packageNameIndex._packageNames);
315+
for (var i = 0; i < _documents.length; i++) {
316+
if (packages.contains(_documents[i].package)) {
317+
coreScores.setValue(i, 1.0);
318+
}
319+
}
320+
316321
for (final word in words) {
317-
final nameScore =
318-
_packageNameIndex.searchWord(word, packages: wordScopedPackages);
319322
if (includeNameMatches && _documentsByName.containsKey(word)) {
320323
nameMatches ??= <String>{};
321324
nameMatches.add(word);
322325
}
323326

324-
final descr = _descrIndex
325-
.searchWords([word], weight: 0.90, limitToIds: wordScopedPackages);
326-
final readme = _readmeIndex
327-
.searchWords([word], weight: 0.75, limitToIds: wordScopedPackages);
328-
final score = Score.max([nameScore, descr, readme]);
329-
coreScores.add(score);
330-
// don't update if the query is single-word
331-
if (words.length > 1) {
332-
wordScopedPackages = score.keys.toSet();
333-
if (wordScopedPackages.isEmpty) {
334-
break;
335-
}
336-
}
327+
final wordScore =
328+
_packageNameIndex.searchWord(word, filterOnNonZeros: coreScores);
329+
_descrIndex.searchAndAccumulate(word,
330+
weight: 0.90.toDouble(), score: wordScore);
331+
_readmeIndex.searchAndAccumulate(word,
332+
weight: 0.75.toDouble(), score: wordScore);
333+
coreScores.multiplyAllFrom(wordScore);
337334
}
338-
339-
final core = Score.multiply(coreScores);
335+
final core = coreScores.toScore();
340336

341337
var symbolPages = Score.empty;
342338
if (!checkAborted()) {
@@ -502,16 +498,13 @@ class _TextResults {
502498
@visibleForTesting
503499
class PackageNameIndex {
504500
final List<String> _packageNames;
505-
late final Map<String, _PkgNameData> _data;
501+
late final List<_PkgNameData> _data;
506502

507503
PackageNameIndex(this._packageNames) {
508-
_data = Map.fromEntries(_packageNames.map((package) {
504+
_data = _packageNames.map((package) {
509505
final collapsed = _collapseName(package);
510-
return MapEntry(
511-
package,
512-
_PkgNameData(collapsed, trigrams(collapsed).toSet()),
513-
);
514-
}));
506+
return _PkgNameData(collapsed, trigrams(collapsed).toSet());
507+
}).toList();
515508
}
516509

517510
/// Maps package name to a reduced form of the name:
@@ -520,27 +513,44 @@ class PackageNameIndex {
520513
package.replaceAll('_', '').toLowerCase();
521514

522515
/// Search [text] and return the matching packages with scores.
516+
@visibleForTesting
523517
Score search(String text) {
524-
return Score.multiply(splitForQuery(text).map(searchWord).toList());
518+
IndexedScore? score;
519+
for (final w in splitForQuery(text)) {
520+
final s = searchWord(w, filterOnNonZeros: score);
521+
if (score == null) {
522+
score = s;
523+
} else {
524+
score.multiplyAllFrom(s);
525+
}
526+
}
527+
return score?.toScore() ?? Score.empty;
525528
}
526529

527-
/// Search using the parsed [word] and return the match packages with scores.
528-
Score searchWord(String word, {Set<String>? packages}) {
529-
final pkgNamesToCheck = packages ?? _packageNames;
530-
final values = <String, double>{};
530+
/// Search using the parsed [word] and return the matching packages with scores
531+
/// as a new [IndexedScore] instance.
532+
///
533+
/// When [filterOnNonZeros] is present, only the indexes with an already
534+
/// non-zero value are evaluated.
535+
IndexedScore searchWord(
536+
String word, {
537+
IndexedScore? filterOnNonZeros,
538+
}) {
539+
final scoreore = IndexedScore(_packageNames);
531540
final singularWord = word.length <= 3 || !word.endsWith('s')
532541
? word
533542
: word.substring(0, word.length - 1);
534543
final collapsedWord = _collapseName(singularWord);
535544
final parts =
536545
collapsedWord.length <= 3 ? [collapsedWord] : trigrams(collapsedWord);
537-
for (final pkg in pkgNamesToCheck) {
538-
final entry = _data[pkg];
539-
if (entry == null) {
546+
for (var i = 0; i < _data.length; i++) {
547+
if (filterOnNonZeros?.isNotPositive(i) ?? false) {
540548
continue;
541549
}
550+
551+
final entry = _data[i];
542552
if (entry.collapsed.contains(collapsedWord)) {
543-
values[pkg] = 1.0;
553+
scoreore.setValue(i, 1.0);
544554
continue;
545555
}
546556
var matched = 0;
@@ -549,11 +559,15 @@ class PackageNameIndex {
549559
matched++;
550560
}
551561
}
562+
// only keep partial matches whose score is at least 0.5
552563
if (matched > 0) {
553-
values[pkg] = matched / parts.length;
564+
final v = matched / parts.length;
565+
if (v >= 0.5) {
566+
scoreore.setValue(i, v);
567+
}
554568
}
555569
}
556-
return Score(values).removeLowValues(fraction: 0.5, minValue: 0.5);
570+
return scoreore;
557571
}
558572
}
559573

app/lib/search/token_index.dart

Lines changed: 77 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,10 @@ class TokenIndex {
164164
}
165165
// Document weight is a highly scaled-down proxy of the length.
166166
final dw = 1 + math.log(1 + tokens.length) / 100;
167-
for (final token in tokens.keys) {
167+
for (final e in tokens.entries) {
168+
final token = e.key;
168169
final weights = _inverseIds.putIfAbsent(token, () => {});
169-
weights[i] = math.max(weights[i] ?? 0.0, tokens[token]! / dw);
170+
weights[i] = math.max(weights[i] ?? 0.0, e.value / dw);
170171
}
171172
}
172173
}
@@ -205,39 +206,6 @@ class TokenIndex {
205206
return tokenMatch;
206207
}
207208

208-
/// Returns an {id: score} map of the documents stored in the [TokenIndex].
209-
/// The tokens in [tokenMatch] will be used to calculate a weighted sum of scores.
210-
///
211-
/// When [limitToIds] is specified, the result will contain only the set of
212-
/// identifiers in it.
213-
Map<String, double> _scoreDocs(TokenMatch tokenMatch,
214-
{double weight = 1.0, Set<String>? limitToIds}) {
215-
// Summarize the scores for the documents.
216-
final docScores = List<double>.filled(_length, 0.0);
217-
for (final token in tokenMatch.tokens) {
218-
final docWeights = _inverseIds[token]!;
219-
for (final e in docWeights.entries) {
220-
final i = e.key;
221-
docScores[i] = math.max(docScores[i], tokenMatch[token]! * e.value);
222-
}
223-
}
224-
225-
final result = <String, double>{};
226-
// post-process match weights
227-
for (var i = 0; i < _length; i++) {
228-
final id = _ids[i];
229-
final w = docScores[i];
230-
if (w <= 0.0) {
231-
continue;
232-
}
233-
if (limitToIds != null && !limitToIds.contains(id)) {
234-
continue;
235-
}
236-
result[id] = w * weight;
237-
}
238-
return result;
239-
}
240-
241209
/// Search the index for [text], with a (term-match / document coverage percent)
242210
/// scoring.
243211
@visibleForTesting
@@ -247,24 +215,82 @@ class TokenIndex {
247215

248216
/// Search the index for [words], with a (term-match / document coverage percent)
249217
/// scoring.
250-
Score searchWords(List<String> words,
251-
{double weight = 1.0, Set<String>? limitToIds}) {
252-
if (limitToIds != null && limitToIds.isEmpty) {
253-
return Score.empty;
254-
}
255-
final scores = <Score>[];
218+
Score searchWords(List<String> words, {double weight = 1.0}) {
219+
IndexedScore? score;
256220
for (final w in words) {
257-
final tokens = lookupTokens(w);
258-
final values = _scoreDocs(
259-
tokens,
260-
weight: weight,
261-
limitToIds: limitToIds,
262-
);
263-
if (values.isEmpty) {
264-
return Score.empty;
221+
final s = IndexedScore(_ids);
222+
searchAndAccumulate(w, score: s, weight: weight);
223+
if (score == null) {
224+
score = s;
225+
// the weight is already applied to the first word's score; use 1.0 for the rest
226+
weight = 1.0;
227+
} else {
228+
score.multiplyAllFrom(s);
229+
}
230+
}
231+
return score?.toScore() ?? Score.empty;
232+
}
233+
234+
/// Searches the index with [word] and stores the results in [score], using
235+
/// a maximum-accumulation operation on the already existing values.
236+
void searchAndAccumulate(
237+
String word, {
238+
double weight = 1.0,
239+
required IndexedScore score,
240+
}) {
241+
assert(score.length == _length);
242+
final tokenMatch = lookupTokens(word);
243+
for (final token in tokenMatch.tokens) {
244+
final matchWeight = tokenMatch[token]!;
245+
final tokenWeight = _inverseIds[token]!;
246+
for (final e in tokenWeight.entries) {
247+
score.setValueMaxOf(e.key, matchWeight * e.value * weight);
248+
}
249+
}
250+
}
251+
}
252+
253+
/// Mutable score list that can be accessed via an integer index.
254+
class IndexedScore {
255+
final List<String> _keys;
256+
final List<double> _values;
257+
258+
IndexedScore._(this._keys, this._values);
259+
260+
factory IndexedScore(List<String> keys) =>
261+
IndexedScore._(keys, List<double>.filled(keys.length, 0.0));
262+
263+
late final length = _values.length;
264+
265+
bool isNotPositive(int index) {
266+
return _values[index] <= 0.0;
267+
}
268+
269+
void setValue(int index, double value) {
270+
_values[index] = value;
271+
}
272+
273+
void setValueMaxOf(int index, double value) {
274+
_values[index] = math.max(_values[index], value);
275+
}
276+
277+
void multiplyAllFrom(IndexedScore other) {
278+
assert(other._values.length == _values.length);
279+
for (var i = 0; i < _values.length; i++) {
280+
if (_values[i] == 0.0) continue;
281+
final v = other._values[i];
282+
_values[i] = v == 0.0 ? 0.0 : _values[i] * v;
283+
}
284+
}
285+
286+
Score toScore() {
287+
final map = <String, double>{};
288+
for (var i = 0; i < _values.length; i++) {
289+
final v = _values[i];
290+
if (v > 0.0) {
291+
map[_keys[i]] = v;
265292
}
266-
scores.add(Score(values));
267293
}
268-
return Score.multiply(scores);
294+
return Score._(map);
269295
}
270296
}

app/test/search/api_doc_page_test.dart

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ void main() {
9999
'packageHits': [
100100
{
101101
'package': 'foo',
102-
'score': closeTo(0.18, 0.01), // find WebPageGenerator
102+
'score': closeTo(0.26, 0.01), // find WebPageGenerator
103103
'apiPages': [
104104
{'path': 'generator.html'},
105105
],
@@ -119,7 +119,7 @@ void main() {
119119
'packageHits': [
120120
{
121121
'package': 'foo',
122-
'score': closeTo(0.11, 0.01), // find WebPageGenerator
122+
'score': closeTo(0.15, 0.01), // find WebPageGenerator
123123
'apiPages': [
124124
{'path': 'generator.html'},
125125
],

0 commit comments

Comments
 (0)