Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ Important changes to data models, configuration, and migrations between each
AppEngine version, listed here to ease deployment and troubleshooting.

## Next Release (replace with git tag when deployed)
* `search` now uses `IndexedScore` to reduce memory allocations.

## `20241031t095600-all`
* Bumped runtimeVersion to `2024.10.29`.
Expand Down
83 changes: 37 additions & 46 deletions app/lib/search/mem_index.dart
Original file line number Diff line number Diff line change
Expand Up @@ -311,32 +311,28 @@ class InMemoryPackageIndex {
// We cannot update the main `packages` variable yet, as the dartdoc API
// symbols are added on top of the core results, and `packages` is used
// there too.
final coreScores = <Score>[];
var wordScopedPackages = packages;
final coreScores = IndexedScore(_packageNameIndex._packageNames);
for (var i = 0; i < _documents.length; i++) {
if (packages.contains(_documents[i].package)) {
coreScores.setValue(i, 1.0);
}
}

for (final word in words) {
final nameScore = _packageNameIndex.searchWord(word,
filterOnPackages: wordScopedPackages);
if (includeNameMatches && _documentsByName.containsKey(word)) {
nameMatches ??= <String>{};
nameMatches.add(word);
}

final descr = _descrIndex
.searchWords([word], weight: 0.90, limitToIds: wordScopedPackages);
final readme = _readmeIndex
.searchWords([word], weight: 0.75, limitToIds: wordScopedPackages);
final score = Score.max([nameScore, descr, readme]);
coreScores.add(score);
// don't update if the query is single-word
if (words.length > 1) {
wordScopedPackages = score.keys.toSet();
if (wordScopedPackages.isEmpty) {
break;
}
}
final wordScore =
_packageNameIndex.searchWord(word, filterOnNonZeros: coreScores);
_descrIndex.searchAndAccumulate(word,
weight: 0.90.toDouble(), score: wordScore);
_readmeIndex.searchAndAccumulate(word,
weight: 0.75.toDouble(), score: wordScore);
coreScores.multiplyAllFrom(wordScore);
}

final core = Score.multiply(coreScores);
final core = coreScores.toScore();

var symbolPages = Score.empty;
if (!checkAborted()) {
Expand Down Expand Up @@ -502,16 +498,13 @@ class _TextResults {
@visibleForTesting
class PackageNameIndex {
final List<String> _packageNames;
late final Map<String, _PkgNameData> _data;
late final List<_PkgNameData> _data;

PackageNameIndex(this._packageNames) {
_data = Map.fromEntries(_packageNames.map((package) {
_data = _packageNames.map((package) {
final collapsed = _collapseName(package);
return MapEntry(
package,
_PkgNameData(collapsed, trigrams(collapsed).toSet()),
);
}));
return _PkgNameData(collapsed, trigrams(collapsed).toSet());
}).toList();
}

/// Maps package name to a reduced form of the name:
Expand All @@ -522,45 +515,43 @@ class PackageNameIndex {
/// Search [text] and return the matching packages with scores.
@visibleForTesting
Score search(String text) {
Score? score;
IndexedScore? score;
for (final w in splitForQuery(text)) {
final s = searchWord(w, filterOnPackages: score?.keys);
final s = searchWord(w, filterOnNonZeros: score);
if (score == null) {
score = s;
} else {
// Note: on one hand, it is inefficient to multiply the [Score] on each
// iteration. However, (1) this is only happening in test, (2) it may be
// better for the next iteration to work on a more limited `filterOnPackages`,
// and (3) it will be updated to a more efficient in-place update (#8225).
score = Score.multiply([score, s]);
score.multiplyAllFrom(s);
}
}
return score ?? Score.empty;
return score?.toScore() ?? Score.empty;

}

/// Search using the parsed [word] and return the matching packages with scores
/// as a new [Score] instance.
/// as a new [IndexedScore] instance.
///
/// When [filterOnPackages] is present, only the names present are evaluated.
Score searchWord(
/// When [filterOnNonZeros] is present, only the indexes with an already
/// non-zero value are evaluated.
IndexedScore searchWord(
String word, {
Iterable<String>? filterOnPackages,
IndexedScore? filterOnNonZeros,
}) {
final pkgNamesToCheck = filterOnPackages ?? _packageNames;
final values = <String, double>{};
final score = IndexedScore(_packageNames);
final singularWord = word.length <= 3 || !word.endsWith('s')
? word
: word.substring(0, word.length - 1);
final collapsedWord = _collapseName(singularWord);
final parts =
collapsedWord.length <= 3 ? [collapsedWord] : trigrams(collapsedWord);
for (final pkg in pkgNamesToCheck) {
final entry = _data[pkg];
if (entry == null) {
for (var i = 0; i < _data.length; i++) {
if (filterOnNonZeros?.isNotPositive(i) ?? false) {
continue;
}

final entry = _data[i];
if (entry.collapsed.contains(collapsedWord)) {
values[pkg] = 1.0;
score.setValue(i, 1.0);
continue;
}
var matched = 0;
Expand All @@ -574,11 +565,11 @@ class PackageNameIndex {
if (matched > 0) {
final v = matched / parts.length;
if (v >= 0.5) {
values[pkg] = v;
score.setValue(i, v);
}
}
}
return Score(values);
return score;
}
}

Expand Down
113 changes: 69 additions & 44 deletions app/lib/search/token_index.dart
Original file line number Diff line number Diff line change
Expand Up @@ -206,37 +206,22 @@ class TokenIndex {
return tokenMatch;
}

/// Returns an {id: score} map of the documents stored in the [TokenIndex].
/// The tokens in [tokenMatch] will be used to calculate a weighted sum of scores.
///
/// When [limitToIds] is specified, the result will contain only the set of
/// identifiers in it.
Map<String, double> _scoreDocs(TokenMatch tokenMatch,
{double weight = 1.0, Set<String>? limitToIds}) {
// Summarize the scores for the documents.
final docScores = List<double>.filled(_length, 0.0);
/// Searches the index for [word] and accumulates the results into [score]:
/// each matching entry is set to the maximum of its existing value and the
/// new match value scaled by [weight].
void searchAndAccumulate(
String word, {
double weight = 1.0,
required IndexedScore score,
}) {
assert(score.length == _length);
final tokenMatch = lookupTokens(word);
for (final token in tokenMatch.tokens) {
final docWeights = _inverseIds[token]!;
for (final e in docWeights.entries) {
final i = e.key;
docScores[i] = math.max(docScores[i], tokenMatch[token]! * e.value);
final matchWeight = tokenMatch[token]!;
final tokenWeight = _inverseIds[token]!;
for (final e in tokenWeight.entries) {
score.setValueMaxOf(e.key, matchWeight * e.value * weight);
}
}

final result = <String, double>{};
// post-process match weights
for (var i = 0; i < _length; i++) {
final id = _ids[i];
final w = docScores[i];
if (w <= 0.0) {
continue;
}
if (limitToIds != null && !limitToIds.contains(id)) {
continue;
}
result[id] = w * weight;
}
return result;
}

/// Search the index for [text], with a (term-match / document coverage percent)
Expand All @@ -248,24 +233,64 @@ class TokenIndex {

/// Search the index for [words], with a (term-match / document coverage percent)
/// scoring.
Score searchWords(List<String> words,
{double weight = 1.0, Set<String>? limitToIds}) {
if (limitToIds != null && limitToIds.isEmpty) {
return Score.empty;
}
final scores = <Score>[];
Score searchWords(List<String> words, {double weight = 1.0}) {
IndexedScore? score;
for (final w in words) {
final tokens = lookupTokens(w);
final values = _scoreDocs(
tokens,
weight: weight,
limitToIds: limitToIds,
);
if (values.isEmpty) {
return Score.empty;
final s = IndexedScore(_ids);
searchAndAccumulate(w, score: s, weight: weight);
if (score == null) {
score = s;
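// The weight has been applied to the first word's scores; use 1.0 for the
// remaining words so the product is not scaled by it again.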
weight = 1.0;
} else {
score.multiplyAllFrom(s);
}
}
return score?.toScore() ?? Score.empty;
}
}

/// Mutable score list that can be accessed via integer index.
///
/// The value at index `i` is the score of the key at index `i` of the key
/// list given to the constructor. Every value starts at `0.0`.
class IndexedScore {
  final List<String> _keys;
  final List<double> _values;

  IndexedScore._(this._keys, this._values);

  /// Creates a score list with one `0.0` value for each entry of [keys].
  factory IndexedScore(List<String> keys) =>
      IndexedScore._(keys, List<double>.filled(keys.length, 0.0));

  /// The number of score values.
  int get length => _values.length;

  /// Whether the value at [index] is zero or negative.
  bool isNotPositive(int index) => _values[index] <= 0.0;

  /// Overwrites the value at [index] with [value].
  void setValue(int index, double value) {
    _values[index] = value;
  }

  /// Sets the value at [index] to the larger of its current value and [value].
  void setValueMaxOf(int index, double value) {
    _values[index] = math.max(_values[index], value);
  }

  /// Multiplies each value in place with the value at the same index in
  /// [other].
  ///
  /// [other] must have the same number of values as this instance; this is
  /// only checked with an `assert`.
  void multiplyAllFrom(IndexedScore other) {
    assert(other._values.length == _values.length);
    for (var i = 0; i < _values.length; i++) {
      // A zero stays zero, so the other value does not need to be read.
      if (_values[i] == 0.0) continue;
      final v = other._values[i];
      // Store an explicit zero instead of the result of multiplying by it.
      _values[i] = v == 0.0 ? 0.0 : _values[i] * v;
    }
  }

  /// Converts the values into a [Score], keeping only the keys whose value is
  /// greater than zero.
  Score toScore() {
    final map = <String, double>{};
    for (var i = 0; i < _values.length; i++) {
      final v = _values[i];
      if (v > 0.0) {
        map[_keys[i]] = v;
      }
    }
    return Score._(map);
  }
}
4 changes: 2 additions & 2 deletions app/test/search/api_doc_page_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ void main() {
'packageHits': [
{
'package': 'foo',
'score': closeTo(0.18, 0.01), // find WebPageGenerator
'score': closeTo(0.26, 0.01), // find WebPageGenerator
'apiPages': [
{'path': 'generator.html'},
],
Expand All @@ -119,7 +119,7 @@ void main() {
'packageHits': [
{
'package': 'foo',
'score': closeTo(0.11, 0.01), // find WebPageGenerator
'score': closeTo(0.15, 0.01), // find WebPageGenerator
'apiPages': [
{'path': 'generator.html'},
],
Expand Down
Loading