Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 26 additions & 10 deletions app/lib/search/mem_index.dart
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class InMemoryPackageIndex {
late final TokenIndex<String> _descrIndex;
late final TokenIndex<String> _readmeIndex;
late final TokenIndex<IndexedApiDocPage> _apiSymbolIndex;
late final _scorePool = ScorePool(_packageNameIndex._packageNames);

/// Adjusted score takes the overall score and transforms
/// it linearly into the [0.4-1.0] range.
Expand Down Expand Up @@ -116,8 +117,16 @@ class InMemoryPackageIndex {
}

PackageSearchResult search(ServiceSearchQuery query) {
final packageScores = IndexedScore(_packageNameIndex._packageNames, 1.0);
return _scorePool.withScore(
value: 1.0,
fn: (score) {
return _search(query, score);
},
);
}

PackageSearchResult _search(
ServiceSearchQuery query, IndexedScore<String> packageScores) {
// filter on package prefix
if (query.parsedQuery.packagePrefix != null) {
final String prefix = query.parsedQuery.packagePrefix!.toLowerCase();
Expand Down Expand Up @@ -308,11 +317,17 @@ class InMemoryPackageIndex {
nameMatches.add(word);
}

final wordScore =
_packageNameIndex.searchWord(word, filterOnNonZeros: packageScores);
_descrIndex.searchAndAccumulate(word, score: wordScore);
_readmeIndex.searchAndAccumulate(word, weight: 0.75, score: wordScore);
packageScores.multiplyAllFrom(wordScore);
_scorePool.withScore(
value: 0.0,
fn: (wordScore) {
_packageNameIndex.searchWord(word,
score: wordScore, filterOnNonZeros: packageScores);
_descrIndex.searchAndAccumulate(word, score: wordScore);
_readmeIndex.searchAndAccumulate(word,
weight: 0.75, score: wordScore);
packageScores.multiplyAllFrom(wordScore);
},
);
}

final topApiPages =
Expand Down Expand Up @@ -483,7 +498,8 @@ class PackageNameIndex {
Map<String, double> search(String text) {
IndexedScore<String>? score;
for (final w in splitForQuery(text)) {
final s = searchWord(w, filterOnNonZeros: score);
final s = IndexedScore(_packageNames);
searchWord(w, score: s, filterOnNonZeros: score);
if (score == null) {
score = s;
} else {
Expand All @@ -498,11 +514,12 @@ class PackageNameIndex {
///
/// When [filterOnNonZeros] is present, only the indexes with an already
/// non-zero value are evaluated.
IndexedScore<String> searchWord(
void searchWord(
String word, {
required IndexedScore<String> score,
IndexedScore<String>? filterOnNonZeros,
}) {
final score = IndexedScore(_packageNames);
assert(score.keys.length == _packageNames.length);
final singularWord = word.length <= 3 || !word.endsWith('s')
? word
: word.substring(0, word.length - 1);
Expand Down Expand Up @@ -543,7 +560,6 @@ class PackageNameIndex {
}
}
}
return score;
}
}

Expand Down
25 changes: 25 additions & 0 deletions app/lib/search/token_index.dart
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class TokenIndex<K> {
/// scoring.
IndexedScore<K> searchWords(List<String> words, {double weight = 1.0}) {
IndexedScore<K>? score;

weight = math.pow(weight, 1 / words.length).toDouble();
for (final w in words) {
final s = IndexedScore(_ids);
Expand Down Expand Up @@ -142,6 +143,30 @@ extension StringTokenIndexExt on TokenIndex<String> {
}
}

/// A reusable pool for [IndexedScore] instances to spare some memory allocation.
class ScorePool<K> {
final List<K> _keys;
final _pool = <IndexedScore<K>>[];

ScorePool(this._keys);

R withScore<R>({
required double value,
required R Function(IndexedScore<K> score) fn,
}) {
late IndexedScore<K> score;
if (_pool.isNotEmpty) {
score = _pool.removeLast();
score._values.setAll(0, Iterable.generate(score.length, (_) => value));
} else {
score = IndexedScore<K>(_keys, value);
}
final r = fn(score);
_pool.add(score);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic means that we will never have more scores than the high-watermark of scores that we need at one time - right?

Thus we don't really risk leaking excessive memory... right?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct. We also replace the index every 15 minutes, and if there is an extreme outlier period, its memory footprint won't be around after that.

return r;
}
}

/// Mutable score list that can be accessed via integer index.
class IndexedScore<K> {
final List<K> _keys;
Expand Down
Loading