Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 54 additions & 64 deletions app/lib/search/mem_index.dart
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ class InMemoryPackageIndex {
final List<PackageDocument> _documents;
final _documentsByName = <String, PackageDocument>{};
late final PackageNameIndex _packageNameIndex;
late final TokenIndex _descrIndex;
late final TokenIndex _readmeIndex;
late final TokenIndex _apiSymbolIndex;
late final TokenIndex<String> _descrIndex;
late final TokenIndex<String> _readmeIndex;
late final TokenIndex<IndexedApiDocPage> _apiSymbolIndex;

/// Adjusted score takes the overall score and transforms
/// it linearly into the [0.4-1.0] range.
Expand All @@ -51,16 +51,17 @@ class InMemoryPackageIndex {
InMemoryPackageIndex({
required Iterable<PackageDocument> documents,
}) : _documents = [...documents] {
final apiDocPageKeys = <String>[];
final apiDocPageKeys = <IndexedApiDocPage>[];
final apiDocPageValues = <String>[];
for (final doc in _documents) {
for (var i = 0; i < _documents.length; i++) {
final doc = _documents[i];
_documentsByName[doc.package] = doc;

final apiDocPages = doc.apiDocPages;
if (apiDocPages != null) {
for (final page in apiDocPages) {
if (page.symbols != null && page.symbols!.isNotEmpty) {
apiDocPageKeys.add(_apiDocPageId(doc.package, page));
apiDocPageKeys.add(IndexedApiDocPage(i, doc.package, page));
apiDocPageValues.add(page.symbols!.join(' '));
}
}
Expand Down Expand Up @@ -233,8 +234,7 @@ class InMemoryPackageIndex {
packageHits = packageHits.map((ps) {
final apiPages = textResults.topApiPages[ps.package]
// TODO(https://github.com/dart-lang/pub-dev/issues/7106): extract title for the page
?.map((MapEntry<String, double> e) =>
ApiPageRef(path: _apiDocPath(e.key)))
?.map((MapEntry<String, double> e) => ApiPageRef(path: e.key))
.toList();
return ps.change(apiPages: apiPages);
}).toList();
Expand Down Expand Up @@ -264,7 +264,7 @@ class InMemoryPackageIndex {
}

_TextResults? _searchText(
IndexedScore packageScores,
IndexedScore<String> packageScores,
String? text, {
required bool includeNameMatches,
}) {
Expand Down Expand Up @@ -310,64 +310,58 @@ class InMemoryPackageIndex {
packageScores.multiplyAllFrom(wordScore);
}

final core = packageScores.toScore();

var symbolPages = Score.empty;
if (!checkAborted()) {
symbolPages = _apiSymbolIndex.searchWords(words, weight: 0.70);
}

final apiPackages = <String, double>{};
final topApiPages = <String, List<MapEntry<String, double>>>{};
const maxApiPageCount = 2;
for (final entry in symbolPages.entries) {
final pkg = _apiDocPkg(entry.key);
if (!packages.contains(pkg)) continue;

// skip if the previously found pages are better than the current one
final pages = topApiPages.putIfAbsent(pkg, () => []);
if (pages.length >= maxApiPageCount && pages.last.value > entry.value) {
continue;
}
if (!checkAborted()) {
final symbolPages = _apiSymbolIndex.searchWords(words, weight: 0.70);

// update the top api packages score
apiPackages[pkg] = math.max(entry.value, apiPackages[pkg] ?? 0.0);
for (var i = 0; i < symbolPages.length; i++) {
final value = symbolPages.getValue(i);
if (value < 0.01) continue;

// add the page and re-sort the current results
pages.add(entry);
if (pages.length > 1) {
pages.sort((a, b) => -a.value.compareTo(b.value));
}
// keep the results limited to the max count
if (pages.length > maxApiPageCount) {
pages.removeLast();
final doc = symbolPages.keys[i];
if (!packages.contains(doc.package)) continue;

// skip if the previously found pages are better than the current one
final pages = topApiPages.putIfAbsent(doc.package, () => []);
if (pages.length >= maxApiPageCount && pages.last.value > value) {
continue;
}

// update the top api packages score
packageScores.setValueMaxOf(doc.index, value);

// add the page and re-sort the current results
pages.add(MapEntry(doc.page.relativePath, value));
if (pages.length > 1) {
pages.sort((a, b) => -a.value.compareTo(b.value));
}

// keep the results limited to the max count
if (pages.length > maxApiPageCount) {
pages.removeLast();
}
}
}

final apiPkgScore = Score(apiPackages);
var score = Score.max([core, apiPkgScore])
.removeLowValues(fraction: 0.2, minValue: 0.01);

// filter results based on exact phrases
final phrases = extractExactPhrases(text);
if (!aborted && phrases.isNotEmpty) {
final matched = <String, double>{};
for (final MapEntry(key: package, value: packageScore)
in score.entries) {
final doc = _documentsByName[package]!;
final bool matchedAllPhrases = phrases.every((phrase) =>
for (var i = 0; i < packageScores.length; i++) {
if (packageScores.isNotPositive(i)) continue;
final doc = _documents[i];
final matchedAllPhrases = phrases.every((phrase) =>
doc.package.contains(phrase) ||
doc.description!.contains(phrase) ||
doc.readme!.contains(phrase));
if (matchedAllPhrases) {
matched[package] = packageScore;
if (!matchedAllPhrases) {
packageScores.setValue(i, 0);
}
}
score = Score(matched);
}

return _TextResults(
score,
packageScores.toScore(),
topApiPages,
nameMatches: nameMatches?.toList(),
);
Expand Down Expand Up @@ -441,18 +435,6 @@ class InMemoryPackageIndex {
if (x != 0) return x;
return _compareUpdated(a, b);
}

/// Builds the composite identifier of an API documentation [page] by joining
/// the owning [package] name and the page's relative path with `::`.
String _apiDocPageId(String package, ApiDocPage page) =>
    '$package::${page.relativePath}';

/// Returns the part of [id] that precedes the first `::` separator.
///
/// When [id] contains no separator, the whole [id] is returned.
String _apiDocPkg(String id) => id.split('::').first;

/// Returns the part of [id] that follows the last `::` separator.
///
/// When [id] contains no separator, the whole [id] is returned.
String _apiDocPath(String id) => id.split('::').last;
}

class _TextResults {
Expand Down Expand Up @@ -494,7 +476,7 @@ class PackageNameIndex {
/// Search [text] and return the matching packages with scores.
@visibleForTesting
Score search(String text) {
IndexedScore? score;
IndexedScore<String>? score;
for (final w in splitForQuery(text)) {
final s = searchWord(w, filterOnNonZeros: score);
if (score == null) {
Expand All @@ -511,9 +493,9 @@ class PackageNameIndex {
///
/// When [filterOnNonZeros] is present, only the indexes with an already
/// non-zero value are evaluated.
IndexedScore searchWord(
IndexedScore<String> searchWord(
String word, {
IndexedScore? filterOnNonZeros,
IndexedScore<String>? filterOnNonZeros,
}) {
final score = IndexedScore(_packageNames);
final singularWord = word.length <= 3 || !word.endsWith('s')
Expand Down Expand Up @@ -570,3 +552,11 @@ class IndexedPackageHit {

IndexedPackageHit(this.index, this.hit);
}

/// Key of the API symbol token index: an API documentation page together with
/// the package document it belongs to.
class IndexedApiDocPage {
/// Position of the owning package document in the index's document list;
/// used to address the package's entry in an index-based score.
final int index;
/// Name of the package that owns [page].
final String package;
/// The API documentation page whose symbols are indexed.
final ApiDocPage page;

IndexedApiDocPage(this.index, this.package, this.page);
}
5 changes: 3 additions & 2 deletions app/lib/search/sdk_mem_index.dart
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class SdkMemIndex {
final String _sdk;
final String? _version;
final Uri _baseUri;
final _tokensPerLibrary = <String, TokenIndex>{};
final _tokensPerLibrary = <String, TokenIndex<String>>{};
final _baseUriPerLibrary = <String, String>{};
final _descriptionPerLibrary = <String, String>{};
final _libraryWeights = <String, double>{};
Expand Down Expand Up @@ -135,7 +135,8 @@ class SdkMemIndex {
final isQualifiedQuery = query.contains(library.split(':').last);

final tokens = _tokensPerLibrary[library]!;
final plainResults = tokens.searchWords(words).top(3, minValue: 0.05);
final plainResults =
tokens.searchWords(words).toScore().top(3, minValue: 0.05);
if (plainResults.isEmpty) continue;

final libraryWeight = _libraryWeights[library] ?? 1.0;
Expand Down
52 changes: 30 additions & 22 deletions app/lib/search/token_index.dart
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,15 @@ class TokenMatch {
}

/// Stores a token -> documentId inverted index with weights.
class TokenIndex {
final List<String> _ids;
class TokenIndex<K> {
final List<K> _ids;

/// Maps token Strings to weighted documents (addressed via indexes).
final _inverseIds = <String, Map<int, double>>{};

late final _length = _ids.length;

TokenIndex(List<String> ids, List<String?> values) : _ids = ids {
TokenIndex(List<K> ids, List<String?> values) : _ids = ids {
assert(ids.length == values.length);
final length = values.length;
for (var i = 0; i < length; i++) {
Expand All @@ -172,7 +172,7 @@ class TokenIndex {
}
}

factory TokenIndex.fromMap(Map<String, String> map) {
factory TokenIndex.fromMap(Map<K, String> map) {
final keys = map.keys.toList();
final values = map.values.toList();
return TokenIndex(keys, values);
Expand Down Expand Up @@ -206,18 +206,10 @@ class TokenIndex {
return tokenMatch;
}

/// Search the index for [text], with a (term-match / document coverage percent)
/// scoring.
@visibleForTesting
Map<String, double> search(String text) {
return searchWords(splitForQuery(text))._values;
}

/// Search the index for [words], with a (term-match / document coverage percent)
/// scoring.
Score searchWords(List<String> words, {double weight = 1.0}) {
if (words.isEmpty) return Score.empty;
IndexedScore? score;
IndexedScore<K> searchWords(List<String> words, {double weight = 1.0}) {
IndexedScore<K>? score;
weight = math.pow(weight, 1 / words.length).toDouble();
for (final w in words) {
final s = IndexedScore(_ids);
Expand All @@ -228,7 +220,7 @@ class TokenIndex {
score.multiplyAllFrom(s);
}
}
return score?.toScore() ?? Score.empty;
return score ?? IndexedScore(_ids);
}

/// Searches the index with [word] and stores the results in [score], using
Expand All @@ -250,16 +242,26 @@ class TokenIndex {
}
}

/// Test-oriented helpers for token indexes whose document ids are strings.
extension StringTokenIndexExt on TokenIndex<String> {
  /// Splits [text] into query words and searches the index with them, using a
  /// (term-match / document coverage percent) scoring.
  ///
  /// Returns the resulting score for each matching document id.
  @visibleForTesting
  Map<String, double> search(String text) {
    final words = splitForQuery(text);
    final indexedScore = searchWords(words);
    return indexedScore.toScore();
  }
}

/// Mutable score list that can be accessed via integer index.
class IndexedScore {
final List<String> _keys;
class IndexedScore<K> {
final List<K> _keys;
final List<double> _values;

IndexedScore._(this._keys, this._values);

factory IndexedScore(List<String> keys, [double value = 0.0]) =>
factory IndexedScore(List<K> keys, [double value = 0.0]) =>
IndexedScore._(keys, List<double>.filled(keys.length, value));

List<K> get keys => _keys;
late final length = _values.length;

bool isPositive(int index) {
Expand All @@ -270,6 +272,10 @@ class IndexedScore {
return _values[index] <= 0.0;
}

/// Returns the score currently stored at [index].
double getValue(int index) => _values[index];

/// Overwrites the score stored at [index] with [value].
void setValue(int index, double value) => _values[index] = value;
Expand All @@ -278,7 +284,7 @@ class IndexedScore {
_values[index] = math.max(_values[index], value);
}

void removeWhere(bool Function(int index, String key) fn) {
void removeWhere(bool Function(int index, K key) fn) {
for (var i = 0; i < length; i++) {
if (isNotPositive(i)) continue;
if (fn(i, _keys[i])) {
Expand All @@ -287,7 +293,7 @@ class IndexedScore {
}
}

void retainWhere(bool Function(int index, String key) fn) {
void retainWhere(bool Function(int index, K key) fn) {
for (var i = 0; i < length; i++) {
if (isNotPositive(i)) continue;
if (!fn(i, _keys[i])) {
Expand All @@ -305,8 +311,8 @@ class IndexedScore {
}
}

Set<String> toKeySet() {
final set = <String>{};
Set<K> toKeySet() {
final set = <K>{};
for (var i = 0; i < _values.length; i++) {
final v = _values[i];
if (v > 0.0) {
Expand All @@ -315,7 +321,9 @@ class IndexedScore {
}
return set;
}
}

extension StringIndexedScoreExt on IndexedScore<String> {
Score toScore() {
final map = <String, double>{};
for (var i = 0; i < _values.length; i++) {
Expand Down
8 changes: 4 additions & 4 deletions app/test/search/token_index_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ void main() {
});

test('No match', () {
final TokenIndex index = TokenIndex.fromMap({
final index = TokenIndex.fromMap({
'uri://http': 'http',
'uri://http_magic': 'http_magic',
});
Expand All @@ -30,7 +30,7 @@ void main() {
});

test('Scoring exact and partial matches', () {
final TokenIndex index = TokenIndex.fromMap({
final index = TokenIndex.fromMap({
'uri://http': 'http',
'uri://http_magic': 'http_magic',
});
Expand All @@ -42,7 +42,7 @@ void main() {

test('CamelCase indexing', () {
final String queueText = '.DoubleLinkedQueue()';
final TokenIndex index = TokenIndex.fromMap({
final index = TokenIndex.fromMap({
'queue': queueText,
'queue_lower': queueText.toLowerCase(),
'unmodifiable': 'CustomUnmodifiableMapBase',
Expand All @@ -57,7 +57,7 @@ void main() {
});

test('Wierd cases: riak client', () {
final TokenIndex index = TokenIndex.fromMap({
final index = TokenIndex.fromMap({
'uri://cli': 'cli',
'uri://riak_client': 'riak_client',
'uri://teamspeak': 'teamspeak',
Expand Down
Loading