diff --git a/app/lib/search/mem_index.dart b/app/lib/search/mem_index.dart index a3af30bb91..e8a199309c 100644 --- a/app/lib/search/mem_index.dart +++ b/app/lib/search/mem_index.dart @@ -24,9 +24,9 @@ class InMemoryPackageIndex { final List _documents; final _documentsByName = {}; late final PackageNameIndex _packageNameIndex; - late final TokenIndex _descrIndex; - late final TokenIndex _readmeIndex; - late final TokenIndex _apiSymbolIndex; + late final TokenIndex _descrIndex; + late final TokenIndex _readmeIndex; + late final TokenIndex _apiSymbolIndex; /// Adjusted score takes the overall score and transforms /// it linearly into the [0.4-1.0] range. @@ -51,16 +51,17 @@ class InMemoryPackageIndex { InMemoryPackageIndex({ required Iterable documents, }) : _documents = [...documents] { - final apiDocPageKeys = []; + final apiDocPageKeys = []; final apiDocPageValues = []; - for (final doc in _documents) { + for (var i = 0; i < _documents.length; i++) { + final doc = _documents[i]; _documentsByName[doc.package] = doc; final apiDocPages = doc.apiDocPages; if (apiDocPages != null) { for (final page in apiDocPages) { if (page.symbols != null && page.symbols!.isNotEmpty) { - apiDocPageKeys.add(_apiDocPageId(doc.package, page)); + apiDocPageKeys.add(IndexedApiDocPage(i, doc.package, page)); apiDocPageValues.add(page.symbols!.join(' ')); } } @@ -233,8 +234,7 @@ class InMemoryPackageIndex { packageHits = packageHits.map((ps) { final apiPages = textResults.topApiPages[ps.package] // TODO(https://github.com/dart-lang/pub-dev/issues/7106): extract title for the page - ?.map((MapEntry e) => - ApiPageRef(path: _apiDocPath(e.key))) + ?.map((MapEntry e) => ApiPageRef(path: e.key)) .toList(); return ps.change(apiPages: apiPages); }).toList(); @@ -264,7 +264,7 @@ class InMemoryPackageIndex { } _TextResults? _searchText( - IndexedScore packageScores, + IndexedScore packageScores, String? text, { required bool includeNameMatches, }) { @@ -310,64 +310,58 @@ class InMemoryPackageIndex { packageScores.multiplyAllFrom(wordScore); } - final core = packageScores.toScore(); - - var symbolPages = Score.empty; - if (!checkAborted()) { - symbolPages = _apiSymbolIndex.searchWords(words, weight: 0.70); - } - - final apiPackages = {}; final topApiPages = >>{}; const maxApiPageCount = 2; - for (final entry in symbolPages.entries) { - final pkg = _apiDocPkg(entry.key); - if (!packages.contains(pkg)) continue; - - // skip if the previously found pages are better than the current one - final pages = topApiPages.putIfAbsent(pkg, () => []); - if (pages.length >= maxApiPageCount && pages.last.value > entry.value) { - continue; - } + if (!checkAborted()) { + final symbolPages = _apiSymbolIndex.searchWords(words, weight: 0.70); - // update the top api packages score - apiPackages[pkg] = math.max(entry.value, apiPackages[pkg] ?? 0.0); + for (var i = 0; i < symbolPages.length; i++) { + final value = symbolPages.getValue(i); + if (value < 0.01) continue; - // add the page and re-sort the current results - pages.add(entry); - if (pages.length > 1) { - pages.sort((a, b) => -a.value.compareTo(b.value)); - } - // keep the results limited to the max count - if (pages.length > maxApiPageCount) { - pages.removeLast(); + final doc = symbolPages.keys[i]; + if (!packages.contains(doc.package)) continue; + + // skip if the previously found pages are better than the current one + final pages = topApiPages.putIfAbsent(doc.package, () => []); + if (pages.length >= maxApiPageCount && pages.last.value > value) { + continue; + } + + // update the top api packages score + packageScores.setValueMaxOf(doc.index, value); + + // add the page and re-sort the current results + pages.add(MapEntry(doc.page.relativePath, value)); + if (pages.length > 1) { + pages.sort((a, b) => -a.value.compareTo(b.value)); + } + + // keep the results limited to the max count + if (pages.length > maxApiPageCount) { + pages.removeLast(); + } } } - final apiPkgScore = Score(apiPackages); - var score = Score.max([core, apiPkgScore]) - .removeLowValues(fraction: 0.2, minValue: 0.01); - // filter results based on exact phrases final phrases = extractExactPhrases(text); if (!aborted && phrases.isNotEmpty) { - final matched = {}; - for (final MapEntry(key: package, value: packageScore) - in score.entries) { - final doc = _documentsByName[package]!; - final bool matchedAllPhrases = phrases.every((phrase) => + for (var i = 0; i < packageScores.length; i++) { + if (packageScores.isNotPositive(i)) continue; + final doc = _documents[i]; + final matchedAllPhrases = phrases.every((phrase) => doc.package.contains(phrase) || doc.description!.contains(phrase) || doc.readme!.contains(phrase)); - if (matchedAllPhrases) { - matched[package] = packageScore; + if (!matchedAllPhrases) { + packageScores.setValue(i, 0); } } - score = Score(matched); } return _TextResults( - score, + packageScores.toScore(), topApiPages, nameMatches: nameMatches?.toList(), ); @@ -441,18 +435,6 @@ class InMemoryPackageIndex { if (x != 0) return x; return _compareUpdated(a, b); } - - String _apiDocPageId(String package, ApiDocPage page) { - return '$package::${page.relativePath}'; - } - - String _apiDocPkg(String id) { - return id.split('::').first; - } - - String _apiDocPath(String id) { - return id.split('::').last; - } } class _TextResults { @@ -494,7 +476,7 @@ class PackageNameIndex { /// Search [text] and return the matching packages with scores. @visibleForTesting Score search(String text) { - IndexedScore? score; + IndexedScore? score; for (final w in splitForQuery(text)) { final s = searchWord(w, filterOnNonZeros: score); if (score == null) { @@ -511,9 +493,9 @@ class PackageNameIndex { /// /// When [filterOnNonZeros] is present, only the indexes with an already /// non-zero value are evaluated. - IndexedScore searchWord( + IndexedScore searchWord( String word, { - IndexedScore? filterOnNonZeros, + IndexedScore? filterOnNonZeros, }) { final score = IndexedScore(_packageNames); final singularWord = word.length <= 3 || !word.endsWith('s') @@ -570,3 +552,11 @@ class IndexedPackageHit { IndexedPackageHit(this.index, this.hit); } + +class IndexedApiDocPage { + final int index; + final String package; + final ApiDocPage page; + + IndexedApiDocPage(this.index, this.package, this.page); +} diff --git a/app/lib/search/sdk_mem_index.dart b/app/lib/search/sdk_mem_index.dart index 0e9b8e49c0..f1887d82a6 100644 --- a/app/lib/search/sdk_mem_index.dart +++ b/app/lib/search/sdk_mem_index.dart @@ -18,7 +18,7 @@ class SdkMemIndex { final String _sdk; final String? _version; final Uri _baseUri; - final _tokensPerLibrary = {}; + final _tokensPerLibrary = >{}; final _baseUriPerLibrary = {}; final _descriptionPerLibrary = {}; final _libraryWeights = {}; @@ -135,7 +135,8 @@ class SdkMemIndex { final isQualifiedQuery = query.contains(library.split(':').last); final tokens = _tokensPerLibrary[library]!; - final plainResults = tokens.searchWords(words).top(3, minValue: 0.05); + final plainResults = + tokens.searchWords(words).toScore().top(3, minValue: 0.05); if (plainResults.isEmpty) continue; final libraryWeight = _libraryWeights[library] ?? 1.0; diff --git a/app/lib/search/token_index.dart b/app/lib/search/token_index.dart index 32f396b8d1..127174861d 100644 --- a/app/lib/search/token_index.dart +++ b/app/lib/search/token_index.dart @@ -141,15 +141,15 @@ class TokenMatch { } /// Stores a token -> documentId inverted index with weights. -class TokenIndex { - final List _ids; +class TokenIndex { + final List _ids; /// Maps token Strings to a weighted documents (addressed via indexes). final _inverseIds = >{}; late final _length = _ids.length; - TokenIndex(List ids, List values) : _ids = ids { + TokenIndex(List ids, List values) : _ids = ids { assert(ids.length == values.length); final length = values.length; for (var i = 0; i < length; i++) { @@ -172,7 +172,7 @@ class TokenIndex { } } - factory TokenIndex.fromMap(Map map) { + factory TokenIndex.fromMap(Map map) { final keys = map.keys.toList(); final values = map.values.toList(); return TokenIndex(keys, values); @@ -206,18 +206,10 @@ class TokenIndex { return tokenMatch; } - /// Search the index for [text], with a (term-match / document coverage percent) - /// scoring. - @visibleForTesting - Map search(String text) { - return searchWords(splitForQuery(text))._values; - } - /// Search the index for [words], with a (term-match / document coverage percent) /// scoring. - Score searchWords(List words, {double weight = 1.0}) { - if (words.isEmpty) return Score.empty; - IndexedScore? score; + IndexedScore searchWords(List words, {double weight = 1.0}) { + IndexedScore? score; weight = math.pow(weight, 1 / words.length).toDouble(); for (final w in words) { final s = IndexedScore(_ids); @@ -228,7 +220,7 @@ class TokenIndex { score.multiplyAllFrom(s); } } - return score?.toScore() ?? Score.empty; + return score ?? IndexedScore(_ids); } /// Searches the index with [word] and stores the results in [score], using @@ -250,16 +242,26 @@ class TokenIndex { } } +extension StringTokenIndexExt on TokenIndex { + /// Search the index for [text], with a (term-match / document coverage percent) + /// scoring. + @visibleForTesting + Map search(String text) { + return searchWords(splitForQuery(text)).toScore(); + } +} + /// Mutable score list that can accessed via integer index. -class IndexedScore { - final List _keys; +class IndexedScore { + final List _keys; final List _values; IndexedScore._(this._keys, this._values); - factory IndexedScore(List keys, [double value = 0.0]) => + factory IndexedScore(List keys, [double value = 0.0]) => IndexedScore._(keys, List.filled(keys.length, value)); + List get keys => _keys; late final length = _values.length; bool isPositive(int index) { @@ -270,6 +272,10 @@ class IndexedScore { return _values[index] <= 0.0; } + double getValue(int index) { + return _values[index]; + } + void setValue(int index, double value) { _values[index] = value; } @@ -278,7 +284,7 @@ class IndexedScore { _values[index] = math.max(_values[index], value); } - void removeWhere(bool Function(int index, String key) fn) { + void removeWhere(bool Function(int index, K key) fn) { for (var i = 0; i < length; i++) { if (isNotPositive(i)) continue; if (fn(i, _keys[i])) { @@ -287,7 +293,7 @@ class IndexedScore { } } - void retainWhere(bool Function(int index, String key) fn) { + void retainWhere(bool Function(int index, K key) fn) { for (var i = 0; i < length; i++) { if (isNotPositive(i)) continue; if (!fn(i, _keys[i])) { @@ -305,8 +311,8 @@ class IndexedScore { } } - Set toKeySet() { - final set = {}; + Set toKeySet() { + final set = {}; for (var i = 0; i < _values.length; i++) { final v = _values[i]; if (v > 0.0) { @@ -315,7 +321,9 @@ class IndexedScore { } return set; } +} +extension StringIndexedScoreExt on IndexedScore { Score toScore() { final map = {}; for (var i = 0; i < _values.length; i++) { diff --git a/app/test/search/token_index_test.dart b/app/test/search/token_index_test.dart index d9b8983e6f..e4a0abc095 100644 --- a/app/test/search/token_index_test.dart +++ b/app/test/search/token_index_test.dart @@ -18,7 +18,7 @@ void main() { }); test('No match', () { - final TokenIndex index = TokenIndex.fromMap({ + final index = TokenIndex.fromMap({ 'uri://http': 'http', 'uri://http_magic': 'http_magic', }); @@ -30,7 +30,7 @@ void main() { }); test('Scoring exact and partial matches', () { - final TokenIndex index = TokenIndex.fromMap({ + final index = TokenIndex.fromMap({ 'uri://http': 'http', 'uri://http_magic': 'http_magic', }); @@ -42,7 +42,7 @@ void main() { test('CamelCase indexing', () { final String queueText = '.DoubleLinkedQueue()'; - final TokenIndex index = TokenIndex.fromMap({ + final index = TokenIndex.fromMap({ 'queue': queueText, 'queue_lower': queueText.toLowerCase(), 'unmodifiable': 'CustomUnmodifiableMapBase', @@ -57,7 +57,7 @@ void main() { }); test('Wierd cases: riak client', () { - final TokenIndex index = TokenIndex.fromMap({ + final index = TokenIndex.fromMap({ 'uri://cli': 'cli', 'uri://riak_client': 'riak_client', 'uri://teamspeak': 'teamspeak',