Skip to content

Commit 1c6026e

Browse files
authored
Use IndexedScore for faster API doc and exact phrase scoring. (#8258)
1 parent 820a531 commit 1c6026e

File tree

4 files changed

+91
-92
lines changed

4 files changed

+91
-92
lines changed

app/lib/search/mem_index.dart

Lines changed: 54 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ class InMemoryPackageIndex {
2424
final List<PackageDocument> _documents;
2525
final _documentsByName = <String, PackageDocument>{};
2626
late final PackageNameIndex _packageNameIndex;
27-
late final TokenIndex _descrIndex;
28-
late final TokenIndex _readmeIndex;
29-
late final TokenIndex _apiSymbolIndex;
27+
late final TokenIndex<String> _descrIndex;
28+
late final TokenIndex<String> _readmeIndex;
29+
late final TokenIndex<IndexedApiDocPage> _apiSymbolIndex;
3030

3131
/// Adjusted score takes the overall score and transforms
3232
/// it linearly into the [0.4-1.0] range.
@@ -51,16 +51,17 @@ class InMemoryPackageIndex {
5151
InMemoryPackageIndex({
5252
required Iterable<PackageDocument> documents,
5353
}) : _documents = [...documents] {
54-
final apiDocPageKeys = <String>[];
54+
final apiDocPageKeys = <IndexedApiDocPage>[];
5555
final apiDocPageValues = <String>[];
56-
for (final doc in _documents) {
56+
for (var i = 0; i < _documents.length; i++) {
57+
final doc = _documents[i];
5758
_documentsByName[doc.package] = doc;
5859

5960
final apiDocPages = doc.apiDocPages;
6061
if (apiDocPages != null) {
6162
for (final page in apiDocPages) {
6263
if (page.symbols != null && page.symbols!.isNotEmpty) {
63-
apiDocPageKeys.add(_apiDocPageId(doc.package, page));
64+
apiDocPageKeys.add(IndexedApiDocPage(i, doc.package, page));
6465
apiDocPageValues.add(page.symbols!.join(' '));
6566
}
6667
}
@@ -233,8 +234,7 @@ class InMemoryPackageIndex {
233234
packageHits = packageHits.map((ps) {
234235
final apiPages = textResults.topApiPages[ps.package]
235236
// TODO(https://github.com/dart-lang/pub-dev/issues/7106): extract title for the page
236-
?.map((MapEntry<String, double> e) =>
237-
ApiPageRef(path: _apiDocPath(e.key)))
237+
?.map((MapEntry<String, double> e) => ApiPageRef(path: e.key))
238238
.toList();
239239
return ps.change(apiPages: apiPages);
240240
}).toList();
@@ -264,7 +264,7 @@ class InMemoryPackageIndex {
264264
}
265265

266266
_TextResults? _searchText(
267-
IndexedScore packageScores,
267+
IndexedScore<String> packageScores,
268268
String? text, {
269269
required bool includeNameMatches,
270270
}) {
@@ -310,64 +310,58 @@ class InMemoryPackageIndex {
310310
packageScores.multiplyAllFrom(wordScore);
311311
}
312312

313-
final core = packageScores.toScore();
314-
315-
var symbolPages = Score.empty;
316-
if (!checkAborted()) {
317-
symbolPages = _apiSymbolIndex.searchWords(words, weight: 0.70);
318-
}
319-
320-
final apiPackages = <String, double>{};
321313
final topApiPages = <String, List<MapEntry<String, double>>>{};
322314
const maxApiPageCount = 2;
323-
for (final entry in symbolPages.entries) {
324-
final pkg = _apiDocPkg(entry.key);
325-
if (!packages.contains(pkg)) continue;
326-
327-
// skip if the previously found pages are better than the current one
328-
final pages = topApiPages.putIfAbsent(pkg, () => []);
329-
if (pages.length >= maxApiPageCount && pages.last.value > entry.value) {
330-
continue;
331-
}
315+
if (!checkAborted()) {
316+
final symbolPages = _apiSymbolIndex.searchWords(words, weight: 0.70);
332317

333-
// update the top api packages score
334-
apiPackages[pkg] = math.max(entry.value, apiPackages[pkg] ?? 0.0);
318+
for (var i = 0; i < symbolPages.length; i++) {
319+
final value = symbolPages.getValue(i);
320+
if (value < 0.01) continue;
335321

336-
// add the page and re-sort the current results
337-
pages.add(entry);
338-
if (pages.length > 1) {
339-
pages.sort((a, b) => -a.value.compareTo(b.value));
340-
}
341-
// keep the results limited to the max count
342-
if (pages.length > maxApiPageCount) {
343-
pages.removeLast();
322+
final doc = symbolPages.keys[i];
323+
if (!packages.contains(doc.package)) continue;
324+
325+
// skip if the previously found pages are better than the current one
326+
final pages = topApiPages.putIfAbsent(doc.package, () => []);
327+
if (pages.length >= maxApiPageCount && pages.last.value > value) {
328+
continue;
329+
}
330+
331+
// update the top api packages score
332+
packageScores.setValueMaxOf(doc.index, value);
333+
334+
// add the page and re-sort the current results
335+
pages.add(MapEntry(doc.page.relativePath, value));
336+
if (pages.length > 1) {
337+
pages.sort((a, b) => -a.value.compareTo(b.value));
338+
}
339+
340+
// keep the results limited to the max count
341+
if (pages.length > maxApiPageCount) {
342+
pages.removeLast();
343+
}
344344
}
345345
}
346346

347-
final apiPkgScore = Score(apiPackages);
348-
var score = Score.max([core, apiPkgScore])
349-
.removeLowValues(fraction: 0.2, minValue: 0.01);
350-
351347
// filter results based on exact phrases
352348
final phrases = extractExactPhrases(text);
353349
if (!aborted && phrases.isNotEmpty) {
354-
final matched = <String, double>{};
355-
for (final MapEntry(key: package, value: packageScore)
356-
in score.entries) {
357-
final doc = _documentsByName[package]!;
358-
final bool matchedAllPhrases = phrases.every((phrase) =>
350+
for (var i = 0; i < packageScores.length; i++) {
351+
if (packageScores.isNotPositive(i)) continue;
352+
final doc = _documents[i];
353+
final matchedAllPhrases = phrases.every((phrase) =>
359354
doc.package.contains(phrase) ||
360355
doc.description!.contains(phrase) ||
361356
doc.readme!.contains(phrase));
362-
if (matchedAllPhrases) {
363-
matched[package] = packageScore;
357+
if (!matchedAllPhrases) {
358+
packageScores.setValue(i, 0);
364359
}
365360
}
366-
score = Score(matched);
367361
}
368362

369363
return _TextResults(
370-
score,
364+
packageScores.toScore(),
371365
topApiPages,
372366
nameMatches: nameMatches?.toList(),
373367
);
@@ -441,18 +435,6 @@ class InMemoryPackageIndex {
441435
if (x != 0) return x;
442436
return _compareUpdated(a, b);
443437
}
444-
445-
String _apiDocPageId(String package, ApiDocPage page) {
446-
return '$package::${page.relativePath}';
447-
}
448-
449-
String _apiDocPkg(String id) {
450-
return id.split('::').first;
451-
}
452-
453-
String _apiDocPath(String id) {
454-
return id.split('::').last;
455-
}
456438
}
457439

458440
class _TextResults {
@@ -494,7 +476,7 @@ class PackageNameIndex {
494476
/// Search [text] and return the matching packages with scores.
495477
@visibleForTesting
496478
Score search(String text) {
497-
IndexedScore? score;
479+
IndexedScore<String>? score;
498480
for (final w in splitForQuery(text)) {
499481
final s = searchWord(w, filterOnNonZeros: score);
500482
if (score == null) {
@@ -511,9 +493,9 @@ class PackageNameIndex {
511493
///
512494
/// When [filterOnNonZeros] is present, only the indexes with an already
513495
/// non-zero value are evaluated.
514-
IndexedScore searchWord(
496+
IndexedScore<String> searchWord(
515497
String word, {
516-
IndexedScore? filterOnNonZeros,
498+
IndexedScore<String>? filterOnNonZeros,
517499
}) {
518500
final score = IndexedScore(_packageNames);
519501
final singularWord = word.length <= 3 || !word.endsWith('s')
@@ -570,3 +552,11 @@ class IndexedPackageHit {
570552

571553
IndexedPackageHit(this.index, this.hit);
572554
}
555+
556+
class IndexedApiDocPage {
557+
final int index;
558+
final String package;
559+
final ApiDocPage page;
560+
561+
IndexedApiDocPage(this.index, this.package, this.page);
562+
}

app/lib/search/sdk_mem_index.dart

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class SdkMemIndex {
1818
final String _sdk;
1919
final String? _version;
2020
final Uri _baseUri;
21-
final _tokensPerLibrary = <String, TokenIndex>{};
21+
final _tokensPerLibrary = <String, TokenIndex<String>>{};
2222
final _baseUriPerLibrary = <String, String>{};
2323
final _descriptionPerLibrary = <String, String>{};
2424
final _libraryWeights = <String, double>{};
@@ -135,7 +135,8 @@ class SdkMemIndex {
135135
final isQualifiedQuery = query.contains(library.split(':').last);
136136

137137
final tokens = _tokensPerLibrary[library]!;
138-
final plainResults = tokens.searchWords(words).top(3, minValue: 0.05);
138+
final plainResults =
139+
tokens.searchWords(words).toScore().top(3, minValue: 0.05);
139140
if (plainResults.isEmpty) continue;
140141

141142
final libraryWeight = _libraryWeights[library] ?? 1.0;

app/lib/search/token_index.dart

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -141,15 +141,15 @@ class TokenMatch {
141141
}
142142

143143
/// Stores a token -> documentId inverted index with weights.
144-
class TokenIndex {
145-
final List<String> _ids;
144+
class TokenIndex<K> {
145+
final List<K> _ids;
146146

147147
/// Maps token strings to weighted documents (addressed via indexes).
148148
final _inverseIds = <String, Map<int, double>>{};
149149

150150
late final _length = _ids.length;
151151

152-
TokenIndex(List<String> ids, List<String?> values) : _ids = ids {
152+
TokenIndex(List<K> ids, List<String?> values) : _ids = ids {
153153
assert(ids.length == values.length);
154154
final length = values.length;
155155
for (var i = 0; i < length; i++) {
@@ -172,7 +172,7 @@ class TokenIndex {
172172
}
173173
}
174174

175-
factory TokenIndex.fromMap(Map<String, String> map) {
175+
factory TokenIndex.fromMap(Map<K, String> map) {
176176
final keys = map.keys.toList();
177177
final values = map.values.toList();
178178
return TokenIndex(keys, values);
@@ -206,18 +206,10 @@ class TokenIndex {
206206
return tokenMatch;
207207
}
208208

209-
/// Search the index for [text], with a (term-match / document coverage percent)
210-
/// scoring.
211-
@visibleForTesting
212-
Map<String, double> search(String text) {
213-
return searchWords(splitForQuery(text))._values;
214-
}
215-
216209
/// Search the index for [words], with a (term-match / document coverage percent)
217210
/// scoring.
218-
Score searchWords(List<String> words, {double weight = 1.0}) {
219-
if (words.isEmpty) return Score.empty;
220-
IndexedScore? score;
211+
IndexedScore<K> searchWords(List<String> words, {double weight = 1.0}) {
212+
IndexedScore<K>? score;
221213
weight = math.pow(weight, 1 / words.length).toDouble();
222214
for (final w in words) {
223215
final s = IndexedScore(_ids);
@@ -228,7 +220,7 @@ class TokenIndex {
228220
score.multiplyAllFrom(s);
229221
}
230222
}
231-
return score?.toScore() ?? Score.empty;
223+
return score ?? IndexedScore(_ids);
232224
}
233225

234226
/// Searches the index with [word] and stores the results in [score], using
@@ -250,16 +242,26 @@ class TokenIndex {
250242
}
251243
}
252244

245+
extension StringTokenIndexExt on TokenIndex<String> {
246+
/// Search the index for [text], with a (term-match / document coverage percent)
247+
/// scoring.
248+
@visibleForTesting
249+
Map<String, double> search(String text) {
250+
return searchWords(splitForQuery(text)).toScore();
251+
}
252+
}
253+
253254
/// Mutable score list that can be accessed via integer index.
254-
class IndexedScore {
255-
final List<String> _keys;
255+
class IndexedScore<K> {
256+
final List<K> _keys;
256257
final List<double> _values;
257258

258259
IndexedScore._(this._keys, this._values);
259260

260-
factory IndexedScore(List<String> keys, [double value = 0.0]) =>
261+
factory IndexedScore(List<K> keys, [double value = 0.0]) =>
261262
IndexedScore._(keys, List<double>.filled(keys.length, value));
262263

264+
List<K> get keys => _keys;
263265
late final length = _values.length;
264266

265267
bool isPositive(int index) {
@@ -270,6 +272,10 @@ class IndexedScore {
270272
return _values[index] <= 0.0;
271273
}
272274

275+
double getValue(int index) {
276+
return _values[index];
277+
}
278+
273279
void setValue(int index, double value) {
274280
_values[index] = value;
275281
}
@@ -278,7 +284,7 @@ class IndexedScore {
278284
_values[index] = math.max(_values[index], value);
279285
}
280286

281-
void removeWhere(bool Function(int index, String key) fn) {
287+
void removeWhere(bool Function(int index, K key) fn) {
282288
for (var i = 0; i < length; i++) {
283289
if (isNotPositive(i)) continue;
284290
if (fn(i, _keys[i])) {
@@ -287,7 +293,7 @@ class IndexedScore {
287293
}
288294
}
289295

290-
void retainWhere(bool Function(int index, String key) fn) {
296+
void retainWhere(bool Function(int index, K key) fn) {
291297
for (var i = 0; i < length; i++) {
292298
if (isNotPositive(i)) continue;
293299
if (!fn(i, _keys[i])) {
@@ -305,8 +311,8 @@ class IndexedScore {
305311
}
306312
}
307313

308-
Set<String> toKeySet() {
309-
final set = <String>{};
314+
Set<K> toKeySet() {
315+
final set = <K>{};
310316
for (var i = 0; i < _values.length; i++) {
311317
final v = _values[i];
312318
if (v > 0.0) {
@@ -315,7 +321,9 @@ class IndexedScore {
315321
}
316322
return set;
317323
}
324+
}
318325

326+
extension StringIndexedScoreExt on IndexedScore<String> {
319327
Score toScore() {
320328
final map = <String, double>{};
321329
for (var i = 0; i < _values.length; i++) {

app/test/search/token_index_test.dart

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ void main() {
1818
});
1919

2020
test('No match', () {
21-
final TokenIndex index = TokenIndex.fromMap({
21+
final index = TokenIndex.fromMap({
2222
'uri://http': 'http',
2323
'uri://http_magic': 'http_magic',
2424
});
@@ -30,7 +30,7 @@ void main() {
3030
});
3131

3232
test('Scoring exact and partial matches', () {
33-
final TokenIndex index = TokenIndex.fromMap({
33+
final index = TokenIndex.fromMap({
3434
'uri://http': 'http',
3535
'uri://http_magic': 'http_magic',
3636
});
@@ -42,7 +42,7 @@ void main() {
4242

4343
test('CamelCase indexing', () {
4444
final String queueText = '.DoubleLinkedQueue()';
45-
final TokenIndex index = TokenIndex.fromMap({
45+
final index = TokenIndex.fromMap({
4646
'queue': queueText,
4747
'queue_lower': queueText.toLowerCase(),
4848
'unmodifiable': 'CustomUnmodifiableMapBase',
@@ -57,7 +57,7 @@ void main() {
5757
});
5858

5959
test('Wierd cases: riak client', () {
60-
final TokenIndex index = TokenIndex.fromMap({
60+
final index = TokenIndex.fromMap({
6161
'uri://cli': 'cli',
6262
'uri://riak_client': 'riak_client',
6363
'uri://teamspeak': 'teamspeak',

0 commit comments

Comments
 (0)