diff --git a/app/lib/search/mem_index.dart b/app/lib/search/mem_index.dart index b57abd3ce2..53d5e46784 100644 --- a/app/lib/search/mem_index.dart +++ b/app/lib/search/mem_index.dart @@ -12,7 +12,6 @@ import 'package:meta/meta.dart'; import 'package:pub_dev/service/topics/models.dart'; import 'package:pub_dev/third_party/bit_array/bit_array.dart'; -import '../shared/utils.dart' show boundedList; import 'models.dart'; import 'search_service.dart'; import 'text_utils.dart'; @@ -142,9 +141,9 @@ class InMemoryPackageIndex { return PackageSearchResult.empty(); } return _bitArrayPool.withPoolItem(fn: (array) { - return _scorePool.withPoolItem( - fn: (score) { - return _search(query, array, score); + return _scorePool.withItemGetter( + (scoreFn) { + return _search(query, array, scoreFn); }, ); }); @@ -220,88 +219,107 @@ class InMemoryPackageIndex { PackageSearchResult _search( ServiceSearchQuery query, BitArray packages, - IndexedScore packageScores, + IndexedScore Function() scoreFn, ) { final predicateFilterCount = _filterOnPredicates(query, packages); if (predicateFilterCount <= query.offset) { return PackageSearchResult.empty(); } - - // TODO: find a better way to handle predicate-only filtering and scoring - for (final index in packages.asIntIterable()) { - if (index >= _documents.length) break; - packageScores.setValue(index, 1.0); - } + final bestNameMatch = _bestNameMatch(query); + final bestNameIndex = + bestNameMatch == null ? null : _nameToIndex[bestNameMatch]; // do text matching final parsedQueryText = query.parsedQuery.text; - final textResults = _searchText( - packageScores, - packages, - parsedQueryText, - textMatchExtent: query.textMatchExtent ?? TextMatchExtent.api, - ); + _TextResults? textResults; + IndexedScore? packageScores; + + if (parsedQueryText != null && parsedQueryText.isNotEmpty) { + packageScores = scoreFn(); + textResults = _searchText( + packageScores, + packages, + parsedQueryText, + textMatchExtent: query.textMatchExtent ?? TextMatchExtent.api, + ); + if (textResults.hasNoMatch) { + return textResults.errorMessage == null + ? PackageSearchResult.empty() + : PackageSearchResult.error( + errorMessage: textResults.errorMessage, + statusCode: 500, + ); + } + } - final bestNameMatch = _bestNameMatch(query); + // The function takes the document index as parameter and returns whether + // it should be in the result set. When text search is applied, the + // [packageScores] contains the scores of the results, otherwise we are + // using the bitarray index of the filtering. + final selectFn = packageScores?.isPositive ?? packages.isSet; + + // We know the total count at this point, we don't need to build the fully + // sorted result list to get the number. The best name match may insert an + // extra item, that will be addressed after the ranking score is determined. + var totalCount = packageScores?.positiveCount() ?? predicateFilterCount; - List indexedHits; - switch (query.effectiveOrder ?? SearchOrder.top) { + Iterable indexedHits; + switch (query.effectiveOrder) { case SearchOrder.top: - if (textResults == null) { - indexedHits = _overallOrderedHits.whereInScores(packageScores); + case SearchOrder.text: + if (packageScores == null) { + indexedHits = _overallOrderedHits.whereInScores(selectFn); break; } - /// Adjusted score takes the overall score and transforms - /// it linearly into the [0.4-1.0] range, to allow better - /// multiplication outcomes. - packageScores.multiplyAllFromValues(_adjustedOverallScores); - indexedHits = _rankWithValues( - packageScores, - requiredLengthThreshold: query.offset, - bestNameMatch: bestNameMatch, - ); - break; - case SearchOrder.text: + if (query.effectiveOrder == SearchOrder.top) { + /// Adjusted score takes the overall score and transforms + /// it linearly into the [0.4-1.0] range, to allow better + /// multiplication outcomes. + packageScores.multiplyAllFromValues(_adjustedOverallScores); + } + // Check whether the best name match will increase the total item count. + if (bestNameIndex != null && + packageScores.getValue(bestNameIndex) <= 0.0) { + totalCount++; + } indexedHits = _rankWithValues( packageScores, requiredLengthThreshold: query.offset, - bestNameMatch: bestNameMatch, + bestNameIndex: bestNameIndex ?? -1, ); break; case SearchOrder.created: - indexedHits = _createdOrderedHits.whereInScores(packageScores); + indexedHits = _createdOrderedHits.whereInScores(selectFn); break; case SearchOrder.updated: - indexedHits = _updatedOrderedHits.whereInScores(packageScores); + indexedHits = _updatedOrderedHits.whereInScores(selectFn); break; // ignore: deprecated_member_use case SearchOrder.popularity: case SearchOrder.downloads: - indexedHits = _downloadsOrderedHits.whereInScores(packageScores); + indexedHits = _downloadsOrderedHits.whereInScores(selectFn); break; case SearchOrder.like: - indexedHits = _likesOrderedHits.whereInScores(packageScores); + indexedHits = _likesOrderedHits.whereInScores(selectFn); break; case SearchOrder.points: - indexedHits = _pointsOrderedHits.whereInScores(packageScores); + indexedHits = _pointsOrderedHits.whereInScores(selectFn); break; case SearchOrder.trending: - indexedHits = _trendingOrderedHits.whereInScores(packageScores); + indexedHits = _trendingOrderedHits.whereInScores(selectFn); break; } - // bound by offset and limit (or randomize items) - final totalCount = indexedHits.length; - indexedHits = - boundedList(indexedHits, offset: query.offset, limit: query.limit); + // bound by offset and limit + indexedHits = indexedHits.skip(query.offset).take(query.limit); late List packageHits; if ((query.textMatchExtent ?? TextMatchExtent.api).shouldMatchApi() && textResults != null && (textResults.topApiPages?.isNotEmpty ?? false)) { packageHits = indexedHits.map((ps) { - final apiPages = textResults.topApiPages?[ps.index] + final apiPages = textResults!.topApiPages?[ps.index] // TODO(https://github.com/dart-lang/pub-dev/issues/7106): extract title for the page ?.map((MapEntry e) => ApiPageRef(path: e.key)) .toList(); @@ -380,33 +398,30 @@ class InMemoryPackageIndex { }).toList(); } - _TextResults? _searchText( + _TextResults _searchText( IndexedScore packageScores, BitArray packages, - String? text, { + String text, { required TextMatchExtent textMatchExtent, }) { - if (text == null || text.isEmpty) { - return null; - } - final sw = Stopwatch()..start(); final words = splitForQuery(text); if (words.isEmpty) { - // packages.clearAll(); - packageScores.fillRange(0, packageScores.length, 0); return _TextResults.empty(); } final matchName = textMatchExtent.shouldMatchName(); if (!matchName) { - // packages.clearAll(); - packageScores.fillRange(0, packageScores.length, 0); return _TextResults.empty( errorMessage: 'Search index in reduced mode: unable to match query text.'); } + for (final index in packages.asIntIterable()) { + if (index >= _documents.length) break; + packageScores.setValue(index, 1.0); + } + bool aborted = false; bool checkAborted() { if (!aborted && sw.elapsed > _textSearchTimeout) { @@ -500,19 +515,18 @@ class InMemoryPackageIndex { List _rankWithValues( IndexedScore score, { // if the item count is fewer than this threshold, an empty list will be returned - int? requiredLengthThreshold, - String? bestNameMatch, + required int requiredLengthThreshold, + // When no best name match is applied, this parameter will be `-1` + required int bestNameIndex, }) { final list = []; - final bestNameIndex = - bestNameMatch == null ? null : _nameToIndex[bestNameMatch]; for (var i = 0; i < score.length; i++) { final value = score.getValue(i); if (value <= 0.0 && i != bestNameIndex) continue; list.add(IndexedPackageHit( i, PackageHit(package: score.keys[i], score: value))); } - if ((requiredLengthThreshold ?? 0) > list.length) { + if (requiredLengthThreshold > list.length) { // There is no point to sort or even keep the results, as the search query offset ignores these anyway. return []; } @@ -582,6 +596,7 @@ class InMemoryPackageIndex { } class _TextResults { + final bool hasNoMatch; final List>?>? topApiPages; final String? errorMessage; @@ -589,12 +604,14 @@ class _TextResults { return _TextResults( null, errorMessage: errorMessage, + hasNoMatch: true, ); } _TextResults( this.topApiPages, { this.errorMessage, + this.hasNoMatch = false, }); } @@ -713,8 +730,8 @@ class _PkgNameData { } extension on List { - List whereInScores(IndexedScore scores) { - return where((h) => scores.isPositive(h.index)).toList(); + Iterable whereInScores(bool Function(int index) select) { + return where((h) => select(h.index)); } } diff --git a/app/lib/search/search_service.dart b/app/lib/search/search_service.dart index 0ca439c557..49cf97db3f 100644 --- a/app/lib/search/search_service.dart +++ b/app/lib/search/search_service.dart @@ -282,13 +282,11 @@ class ServiceSearchQuery { /// - URL query sort [order] is used as a fallback. /// /// TODO: remove this field when [order] is removed. - late final effectiveOrder = parsedQuery.order ?? order; + late final effectiveOrder = parsedQuery.order ?? order ?? SearchOrder.top; bool get _hasQuery => query != null && query!.isNotEmpty; bool get _hasOnlyFreeText => _hasQuery && parsedQuery.hasOnlyFreeText; bool get isNaturalOrder => - effectiveOrder == null || - effectiveOrder == SearchOrder.top || - effectiveOrder == SearchOrder.text; + effectiveOrder == SearchOrder.top || effectiveOrder == SearchOrder.text; bool get _hasNoOwnershipScope => publisherId == null; bool get _isFlutterFavorite => tagsPredicate.hasTag(PackageTags.isFlutterFavorite); diff --git a/app/lib/search/token_index.dart b/app/lib/search/token_index.dart index 9dcdb3d045..038955af09 100644 --- a/app/lib/search/token_index.dart +++ b/app/lib/search/token_index.dart @@ -177,6 +177,8 @@ abstract class _AllocationPool { _pool.add(item); } + /// Executes [fn] and provides a pool item in the callback. + /// The item will be released to the pool after [fn] completes. R withPoolItem({ required R Function(T array) fn, }) { @@ -185,6 +187,28 @@ abstract class _AllocationPool { _release(item); return r; } + + /// Executes [fn] and provides a getter function that can be used to + /// acquire new pool items while the [fn] is being executed. The + /// acquired items will be released back to the pool after [fn] completes. + R withItemGetter(R Function(T Function() itemFn) fn) { + List? items; + T itemFn() { + items ??= []; + final item = _acquire(); + items!.add(item); + return item; + } + + final r = fn(itemFn); + + if (items != null) { + for (final item in items!) { + _release(item); + } + } + return r; + } } /// A reusable pool for [IndexedScore] instances to spare some memory allocation. @@ -225,6 +249,14 @@ class IndexedScore { List get keys => _keys; late final length = _values.length; + int positiveCount() { + var count = 0; + for (var i = 0; i < length; i++) { + if (isPositive(i)) count++; + } + return count; + } + bool isPositive(int index) { return _values[index] > 0.0; } diff --git a/app/lib/shared/utils.dart b/app/lib/shared/utils.dart index 731bd9073a..ee659ff224 100644 --- a/app/lib/shared/utils.dart +++ b/app/lib/shared/utils.dart @@ -154,22 +154,6 @@ String contentType(String name) { return mime.defaultExtensionMap[ext] ?? 'application/octet-stream'; } -/// Returns a subset of the list, bounded by [offset] and [limit]. -List boundedList(List list, {int offset = 0, int limit = 0}) { - Iterable iterable = list; - if (offset > 0) { - if (offset >= list.length) { - return []; - } else { - iterable = iterable.skip(offset); - } - } - if (limit > 0) { - iterable = iterable.take(limit); - } - return iterable.toList(); -} - /// Returns a UUID in v4 format as a `String`. /// /// If [bytes] is provided, it must be length 16 and have values between `0` and diff --git a/app/lib/third_party/bit_array/bit_array.dart b/app/lib/third_party/bit_array/bit_array.dart index 3934f8081b..2a27e83d44 100644 --- a/app/lib/third_party/bit_array/bit_array.dart +++ b/app/lib/third_party/bit_array/bit_array.dart @@ -63,6 +63,8 @@ class BitArray extends BitSet { return BitArray._(data); } + bool isSet(int index) => this[index]; + /// The value of the bit with the specified [index]. @override bool operator [](int index) { diff --git a/app/test/shared/utils_test.dart b/app/test/shared/utils_test.dart index 3f6d33c362..c882064f94 100644 --- a/app/test/shared/utils_test.dart +++ b/app/test/shared/utils_test.dart @@ -8,30 +8,6 @@ import 'package:pub_semver/pub_semver.dart'; import 'package:test/test.dart'; void main() { - group('boundedList', () { - final numbers10 = List.generate(10, (i) => i); - - test('empty bounds', () { - expect(boundedList(numbers10), numbers10); - }); - - test('offset only', () { - expect(boundedList(numbers10, offset: 6), [6, 7, 8, 9]); - expect(boundedList(numbers10, offset: 16), []); - }); - - test('limit only', () { - expect(boundedList(numbers10, limit: 0), numbers10); - expect(boundedList(numbers10, limit: 3), [0, 1, 2]); - expect(boundedList(numbers10, limit: 13), numbers10); - }); - - test('offset and limit', () { - expect(boundedList(numbers10, offset: 1, limit: 3), [1, 2, 3]); - expect(boundedList(numbers10, offset: 9, limit: 10), [9]); - }); - }); - group('uuid', () { test('format known UUId', () { expect(createUuid(List.filled(16, 0)),