diff --git a/app/lib/search/mem_index.dart b/app/lib/search/mem_index.dart index 55f02e28b9..3297266849 100644 --- a/app/lib/search/mem_index.dart +++ b/app/lib/search/mem_index.dart @@ -226,6 +226,7 @@ class InMemoryPackageIndex { packageScores, parsedQueryText, includeNameMatches: (query.offset ?? 0) == 0, + textMatchExtent: query.textMatchExtent ?? TextMatchExtent.api, ); final nameMatches = textResults?.nameMatches; @@ -287,7 +288,9 @@ class InMemoryPackageIndex { boundedList(indexedHits, offset: query.offset, limit: query.limit); late List packageHits; - if (textResults != null && (textResults.topApiPages?.isNotEmpty ?? false)) { + if ((query.textMatchExtent ?? TextMatchExtent.api).shouldMatchApi() && + textResults != null && + (textResults.topApiPages?.isNotEmpty ?? false)) { packageHits = indexedHits.map((ps) { final apiPages = textResults.topApiPages?[ps.index] // TODO(https://github.com/dart-lang/pub-dev/issues/7106): extract title for the page @@ -305,6 +308,7 @@ class InMemoryPackageIndex { nameMatches: nameMatches, topicMatches: topicMatches, packageHits: packageHits, + errorMessage: textResults?.errorMessage, ); } @@ -332,61 +336,81 @@ class InMemoryPackageIndex { IndexedScore packageScores, String? 
text, { required bool includeNameMatches, + required TextMatchExtent textMatchExtent, }) { + if (text == null || text.isEmpty) { + return null; + } + final sw = Stopwatch()..start(); - if (text != null && text.isNotEmpty) { - final words = splitForQuery(text); - if (words.isEmpty) { - for (var i = 0; i < packageScores.length; i++) { - packageScores.setValue(i, 0); - } - return _TextResults.empty(); - } + final words = splitForQuery(text); + if (words.isEmpty) { + packageScores.fillRange(0, packageScores.length, 0); + return _TextResults.empty(); + } - bool aborted = false; + final matchName = textMatchExtent.shouldMatchName(); + if (!matchName) { + packageScores.fillRange(0, packageScores.length, 0); + return _TextResults.empty( + errorMessage: + 'Search index in reduced mode: unable to match query text.'); + } - bool checkAborted() { - if (!aborted && sw.elapsed > _textSearchTimeout) { - aborted = true; - _logger.info( - '[pub-aborted-search-query] Aborted text search after ${sw.elapsedMilliseconds} ms.'); - } - return aborted; + bool aborted = false; + bool checkAborted() { + if (!aborted && sw.elapsed > _textSearchTimeout) { + aborted = true; + _logger.info( + '[pub-aborted-search-query] Aborted text search after ${sw.elapsedMilliseconds} ms.'); } + return aborted; + } + + Set? nameMatches; + if (includeNameMatches && _documentsByName.containsKey(text)) { + nameMatches ??= {}; + nameMatches.add(text); + } + + // Multiple words are scored separately, and then the individual scores + // are multiplied. We can use a package filter that is applied after each + // word to reduce the scope of the later words based on the previous results. + /// However, API docs search should be filtered on the original list. + final indexedPositiveList = packageScores.toIndexedPositiveList(); - Set? 
nameMatches; - if (includeNameMatches && _documentsByName.containsKey(text)) { + final matchDescription = textMatchExtent.shouldMatchDescription(); + final matchReadme = textMatchExtent.shouldMatchReadme(); + final matchApi = textMatchExtent.shouldMatchApi(); + + for (final word in words) { + if (includeNameMatches && _documentsByName.containsKey(word)) { nameMatches ??= {}; - nameMatches.add(text); + nameMatches.add(word); } - // Multiple words are scored separately, and then the individual scores - // are multiplied. We can use a package filter that is applied after each - // word to reduce the scope of the later words based on the previous results. - /// However, API docs search should be filtered on the original list. - final indexedPositiveList = packageScores.toIndexedPositiveList(); - - for (final word in words) { - if (includeNameMatches && _documentsByName.containsKey(word)) { - nameMatches ??= {}; - nameMatches.add(word); - } + _scorePool.withScore( + value: 0.0, + fn: (wordScore) { + _packageNameIndex.searchWord(word, + score: wordScore, filterOnNonZeros: packageScores); - _scorePool.withScore( - value: 0.0, - fn: (wordScore) { - _packageNameIndex.searchWord(word, - score: wordScore, filterOnNonZeros: packageScores); + if (matchDescription) { _descrIndex.searchAndAccumulate(word, score: wordScore); + } + if (matchReadme) { _readmeIndex.searchAndAccumulate(word, weight: 0.75, score: wordScore); - packageScores.multiplyAllFrom(wordScore); - }, - ); - } + } + packageScores.multiplyAllFrom(wordScore); + }, + ); + } - final topApiPages = - List>?>.filled(_documents.length, null); + final topApiPages = + List>?>.filled(_documents.length, null); + + if (matchApi) { const maxApiPageCount = 2; if (!checkAborted()) { _apiSymbolIndex.withSearchWords(words, weight: 0.70, (symbolPages) { @@ -420,29 +444,28 @@ class InMemoryPackageIndex { } }); } + } - // filter results based on exact phrases - final phrases = extractExactPhrases(text); - if (!aborted && 
phrases.isNotEmpty) { - for (var i = 0; i < packageScores.length; i++) { - if (packageScores.isNotPositive(i)) continue; - final doc = _documents[i]; - final matchedAllPhrases = phrases.every((phrase) => - doc.package.contains(phrase) || - doc.description!.contains(phrase) || - doc.readme!.contains(phrase)); - if (!matchedAllPhrases) { - packageScores.setValue(i, 0); - } + // filter results based on exact phrases + final phrases = extractExactPhrases(text); + if (!aborted && phrases.isNotEmpty) { + for (var i = 0; i < packageScores.length; i++) { + if (packageScores.isNotPositive(i)) continue; + final doc = _documents[i]; + final matchedAllPhrases = phrases.every((phrase) => + (matchName && doc.package.contains(phrase)) || + (matchDescription && doc.description!.contains(phrase)) || + (matchReadme && doc.readme!.contains(phrase))); + if (!matchedAllPhrases) { + packageScores.setValue(i, 0); } } - - return _TextResults( - topApiPages, - nameMatches: nameMatches?.toList(), - ); } - return null; + + return _TextResults( + topApiPages, + nameMatches: nameMatches?.toList(), + ); } List _rankWithValues( @@ -521,15 +544,20 @@ class InMemoryPackageIndex { class _TextResults { final List>?>? topApiPages; final List? nameMatches; + final String? errorMessage; - factory _TextResults.empty() => _TextResults( - null, - nameMatches: null, - ); + factory _TextResults.empty({String? 
errorMessage}) { + return _TextResults( + null, + nameMatches: null, + errorMessage: errorMessage, + ); + } _TextResults( this.topApiPages, { required this.nameMatches, + this.errorMessage, }); } diff --git a/app/lib/search/search_service.dart b/app/lib/search/search_service.dart index 11c67d1e31..43fa2e05fd 100644 --- a/app/lib/search/search_service.dart +++ b/app/lib/search/search_service.dart @@ -8,6 +8,7 @@ import 'dart:math' show max; import 'package:_pub_shared/search/search_form.dart'; import 'package:_pub_shared/search/tags.dart'; import 'package:clock/clock.dart'; +import 'package:collection/collection.dart'; import 'package:json_annotation/json_annotation.dart'; import 'package:pub_dev/shared/utils.dart'; @@ -165,6 +166,9 @@ class ServiceSearchQuery { final int? offset; final int? limit; + /// The scope/depth of text matching. + final TextMatchExtent? textMatchExtent; + ServiceSearchQuery._({ this.query, TagsPredicate? tagsPredicate, @@ -173,6 +177,7 @@ class ServiceSearchQuery { this.order, this.offset, this.limit, + this.textMatchExtent, }) : parsedQuery = ParsedQueryText.parse(query), tagsPredicate = tagsPredicate ?? TagsPredicate(), publisherId = publisherId?.trimToNull(); @@ -185,6 +190,7 @@ class ServiceSearchQuery { int? minPoints, int offset = 0, int? limit = 10, + TextMatchExtent? textMatchExtent, }) { final q = query?.trimToNull(); return ServiceSearchQuery._( @@ -195,6 +201,7 @@ class ServiceSearchQuery { order: order, offset: offset, limit: limit, + textMatchExtent: textMatchExtent, ); } @@ -210,6 +217,10 @@ class ServiceSearchQuery { int.tryParse(uri.queryParameters['minPoints'] ?? '0') ?? 0; final offset = int.tryParse(uri.queryParameters['offset'] ?? '0') ?? 0; final limit = int.tryParse(uri.queryParameters['limit'] ?? '0') ?? 0; + final textMatchExtentValue = + uri.queryParameters['textMatchExtent']?.trim() ?? 
''; + final textMatchExtent = TextMatchExtent.values + .firstWhereOrNull((e) => e.name == textMatchExtentValue); return ServiceSearchQuery.parse( query: q, @@ -219,6 +230,7 @@ class ServiceSearchQuery { minPoints: minPoints, offset: max(0, offset), limit: max(_minSearchLimit, limit), + textMatchExtent: textMatchExtent, ); } @@ -229,6 +241,7 @@ class ServiceSearchQuery { SearchOrder? order, int? offset, int? limit, + TextMatchExtent? textMatchExtent, }) { return ServiceSearchQuery._( query: query ?? this.query, @@ -238,6 +251,7 @@ class ServiceSearchQuery { minPoints: minPoints, offset: offset ?? this.offset, limit: limit ?? this.limit, + textMatchExtent: textMatchExtent ?? this.textMatchExtent, ); } @@ -251,6 +265,7 @@ class ServiceSearchQuery { 'minPoints': minPoints.toString(), 'limit': limit?.toString(), 'order': order?.name, + if (textMatchExtent != null) 'textMatchExtent': textMatchExtent!.name, }; map.removeWhere((k, v) => v == null); return map; @@ -277,7 +292,8 @@ class ServiceSearchQuery { _hasOnlyFreeText && _isNaturalOrder && _hasNoOwnershipScope && - !_isFlutterFavorite; + !_isFlutterFavorite && + (textMatchExtent ?? TextMatchExtent.api).shouldMatchApi(); bool get considerHighlightedHit => _hasOnlyFreeText && _hasNoOwnershipScope; bool get includeHighlightedHit => considerHighlightedHit && offset == 0; @@ -295,6 +311,38 @@ class ServiceSearchQuery { } } +/// The scope (depth) of the text matching. +enum TextMatchExtent { + /// No text search is done. + /// Requests with text queries will return a failure message. + none, + + /// Text search is on package names. + name, + + /// Text search is on package names, descriptions and topic tags. + description, + + /// Text search is on names, descriptions, topic tags and readme content. + readme, + + /// Text search is on names, descriptions, topic tags, readme content and API symbols. + api, + ; + + /// Whether package names are matched: true for every extent except [none].
+ bool shouldMatchName() => index >= name.index; + + /// Whether package descriptions are matched: true for [description], [readme] and [api]. + bool shouldMatchDescription() => index >= description.index; + + /// Whether readme content is matched: true for [readme] and [api]. + bool shouldMatchReadme() => index >= readme.index; + + /// Whether API symbols are matched: true only for [api]. + bool shouldMatchApi() => index >= api.index; +} + class QueryValidity { final String? rejectReason; diff --git a/app/lib/service/entrypoint/search.dart b/app/lib/service/entrypoint/search.dart index db26223e5a..7b650f1b12 100644 --- a/app/lib/service/entrypoint/search.dart +++ b/app/lib/service/entrypoint/search.dart @@ -43,7 +43,7 @@ class SearchCommand extends Command { ); registerScopeExitCallback(index.close); - registerSearchIndex(IsolateSearchIndex(index)); + registerSearchIndex(LatencyAwareSearchIndex(IsolateSearchIndex(index))); void scheduleRenew() { scheduleMicrotask(() async { diff --git a/app/lib/service/entrypoint/search_index.dart b/app/lib/service/entrypoint/search_index.dart index a408195de8..7fd5abfe8f 100644 --- a/app/lib/service/entrypoint/search_index.dart +++ b/app/lib/service/entrypoint/search_index.dart @@ -19,6 +19,7 @@ import 'package:pub_dev/service/services.dart'; import 'package:pub_dev/shared/env_config.dart'; import 'package:pub_dev/shared/logging.dart'; import 'package:pub_dev/shared/monitoring.dart'; +import 'package:pub_dev/shared/utils.dart'; final _logger = Logger('search_index'); @@ -137,3 +138,70 @@ class IsolateSearchIndex implements SearchIndex { ); } } + +/// A search index that adjusts the extent of the text matching based on the +/// observed recent latency (adjusted with a 1-minute half-life decay). 
+class LatencyAwareSearchIndex implements SearchIndex { + final SearchIndex _delegate; + final _latencyTracker = DecayingMaxLatencyTracker(); + + LatencyAwareSearchIndex(this._delegate); + + @override + FutureOr indexInfo() => _delegate.indexInfo(); + + @override + FutureOr isReady() => _delegate.isReady(); + + @override + Future search(ServiceSearchQuery query) async { + final sw = Stopwatch()..start(); + try { + return await _delegate.search(query.change( + textMatchExtent: _selectTextMatchExtent(), + )); + } finally { + sw.stop(); + final elapsed = sw.elapsed; + // Note: The maximum latency value here limits how long an outlier + // processing will affect later queries. With the current 1-minute + // decay half-life, it will allow: + // - name-only search after about 2.5 minutes, + // - descriptions after 4 minutes, + // - readmes after about 5 minutes, + // - everything after about 6 minutes. + _latencyTracker.observe( + elapsed.inMinutes >= 1 ? const Duration(minutes: 1) : elapsed); + } + } + + /// Selects the text match extent value based on the recent maximum latency. + /// + /// Note: the latency here may be a residue of a large spike that happened + /// more than a few minutes ago, therefore we are deciding on latency + /// range over the default 5 seconds timeout window. + TextMatchExtent _selectTextMatchExtent() { + final latency = _latencyTracker.getLatency(); + if (latency < const Duration(seconds: 1)) { + _logger.info('[text-match-normal]'); + return TextMatchExtent.api; + } + if (latency < const Duration(seconds: 2)) { + _logger.info('[text-match-readme]'); + return TextMatchExtent.readme; + } + if (latency < const Duration(seconds: 4)) { + _logger.info('[text-match-description]'); + // TODO: use `TextMatchExtent.description` after we are confident about this change. 
+ return TextMatchExtent.readme; + } + if (latency < const Duration(seconds: 10)) { + _logger.info('[text-match-name]'); + // TODO: use `TextMatchExtent.name` after we are confident about this change. + return TextMatchExtent.readme; + } + // TODO: use `TextMatchExtent.none` after we are confident about this change. + _logger.info('[text-match-none]'); + return TextMatchExtent.readme; + } +} diff --git a/app/lib/shared/utils.dart b/app/lib/shared/utils.dart index 6990c243ff..aea763795b 100644 --- a/app/lib/shared/utils.dart +++ b/app/lib/shared/utils.dart @@ -10,6 +10,7 @@ import 'dart:math'; import 'dart:typed_data'; import 'package:appengine/appengine.dart'; +import 'package:clock/clock.dart'; import 'package:intl/intl.dart'; // ignore: implementation_imports import 'package:mime/src/default_extension_map.dart' as mime; @@ -305,3 +306,55 @@ extension ByteFolderExt on Stream> { return buffer.toBytes(); } } + +/// Tracks the maximum latency by observing each latency value and keeping the maximum. +/// The tracked maximum value decays, halving once per half-life period (one minute by default). +class DecayingMaxLatencyTracker { + final Duration _halfLifePeriod; + + int _value = 0; + DateTime _lastUpdated = clock.now(); + + DecayingMaxLatencyTracker({ + Duration? halfLifePeriod, + }) : _halfLifePeriod = halfLifePeriod ?? Duration(minutes: 1); + + void _decay({ + required DateTime now, + Duration? updateDelay, + }) { + updateDelay ??= Duration.zero; + final diff = now.difference(_lastUpdated); + if (diff <= updateDelay) { + return; + } + final multiplier = + pow(0.5, diff.inMicroseconds / _halfLifePeriod.inMicroseconds); + _value = (_value * multiplier).round(); + _lastUpdated = now; + } + + Duration getLatency({ + DateTime? now, + Duration? updateDelay, + }) { + _decay( + now: now ?? clock.now(), + updateDelay: updateDelay ?? const Duration(seconds: 1), + ); + return Duration(microseconds: _value); + } + + void observe( + Duration duration, { + DateTime? 
now, + }) { + now ??= clock.now(); + _decay(now: now); + final value = duration.inMicroseconds; + if (_value < value) { + _value = value; + _lastUpdated = now; + } + } +} diff --git a/app/test/shared/utils_test.dart b/app/test/shared/utils_test.dart index 6062ecafdc..3f6d33c362 100644 --- a/app/test/shared/utils_test.dart +++ b/app/test/shared/utils_test.dart @@ -2,6 +2,7 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. +import 'package:clock/clock.dart'; import 'package:pub_dev/shared/utils.dart'; import 'package:pub_semver/pub_semver.dart'; import 'package:test/test.dart'; @@ -78,4 +79,22 @@ void main() { expect(compare('2.0.0-dev', '1.9.0'), 1); }); }); + + group('DecayingMaxLatencyTracker', () { + late final DateTime now; + + DateTime _nowPlus(int seconds) => now.add(Duration(seconds: seconds)); + + test('decays', () { + final tracker = + DecayingMaxLatencyTracker(halfLifePeriod: Duration(seconds: 10)); + now = clock.now(); + tracker.observe(Duration(seconds: 40), now: now); + expect(tracker.getLatency(now: now).inMilliseconds, 40000); + expect(tracker.getLatency(now: _nowPlus(10)).inMilliseconds, 20000); + expect(tracker.getLatency(now: _nowPlus(20)).inMilliseconds, 10000); + tracker.observe(Duration(seconds: 20), now: _nowPlus(25)); + expect(tracker.getLatency().inMilliseconds, greaterThan(15000)); + }); + }); }