diff --git a/docs/changelog/136646.yaml b/docs/changelog/136646.yaml new file mode 100644 index 0000000000000..9e57724c356cd --- /dev/null +++ b/docs/changelog/136646.yaml @@ -0,0 +1,5 @@ +pr: 136646 +summary: Can match search shard phase APM metric +area: Search +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchPhaseAPMMetrics.java b/server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchPhaseAPMMetrics.java index f35be08dc6472..2503b76b1def8 100644 --- a/server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchPhaseAPMMetrics.java +++ b/server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchPhaseAPMMetrics.java @@ -22,15 +22,23 @@ public final class ShardSearchPhaseAPMMetrics implements SearchOperationListener { + public static final String CAN_MATCH_SEARCH_PHASE_METRIC = "es.search.shards.phases.can_match.duration.histogram"; public static final String DFS_SEARCH_PHASE_METRIC = "es.search.shards.phases.dfs.duration.histogram"; public static final String QUERY_SEARCH_PHASE_METRIC = "es.search.shards.phases.query.duration.histogram"; public static final String FETCH_SEARCH_PHASE_METRIC = "es.search.shards.phases.fetch.duration.histogram"; + private final LongHistogram canMatchPhaseMetric; private final LongHistogram dfsPhaseMetric; private final LongHistogram queryPhaseMetric; private final LongHistogram fetchPhaseMetric; public ShardSearchPhaseAPMMetrics(MeterRegistry meterRegistry) { + this.canMatchPhaseMetric = meterRegistry.registerLongHistogram( + CAN_MATCH_SEARCH_PHASE_METRIC, + "Can match phase execution times at the shard level, expressed as a histogram", + "ms" + ); + this.dfsPhaseMetric = meterRegistry.registerLongHistogram( DFS_SEARCH_PHASE_METRIC, "DFS search phase execution times at the shard level, expressed as a histogram", @@ -48,6 +56,11 @@ public ShardSearchPhaseAPMMetrics(MeterRegistry meterRegistry) { ); } + @Override + public void onCanMatchPhase(long tookInNanos) { + recordPhaseLatency(canMatchPhaseMetric, tookInNanos); + } + @Override public void onDfsPhase(SearchContext searchContext, long tookInNanos) { recordPhaseLatency(dfsPhaseMetric, tookInNanos); diff --git a/server/src/main/java/org/elasticsearch/index/shard/SearchOperationListener.java b/server/src/main/java/org/elasticsearch/index/shard/SearchOperationListener.java index 9dc84090a610f..08e4c10169024 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/SearchOperationListener.java +++ b/server/src/main/java/org/elasticsearch/index/shard/SearchOperationListener.java @@ -87,6 +87,14 @@ default void onDfsPhase(SearchContext searchContext, long tookInNanos) {} */ default void onFailedDfsPhase(SearchContext searchContext) {} + /** + * Executed after the can-match phase successfully finished. + * Note: this is not invoked if the can match phase execution failed. + * + * @param tookInNanos the number of nanoseconds the can-match execution took + */ + default void onCanMatchPhase(long tookInNanos) {} + /** * Executed when a new reader context was created * @param readerContext the created context @@ -237,6 +245,17 @@ public void onDfsPhase(SearchContext searchContext, long tookInNanos) { } } + @Override + public void onCanMatchPhase(long tookInNanos) { + for (SearchOperationListener listener : listeners) { + try { + listener.onCanMatchPhase(tookInNanos); + } catch (Exception e) { + logger.warn(() -> "onCanMatchPhase listener [" + listener + "] failed", e); + } + } + } + @Override public void onNewReaderContext(ReaderContext readerContext) { for (SearchOperationListener listener : listeners) { diff --git a/server/src/main/java/org/elasticsearch/search/SearchService.java b/server/src/main/java/org/elasticsearch/search/SearchService.java index 2c53df81041b2..eebf1703e4b62 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchService.java +++ b/server/src/main/java/org/elasticsearch/search/SearchService.java @@ -1937,9 +1937,14 @@ public void canMatch(CanMatchNodeRequest request, ActionListener responses = new ArrayList<>(shardLevelRequests.size()); for (var shardLevelRequest : shardLevelRequests) { + long shardCanMatchStartTimeInNanos = System.nanoTime(); + ShardSearchRequest shardSearchRequest = request.createShardSearchRequest(shardLevelRequest); + final IndexService indexService = indicesService.indexServiceSafe(shardSearchRequest.shardId().getIndex()); + final IndexShard indexShard = indexService.getShard(shardSearchRequest.shardId().id()); try { // TODO remove the exception handling as it's now in canMatch itself - responses.add(new CanMatchNodeResponse.ResponseOrFailure(canMatch(request.createShardSearchRequest(shardLevelRequest)))); + responses.add(new CanMatchNodeResponse.ResponseOrFailure(canMatch(shardSearchRequest))); + indexShard.getSearchOperationListener().onCanMatchPhase(System.nanoTime() - shardCanMatchStartTimeInNanos); } catch (Exception e) { responses.add(new CanMatchNodeResponse.ResponseOrFailure(e)); } diff --git a/server/src/test/java/org/elasticsearch/search/TelemetryMetrics/ShardSearchPhaseAPMMetricsTests.java b/server/src/test/java/org/elasticsearch/search/TelemetryMetrics/ShardSearchPhaseAPMMetricsTests.java index 95091a0a1e98a..7ddd64d151b5c 100644 --- a/server/src/test/java/org/elasticsearch/search/TelemetryMetrics/ShardSearchPhaseAPMMetricsTests.java +++ b/server/src/test/java/org/elasticsearch/search/TelemetryMetrics/ShardSearchPhaseAPMMetricsTests.java @@ -39,6 +39,7 @@ import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery; +import static org.elasticsearch.index.search.stats.ShardSearchPhaseAPMMetrics.CAN_MATCH_SEARCH_PHASE_METRIC; import static org.elasticsearch.index.search.stats.ShardSearchPhaseAPMMetrics.DFS_SEARCH_PHASE_METRIC; import static org.elasticsearch.index.search.stats.ShardSearchPhaseAPMMetrics.FETCH_SEARCH_PHASE_METRIC; import static org.elasticsearch.index.search.stats.ShardSearchPhaseAPMMetrics.QUERY_SEARCH_PHASE_METRIC; @@ -255,6 +256,25 @@ public void testSearchTransportMetricsScrollSystem() { ); } + public void testCanMatchSearch() { + assertSearchHitsWithoutFailures( + client().prepareSearch(indexName) + .setSearchType(SearchType.QUERY_THEN_FETCH) + .setPreFilterShardSize(1) + .setQuery(simpleQueryStringQuery("doc1")), + "1" + ); + + final List canMatchMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(CAN_MATCH_SEARCH_PHASE_METRIC); + assertEquals(num_primaries, canMatchMeasurements.size()); + final List queryMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(QUERY_SEARCH_PHASE_METRIC); + assertEquals(num_primaries, queryMeasurements.size()); + assertAttributes(queryMeasurements, false, false); + final List fetchMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(FETCH_SEARCH_PHASE_METRIC); + assertEquals(1, fetchMeasurements.size()); + assertAttributes(fetchMeasurements, false, false); + } + private static void assertAttributes(List measurements, boolean isSystem, boolean isScroll) { for (Measurement measurement : measurements) { Map attributes = measurement.attributes(); @@ -346,6 +366,8 @@ public void testTimeRangeFilterAllResults() { assertSearchHits(searchResponse, "1", "2"); assertThat(searchResponse.getSkippedShards(), Matchers.greaterThanOrEqualTo(num_primaries - 2)); }); + final List canMatchMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(CAN_MATCH_SEARCH_PHASE_METRIC); + assertEquals(num_primaries, canMatchMeasurements.size()); final List queryMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(QUERY_SEARCH_PHASE_METRIC); // the two docs are at most spread across two shards, other shards are empty and get filtered out assertThat(queryMeasurements.size(), Matchers.lessThanOrEqualTo(2));