diff --git a/docs/changelog/136481.yaml b/docs/changelog/136481.yaml new file mode 100644 index 0000000000000..629e31bdb6346 --- /dev/null +++ b/docs/changelog/136481.yaml @@ -0,0 +1,5 @@ +pr: 136481 +summary: Dfs query phase coordinator metric +area: Search +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java index 845d8680e67c2..3291e849566d0 100644 --- a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java @@ -691,6 +691,13 @@ public SearchTransportService getSearchTransport() { return searchTransportService; } + /** + * Returns the {@link SearchResponseMetrics} to record search phase timings + */ + public SearchResponseMetrics getSearchResponseMetrics() { + return searchResponseMetrics; + } + public final void execute(Runnable command) { executor.execute(command); } diff --git a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java index 71eb94459548c..a12c11b525bff 100644 --- a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java @@ -42,7 +42,7 @@ /** * This search phase fans out to every shards to execute a distributed search with a pre-collected distributed frequencies for all - * search terms used in the actual search query. This phase is very similar to a the default query-then-fetch search phase but it doesn't + * search terms used in the actual search query. This phase is very similar to the default query-then-fetch search phase, but it doesn't * retry on another shard if any of the shards are failing. Failures are treated as shard failures and are counted as a non-successful * operation. * @see CountedCollector#onFailure(int, SearchShardTarget, Exception) @@ -55,6 +55,7 @@ class DfsQueryPhase extends SearchPhase { private final Client client; private final AbstractSearchAsyncAction context; private final SearchProgressListener progressListener; + private long phaseStartTimeInNanos; DfsQueryPhase(SearchPhaseResults queryResult, Client client, AbstractSearchAsyncAction context) { super(NAME); @@ -72,6 +73,7 @@ protected SearchPhase nextPhase(AggregatedDfs dfs) { @SuppressWarnings("unchecked") @Override protected void run() { + phaseStartTimeInNanos = System.nanoTime(); List searchResults = (List) context.results.getAtomicArray().asList(); AggregatedDfs dfs = aggregateDfs(searchResults); // TODO we can potentially also consume the actual per shard results from the initial phase here in the aggregateDfs @@ -79,7 +81,7 @@ protected void run() { final CountedCollector counter = new CountedCollector<>( queryResult, searchResults.size(), - () -> context.executeNextPhase(NAME, () -> nextPhase(dfs)), + () -> onFinish(dfs), context ); @@ -130,6 +132,11 @@ public void onFailure(Exception exception) { } } + private void onFinish(AggregatedDfs dfs) { + context.getSearchResponseMetrics().recordSearchPhaseDuration(getName(), System.nanoTime() - phaseStartTimeInNanos); + context.executeNextPhase(NAME, () -> nextPhase(dfs)); + } + private void shardFailure( Exception exception, QuerySearchRequest querySearchRequest, diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java index d18d94ad40d59..d9e2fcfff0a48 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java @@ -15,7 +15,6 @@ import org.elasticsearch.telemetry.metric.MeterRegistry; import java.util.HashMap; -import java.util.List; import java.util.Locale; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -45,9 +44,7 @@ public String getDisplayName() { public static final String TOOK_DURATION_TOTAL_HISTOGRAM_NAME = "es.search_response.took_durations.histogram"; public static final String RESPONSE_COUNT_TOTAL_COUNTER_NAME = "es.search_response.response_count.total"; - private static final String SEARCH_PHASE_METRIC_FORMAT = "es.search_response.took_durations.%s.histogram"; - private static final List SEARCH_PHASE_NAMES = List.of("dfs", "open_pit", "query"); private final LongHistogram tookDurationTotalMillisHistogram; private final LongCounter responseCountTotalCounter; @@ -75,6 +72,12 @@ public SearchResponseMetrics(MeterRegistry meterRegistry) { "The search phase dfs duration in milliseconds at the coordinator, expressed as a histogram", "millis" ), + "dfs_query", + meterRegistry.registerLongHistogram( + String.format(Locale.ROOT, SEARCH_PHASE_METRIC_FORMAT, "dfs_query"), + "The search phase dfs_query duration in milliseconds at the coordinator, expressed as a histogram", + "millis" + ), "open_pit", meterRegistry.registerLongHistogram( String.format(Locale.ROOT, SEARCH_PHASE_METRIC_FORMAT, "open_pit"), diff --git a/server/src/test/java/org/elasticsearch/rest/action/search/SearchPhaseCoordinatorAPMMetricsTests.java b/server/src/test/java/org/elasticsearch/rest/action/search/SearchPhaseCoordinatorAPMMetricsTests.java index a62e68ddc4c90..9739bd1a8624a 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/search/SearchPhaseCoordinatorAPMMetricsTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/search/SearchPhaseCoordinatorAPMMetricsTests.java @@ -41,11 +41,10 @@ public class SearchPhaseCoordinatorAPMMetricsTests extends ESSingleNodeTestCase private static final String indexName = "test_coordinator_search_phase_metrics"; private final int num_primaries = randomIntBetween(2, 7); - // es.search_response.coordinator_phases.%s.duration.histogram - // es.search_response.took_durations. private static final String QUERY_SEARCH_PHASE_METRIC = "es.search_response.took_durations.query.histogram"; private static final String DFS_SEARCH_PHASE_METRIC = "es.search_response.took_durations.dfs.histogram"; private static final String OPEN_PIT_SEARCH_PHASE_METRIC = "es.search_response.took_durations.open_pit.histogram"; + private static final String DFS_QUERY_SEARCH_PHASE_METRIC = "es.search_response.took_durations.dfs_query.histogram"; @Override protected boolean resetNodeAfterTest() { @@ -90,7 +89,7 @@ public void testDfsSearch() { client().prepareSearch(indexName).setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(simpleQueryStringQuery("doc1")), "1" ); - assertMeasurements(List.of(DFS_SEARCH_PHASE_METRIC)); + assertMeasurements(List.of(DFS_SEARCH_PHASE_METRIC, DFS_QUERY_SEARCH_PHASE_METRIC)); } public void testPointInTime() {