From 4feb480f9073d621ca34b7a6ef6687cad599ded8 Mon Sep 17 00:00:00 2001 From: Chris Parrinello Date: Wed, 8 Oct 2025 11:35:50 -0500 Subject: [PATCH 1/6] init From 9d25194e2509ed7e8d9634cb359332fe0c3742ec Mon Sep 17 00:00:00 2001 From: Chris Parrinello Date: Wed, 8 Oct 2025 13:30:51 -0500 Subject: [PATCH 2/6] Record APM metrics for DFS query phase coordinator duration --- .../action/search/AbstractSearchAsyncAction.java | 7 +++++++ .../org/elasticsearch/action/search/DfsQueryPhase.java | 10 ++++++++-- .../rest/action/search/SearchResponseMetrics.java | 2 -- .../search/SearchPhaseCoordinatorAPMMetricsTests.java | 5 ++--- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java index 845d8680e67c2..3291e849566d0 100644 --- a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java @@ -691,6 +691,13 @@ public SearchTransportService getSearchTransport() { return searchTransportService; } + /** + * Returns the {@link SearchResponseMetrics} to record search phase timings + */ + public SearchResponseMetrics getSearchResponseMetrics() { + return searchResponseMetrics; + } + public final void execute(Runnable command) { executor.execute(command); } diff --git a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java index 71eb94459548c..cf08f157ca851 100644 --- a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java @@ -42,7 +42,7 @@ /** * This search phase fans out to every shards to execute a distributed search with a pre-collected distributed frequencies for all - * search terms used in the actual search query. This phase is very similar to a the default query-then-fetch search phase but it doesn't + * search terms used in the actual search query. This phase is very similar to the default query-then-fetch search phase, but it doesn't * retry on another shard if any of the shards are failing. Failures are treated as shard failures and are counted as a non-successful * operation. * @see CountedCollector#onFailure(int, SearchShardTarget, Exception) @@ -55,6 +55,7 @@ class DfsQueryPhase extends SearchPhase { private final Client client; private final AbstractSearchAsyncAction context; private final SearchProgressListener progressListener; + private long phaseStartTimeInNanos; DfsQueryPhase(SearchPhaseResults queryResult, Client client, AbstractSearchAsyncAction context) { super(NAME); @@ -79,7 +80,7 @@ protected void run() { final CountedCollector counter = new CountedCollector<>( queryResult, searchResults.size(), - () -> context.executeNextPhase(NAME, () -> nextPhase(dfs)), + () -> onFinish(dfs), context ); @@ -130,6 +131,11 @@ public void onFailure(Exception exception) { } } + private void onFinish(AggregatedDfs dfs) { + context.getSearchResponseMetrics().recordSearchPhaseDuration(getName(), System.nanoTime() - phaseStartTimeInNanos); + context.executeNextPhase(NAME, () -> nextPhase(dfs)); + } + private void shardFailure( Exception exception, QuerySearchRequest querySearchRequest, diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java index d18d94ad40d59..bc66b7a081d7c 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java @@ -45,9 +45,7 @@ public String getDisplayName() { public static final String TOOK_DURATION_TOTAL_HISTOGRAM_NAME = "es.search_response.took_durations.histogram"; public static final String RESPONSE_COUNT_TOTAL_COUNTER_NAME = "es.search_response.response_count.total"; - private static final String SEARCH_PHASE_METRIC_FORMAT = "es.search_response.took_durations.%s.histogram"; - private static final List SEARCH_PHASE_NAMES = List.of("dfs", "open_pit", "query"); private final LongHistogram tookDurationTotalMillisHistogram; private final LongCounter responseCountTotalCounter; diff --git a/server/src/test/java/org/elasticsearch/rest/action/search/SearchPhaseCoordinatorAPMMetricsTests.java b/server/src/test/java/org/elasticsearch/rest/action/search/SearchPhaseCoordinatorAPMMetricsTests.java index a62e68ddc4c90..9739bd1a8624a 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/search/SearchPhaseCoordinatorAPMMetricsTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/search/SearchPhaseCoordinatorAPMMetricsTests.java @@ -41,11 +41,10 @@ public class SearchPhaseCoordinatorAPMMetricsTests extends ESSingleNodeTestCase private static final String indexName = "test_coordinator_search_phase_metrics"; private final int num_primaries = randomIntBetween(2, 7); - // es.search_response.coordinator_phases.%s.duration.histogram - // es.search_response.took_durations. private static final String QUERY_SEARCH_PHASE_METRIC = "es.search_response.took_durations.query.histogram"; private static final String DFS_SEARCH_PHASE_METRIC = "es.search_response.took_durations.dfs.histogram"; private static final String OPEN_PIT_SEARCH_PHASE_METRIC = "es.search_response.took_durations.open_pit.histogram"; + private static final String DFS_QUERY_SEARCH_PHASE_METRIC = "es.search_response.took_durations.dfs_query.histogram"; @Override protected boolean resetNodeAfterTest() { @@ -90,7 +89,7 @@ public void testDfsSearch() { client().prepareSearch(indexName).setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(simpleQueryStringQuery("doc1")), "1" ); - assertMeasurements(List.of(DFS_SEARCH_PHASE_METRIC)); + assertMeasurements(List.of(DFS_SEARCH_PHASE_METRIC, DFS_QUERY_SEARCH_PHASE_METRIC)); } public void testPointInTime() { From bb648060c9c0eecaf029332aaa2d778d4c9f5328 Mon Sep 17 00:00:00 2001 From: Chris Parrinello Date: Fri, 10 Oct 2025 09:45:57 -0500 Subject: [PATCH 3/6] merge from downstream branch --- .../rest/action/search/SearchResponseMetrics.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java index bc66b7a081d7c..f4bb89504980b 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java @@ -73,6 +73,12 @@ public SearchResponseMetrics(MeterRegistry meterRegistry) { "The search phase dfs duration in milliseconds at the coordinator, expressed as a histogram", "millis" ), + "dfs_query", + meterRegistry.registerLongHistogram( + String.format(Locale.ROOT, SEARCH_PHASE_METRIC_FORMAT, "dfs_query"), + "The search phase dfs_query duration in milliseconds at the coordinator, expressed as a histogram", + "millis" + ), "open_pit", meterRegistry.registerLongHistogram( String.format(Locale.ROOT, SEARCH_PHASE_METRIC_FORMAT, "open_pit"), From 42d00224f5dbfbd275eb5245f78fb68722a78463 Mon Sep 17 00:00:00 2001 From: Chris Parrinello Date: Mon, 13 Oct 2025 08:53:57 -0500 Subject: [PATCH 4/6] updates from upstream changes --- .../elasticsearch/rest/action/search/SearchResponseMetrics.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java index f4bb89504980b..d9e2fcfff0a48 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java @@ -15,7 +15,6 @@ import org.elasticsearch.telemetry.metric.MeterRegistry; import java.util.HashMap; -import java.util.List; import java.util.Locale; import java.util.Map; import java.util.concurrent.TimeUnit; From 9a1ba67b41a6952b4606cedb9d104c66644528de Mon Sep 17 00:00:00 2001 From: Chris Parrinello Date: Mon, 13 Oct 2025 08:58:09 -0500 Subject: [PATCH 5/6] Update docs/changelog/136481.yaml --- docs/changelog/136481.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/136481.yaml diff --git a/docs/changelog/136481.yaml b/docs/changelog/136481.yaml new file mode 100644 index 0000000000000..629e31bdb6346 --- /dev/null +++ b/docs/changelog/136481.yaml @@ -0,0 +1,5 @@ +pr: 136481 +summary: Dfs query phase coordinator metric +area: Search +type: enhancement +issues: [] From c3c526c021cb4fc71a6244ec30ce13445a09f143 Mon Sep 17 00:00:00 2001 From: Chris Parrinello Date: Mon, 13 Oct 2025 15:26:46 -0500 Subject: [PATCH 6/6] set phase start time --- .../main/java/org/elasticsearch/action/search/DfsQueryPhase.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java index cf08f157ca851..a12c11b525bff 100644 --- a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java @@ -73,6 +73,7 @@ protected SearchPhase nextPhase(AggregatedDfs dfs) { @SuppressWarnings("unchecked") @Override protected void run() { + phaseStartTimeInNanos = System.nanoTime(); List searchResults = (List) context.results.getAtomicArray().asList(); AggregatedDfs dfs = aggregateDfs(searchResults); // TODO we can potentially also consume the actual per shard results from the initial phase here in the aggregateDfs