Skip to content

Commit 7438ee8

Browse files
Fetch search phase coordinator duration APM metric (#136547)
Adds the following APM metric to track the duration of the fetch phase at the coordinator node: - es.search_response.took_durations.fetch.histogram
1 parent 556aae5 commit 7438ee8

File tree

4 files changed

+31
-11
lines changed

4 files changed

+31
-11
lines changed

docs/changelog/136547.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 136547
2+
summary: Fetch search phase coordinator duration APM metric
3+
area: Search
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/action/search/FetchSearchPhase.java

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ public void onFailure(Exception e) {
9292

9393
private void innerRun() throws Exception {
9494
assert this.reducedQueryPhase == null ^ this.resultConsumer == null;
95+
long phaseStartTimeInNanos = System.nanoTime();
9596
// depending on whether we executed the RankFeaturePhase we may or may not have the reduced query result computed already
9697
final var reducedQueryPhase = this.reducedQueryPhase == null ? resultConsumer.reduce() : this.reducedQueryPhase;
9798
final int numShards = context.getNumShards();
@@ -104,22 +105,27 @@ private void innerRun() throws Exception {
104105
if (queryAndFetchOptimization) {
105106
assert assertConsistentWithQueryAndFetchOptimization();
106107
// query AND fetch optimization
107-
moveToNextPhase(searchPhaseShardResults, reducedQueryPhase);
108+
moveToNextPhase(searchPhaseShardResults, reducedQueryPhase, phaseStartTimeInNanos);
108109
} else {
109110
ScoreDoc[] scoreDocs = reducedQueryPhase.sortedTopDocs().scoreDocs();
110111
// no docs to fetch -- sidestep everything and return
111112
if (scoreDocs.length == 0) {
112113
// we have to release contexts here to free up resources
113114
searchPhaseShardResults.asList()
114115
.forEach(searchPhaseShardResult -> releaseIrrelevantSearchContext(searchPhaseShardResult, context));
115-
moveToNextPhase(new AtomicArray<>(0), reducedQueryPhase);
116+
moveToNextPhase(new AtomicArray<>(0), reducedQueryPhase, phaseStartTimeInNanos);
116117
} else {
117-
innerRunFetch(scoreDocs, numShards, reducedQueryPhase);
118+
innerRunFetch(scoreDocs, numShards, reducedQueryPhase, phaseStartTimeInNanos);
118119
}
119120
}
120121
}
121122

122-
private void innerRunFetch(ScoreDoc[] scoreDocs, int numShards, SearchPhaseController.ReducedQueryPhase reducedQueryPhase) {
123+
private void innerRunFetch(
124+
ScoreDoc[] scoreDocs,
125+
int numShards,
126+
SearchPhaseController.ReducedQueryPhase reducedQueryPhase,
127+
long phaseStartTimeInNanos
128+
) {
123129
ArraySearchPhaseResults<FetchSearchResult> fetchResults = new ArraySearchPhaseResults<>(numShards);
124130
final List<Map<Integer, RankDoc>> rankDocsPerShard = false == shouldExplainRankScores(context.getRequest())
125131
? null
@@ -133,7 +139,7 @@ private void innerRunFetch(ScoreDoc[] scoreDocs, int numShards, SearchPhaseContr
133139
docIdsToLoad.length, // we count down every shard in the result no matter if we got any results or not
134140
() -> {
135141
try (fetchResults) {
136-
moveToNextPhase(fetchResults.getAtomicArray(), reducedQueryPhase);
142+
moveToNextPhase(fetchResults.getAtomicArray(), reducedQueryPhase, phaseStartTimeInNanos);
137143
}
138144
},
139145
context
@@ -258,8 +264,10 @@ public void onFailure(Exception e) {
258264

259265
private void moveToNextPhase(
260266
AtomicArray<? extends SearchPhaseResult> fetchResultsArr,
261-
SearchPhaseController.ReducedQueryPhase reducedQueryPhase
267+
SearchPhaseController.ReducedQueryPhase reducedQueryPhase,
268+
long phaseStartTimeInNanos
262269
) {
270+
context.getSearchResponseMetrics().recordSearchPhaseDuration(getName(), System.nanoTime() - phaseStartTimeInNanos);
263271
context.executeNextPhase(NAME, () -> {
264272
var resp = SearchPhaseController.merge(context.getRequest().scroll() != null, reducedQueryPhase, fetchResultsArr);
265273
context.addReleasable(resp);

server/src/main/java/org/elasticsearch/rest/action/search/SearchResponseMetrics.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,12 @@ public SearchResponseMetrics(MeterRegistry meterRegistry) {
7878
"The search phase dfs_query duration in milliseconds at the coordinator, expressed as a histogram",
7979
"millis"
8080
),
81+
"fetch",
82+
meterRegistry.registerLongHistogram(
83+
String.format(Locale.ROOT, SEARCH_PHASE_METRIC_FORMAT, "fetch"),
84+
"The search phase fetch duration in milliseconds at the coordinator, expressed as a histogram",
85+
"millis"
86+
),
8187
"open_pit",
8288
meterRegistry.registerLongHistogram(
8389
String.format(Locale.ROOT, SEARCH_PHASE_METRIC_FORMAT, "open_pit"),

server/src/test/java/org/elasticsearch/rest/action/search/SearchPhaseCoordinatorAPMMetricsTests.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,11 @@ public class SearchPhaseCoordinatorAPMMetricsTests extends ESSingleNodeTestCase
4141
private static final String indexName = "test_coordinator_search_phase_metrics";
4242
private final int num_primaries = randomIntBetween(2, 7);
4343

44-
private static final String QUERY_SEARCH_PHASE_METRIC = "es.search_response.took_durations.query.histogram";
44+
private static final String DFS_QUERY_SEARCH_PHASE_METRIC = "es.search_response.took_durations.dfs_query.histogram";
4545
private static final String DFS_SEARCH_PHASE_METRIC = "es.search_response.took_durations.dfs.histogram";
46+
private static final String FETCH_SEARCH_PHASE_METRIC = "es.search_response.took_durations.fetch.histogram";
4647
private static final String OPEN_PIT_SEARCH_PHASE_METRIC = "es.search_response.took_durations.open_pit.histogram";
47-
private static final String DFS_QUERY_SEARCH_PHASE_METRIC = "es.search_response.took_durations.dfs_query.histogram";
48+
private static final String QUERY_SEARCH_PHASE_METRIC = "es.search_response.took_durations.query.histogram";
4849

4950
@Override
5051
protected boolean resetNodeAfterTest() {
@@ -81,15 +82,15 @@ public void testSearchQueryThenFetch() {
8182
client().prepareSearch(indexName).setSearchType(SearchType.QUERY_THEN_FETCH).setQuery(simpleQueryStringQuery("doc1")),
8283
"1"
8384
);
84-
assertMeasurements(List.of(QUERY_SEARCH_PHASE_METRIC));
85+
assertMeasurements(List.of(QUERY_SEARCH_PHASE_METRIC, FETCH_SEARCH_PHASE_METRIC));
8586
}
8687

8788
public void testDfsSearch() {
8889
assertSearchHitsWithoutFailures(
8990
client().prepareSearch(indexName).setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(simpleQueryStringQuery("doc1")),
9091
"1"
9192
);
92-
assertMeasurements(List.of(DFS_SEARCH_PHASE_METRIC, DFS_QUERY_SEARCH_PHASE_METRIC));
93+
assertMeasurements(List.of(DFS_SEARCH_PHASE_METRIC, DFS_QUERY_SEARCH_PHASE_METRIC, FETCH_SEARCH_PHASE_METRIC));
9394
}
9495

9596
public void testPointInTime() {
@@ -105,7 +106,7 @@ public void testPointInTime() {
105106
.setQuery(simpleQueryStringQuery("doc1")),
106107
"1"
107108
);
108-
assertMeasurements(List.of(OPEN_PIT_SEARCH_PHASE_METRIC, QUERY_SEARCH_PHASE_METRIC));
109+
assertMeasurements(List.of(OPEN_PIT_SEARCH_PHASE_METRIC, QUERY_SEARCH_PHASE_METRIC, FETCH_SEARCH_PHASE_METRIC));
109110
} finally {
110111
client().execute(TransportClosePointInTimeAction.TYPE, new ClosePointInTimeRequest(pointInTimeId)).actionGet();
111112
}

0 commit comments

Comments
 (0)