Skip to content

Commit e24ea4b

Browse files
Can match search shard phase APM metric (#136646)
Adds the following metric to record the duration of the can-match search phase on a per shard basis: - es.search.shards.phases.can_match.duration.histogram
1 parent 7de5108 commit e24ea4b

File tree

5 files changed

+65
-1
lines changed

5 files changed

+65
-1
lines changed

docs/changelog/136646.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 136646
2+
summary: Can match search shard phase APM metric
3+
area: Search
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchPhaseAPMMetrics.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,23 @@
2222

2323
public final class ShardSearchPhaseAPMMetrics implements SearchOperationListener {
2424

25+
public static final String CAN_MATCH_SEARCH_PHASE_METRIC = "es.search.shards.phases.can_match.duration.histogram";
2526
public static final String DFS_SEARCH_PHASE_METRIC = "es.search.shards.phases.dfs.duration.histogram";
2627
public static final String QUERY_SEARCH_PHASE_METRIC = "es.search.shards.phases.query.duration.histogram";
2728
public static final String FETCH_SEARCH_PHASE_METRIC = "es.search.shards.phases.fetch.duration.histogram";
2829

30+
private final LongHistogram canMatchPhaseMetric;
2931
private final LongHistogram dfsPhaseMetric;
3032
private final LongHistogram queryPhaseMetric;
3133
private final LongHistogram fetchPhaseMetric;
3234

3335
public ShardSearchPhaseAPMMetrics(MeterRegistry meterRegistry) {
36+
this.canMatchPhaseMetric = meterRegistry.registerLongHistogram(
37+
CAN_MATCH_SEARCH_PHASE_METRIC,
38+
"Can match phase execution times at the shard level, expressed as a histogram",
39+
"ms"
40+
);
41+
3442
this.dfsPhaseMetric = meterRegistry.registerLongHistogram(
3543
DFS_SEARCH_PHASE_METRIC,
3644
"DFS search phase execution times at the shard level, expressed as a histogram",
@@ -48,6 +56,11 @@ public ShardSearchPhaseAPMMetrics(MeterRegistry meterRegistry) {
4856
);
4957
}
5058

59+
@Override
60+
public void onCanMatchPhase(long tookInNanos) {
61+
recordPhaseLatency(canMatchPhaseMetric, tookInNanos);
62+
}
63+
5164
@Override
5265
public void onDfsPhase(SearchContext searchContext, long tookInNanos) {
5366
recordPhaseLatency(dfsPhaseMetric, tookInNanos);

server/src/main/java/org/elasticsearch/index/shard/SearchOperationListener.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,14 @@ default void onDfsPhase(SearchContext searchContext, long tookInNanos) {}
8787
*/
8888
default void onFailedDfsPhase(SearchContext searchContext) {}
8989

90+
/**
91+
* Executed after the can-match phase successfully finished.
92+
* Note: this is not invoked if the can match phase execution failed.
93+
*
94+
* @param tookInNanos the number of nanoseconds the can-match execution took
95+
*/
96+
default void onCanMatchPhase(long tookInNanos) {}
97+
9098
/**
9199
* Executed when a new reader context was created
92100
* @param readerContext the created context
@@ -237,6 +245,17 @@ public void onDfsPhase(SearchContext searchContext, long tookInNanos) {
237245
}
238246
}
239247

248+
@Override
249+
public void onCanMatchPhase(long tookInNanos) {
250+
for (SearchOperationListener listener : listeners) {
251+
try {
252+
listener.onCanMatchPhase(tookInNanos);
253+
} catch (Exception e) {
254+
logger.warn(() -> "onCanMatchPhase listener [" + listener + "] failed", e);
255+
}
256+
}
257+
}
258+
240259
@Override
241260
public void onNewReaderContext(ReaderContext readerContext) {
242261
for (SearchOperationListener listener : listeners) {

server/src/main/java/org/elasticsearch/search/SearchService.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1937,9 +1937,14 @@ public void canMatch(CanMatchNodeRequest request, ActionListener<CanMatchNodeRes
19371937
var shardLevelRequests = request.getShardLevelRequests();
19381938
final List<CanMatchNodeResponse.ResponseOrFailure> responses = new ArrayList<>(shardLevelRequests.size());
19391939
for (var shardLevelRequest : shardLevelRequests) {
1940+
long shardCanMatchStartTimeInNanos = System.nanoTime();
1941+
ShardSearchRequest shardSearchRequest = request.createShardSearchRequest(shardLevelRequest);
1942+
final IndexService indexService = indicesService.indexServiceSafe(shardSearchRequest.shardId().getIndex());
1943+
final IndexShard indexShard = indexService.getShard(shardSearchRequest.shardId().id());
19401944
try {
19411945
// TODO remove the exception handling as it's now in canMatch itself
1942-
responses.add(new CanMatchNodeResponse.ResponseOrFailure(canMatch(request.createShardSearchRequest(shardLevelRequest))));
1946+
responses.add(new CanMatchNodeResponse.ResponseOrFailure(canMatch(shardSearchRequest)));
1947+
indexShard.getSearchOperationListener().onCanMatchPhase(System.nanoTime() - shardCanMatchStartTimeInNanos);
19431948
} catch (Exception e) {
19441949
responses.add(new CanMatchNodeResponse.ResponseOrFailure(e));
19451950
}

server/src/test/java/org/elasticsearch/search/TelemetryMetrics/ShardSearchPhaseAPMMetricsTests.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
4141
import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery;
42+
import static org.elasticsearch.index.search.stats.ShardSearchPhaseAPMMetrics.CAN_MATCH_SEARCH_PHASE_METRIC;
4243
import static org.elasticsearch.index.search.stats.ShardSearchPhaseAPMMetrics.DFS_SEARCH_PHASE_METRIC;
4344
import static org.elasticsearch.index.search.stats.ShardSearchPhaseAPMMetrics.FETCH_SEARCH_PHASE_METRIC;
4445
import static org.elasticsearch.index.search.stats.ShardSearchPhaseAPMMetrics.QUERY_SEARCH_PHASE_METRIC;
@@ -255,6 +256,25 @@ public void testSearchTransportMetricsScrollSystem() {
255256
);
256257
}
257258

259+
public void testCanMatchSearch() {
260+
assertSearchHitsWithoutFailures(
261+
client().prepareSearch(indexName)
262+
.setSearchType(SearchType.QUERY_THEN_FETCH)
263+
.setPreFilterShardSize(1)
264+
.setQuery(simpleQueryStringQuery("doc1")),
265+
"1"
266+
);
267+
268+
final List<Measurement> canMatchMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(CAN_MATCH_SEARCH_PHASE_METRIC);
269+
assertEquals(num_primaries, canMatchMeasurements.size());
270+
final List<Measurement> queryMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(QUERY_SEARCH_PHASE_METRIC);
271+
assertEquals(num_primaries, queryMeasurements.size());
272+
assertAttributes(queryMeasurements, false, false);
273+
final List<Measurement> fetchMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(FETCH_SEARCH_PHASE_METRIC);
274+
assertEquals(1, fetchMeasurements.size());
275+
assertAttributes(fetchMeasurements, false, false);
276+
}
277+
258278
private static void assertAttributes(List<Measurement> measurements, boolean isSystem, boolean isScroll) {
259279
for (Measurement measurement : measurements) {
260280
Map<String, Object> attributes = measurement.attributes();
@@ -346,6 +366,8 @@ public void testTimeRangeFilterAllResults() {
346366
assertSearchHits(searchResponse, "1", "2");
347367
assertThat(searchResponse.getSkippedShards(), Matchers.greaterThanOrEqualTo(num_primaries - 2));
348368
});
369+
final List<Measurement> canMatchMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(CAN_MATCH_SEARCH_PHASE_METRIC);
370+
assertEquals(num_primaries, canMatchMeasurements.size());
349371
final List<Measurement> queryMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(QUERY_SEARCH_PHASE_METRIC);
350372
// the two docs are at most spread across two shards, other shards are empty and get filtered out
351373
assertThat(queryMeasurements.size(), Matchers.lessThanOrEqualTo(2));

0 commit comments

Comments
 (0)