Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/136481.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 136481
summary: Dfs query phase coordinator metric
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,13 @@ public SearchTransportService getSearchTransport() {
return searchTransportService;
}

/**
* Returns the {@link SearchResponseMetrics} to record search phase timings
*/
public SearchResponseMetrics getSearchResponseMetrics() {
return searchResponseMetrics;
}

public final void execute(Runnable command) {
executor.execute(command);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

/**
* This search phase fans out to every shards to execute a distributed search with a pre-collected distributed frequencies for all
* search terms used in the actual search query. This phase is very similar to a the default query-then-fetch search phase but it doesn't
* search terms used in the actual search query. This phase is very similar to the default query-then-fetch search phase, but it doesn't
* retry on another shard if any of the shards are failing. Failures are treated as shard failures and are counted as a non-successful
* operation.
* @see CountedCollector#onFailure(int, SearchShardTarget, Exception)
Expand All @@ -55,6 +55,7 @@ class DfsQueryPhase extends SearchPhase {
private final Client client;
private final AbstractSearchAsyncAction<?> context;
private final SearchProgressListener progressListener;
private long phaseStartTimeInNanos;

DfsQueryPhase(SearchPhaseResults<SearchPhaseResult> queryResult, Client client, AbstractSearchAsyncAction<?> context) {
super(NAME);
Expand All @@ -72,14 +73,15 @@ protected SearchPhase nextPhase(AggregatedDfs dfs) {
@SuppressWarnings("unchecked")
@Override
protected void run() {
phaseStartTimeInNanos = System.nanoTime();
List<DfsSearchResult> searchResults = (List<DfsSearchResult>) context.results.getAtomicArray().asList();
AggregatedDfs dfs = aggregateDfs(searchResults);
// TODO we can potentially also consume the actual per shard results from the initial phase here in the aggregateDfs
// to free up memory early
final CountedCollector<SearchPhaseResult> counter = new CountedCollector<>(
queryResult,
searchResults.size(),
() -> context.executeNextPhase(NAME, () -> nextPhase(dfs)),
() -> onFinish(dfs),
context
);

Expand Down Expand Up @@ -130,6 +132,11 @@ public void onFailure(Exception exception) {
}
}

private void onFinish(AggregatedDfs dfs) {
context.getSearchResponseMetrics().recordSearchPhaseDuration(getName(), System.nanoTime() - phaseStartTimeInNanos);
context.executeNextPhase(NAME, () -> nextPhase(dfs));
}

private void shardFailure(
Exception exception,
QuerySearchRequest querySearchRequest,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import org.elasticsearch.telemetry.metric.MeterRegistry;

import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -45,9 +44,7 @@ public String getDisplayName() {

public static final String TOOK_DURATION_TOTAL_HISTOGRAM_NAME = "es.search_response.took_durations.histogram";
public static final String RESPONSE_COUNT_TOTAL_COUNTER_NAME = "es.search_response.response_count.total";

private static final String SEARCH_PHASE_METRIC_FORMAT = "es.search_response.took_durations.%s.histogram";
private static final List<String> SEARCH_PHASE_NAMES = List.of("dfs", "open_pit", "query");

private final LongHistogram tookDurationTotalMillisHistogram;
private final LongCounter responseCountTotalCounter;
Expand Down Expand Up @@ -75,6 +72,12 @@ public SearchResponseMetrics(MeterRegistry meterRegistry) {
"The search phase dfs duration in milliseconds at the coordinator, expressed as a histogram",
"millis"
),
"dfs_query",
meterRegistry.registerLongHistogram(
String.format(Locale.ROOT, SEARCH_PHASE_METRIC_FORMAT, "dfs_query"),
"The search phase dfs_query duration in milliseconds at the coordinator, expressed as a histogram",
"millis"
),
"open_pit",
meterRegistry.registerLongHistogram(
String.format(Locale.ROOT, SEARCH_PHASE_METRIC_FORMAT, "open_pit"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,10 @@ public class SearchPhaseCoordinatorAPMMetricsTests extends ESSingleNodeTestCase
private static final String indexName = "test_coordinator_search_phase_metrics";
private final int num_primaries = randomIntBetween(2, 7);

// es.search_response.coordinator_phases.%s.duration.histogram
// es.search_response.took_durations.
private static final String QUERY_SEARCH_PHASE_METRIC = "es.search_response.took_durations.query.histogram";
private static final String DFS_SEARCH_PHASE_METRIC = "es.search_response.took_durations.dfs.histogram";
private static final String OPEN_PIT_SEARCH_PHASE_METRIC = "es.search_response.took_durations.open_pit.histogram";
private static final String DFS_QUERY_SEARCH_PHASE_METRIC = "es.search_response.took_durations.dfs_query.histogram";

@Override
protected boolean resetNodeAfterTest() {
Expand Down Expand Up @@ -90,7 +89,7 @@ public void testDfsSearch() {
client().prepareSearch(indexName).setSearchType(SearchType.DFS_QUERY_THEN_FETCH).setQuery(simpleQueryStringQuery("doc1")),
"1"
);
assertMeasurements(List.of(DFS_SEARCH_PHASE_METRIC));
assertMeasurements(List.of(DFS_SEARCH_PHASE_METRIC, DFS_QUERY_SEARCH_PHASE_METRIC));
}

public void testPointInTime() {
Expand Down