Skip to content

Commit 302475f

Browse files
committed
Amend query to aggregate events by executable name
1 parent 8b3c38c commit 302475f

File tree

2 files changed

+66
-32
lines changed

2 files changed

+66
-32
lines changed

x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/GetStackTracesResponseBuilder.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class GetStackTracesResponseBuilder {
2020
private Map<String, String> executables;
2121
private Map<String, TraceEvent> stackTraceEvents;
2222
private List<TransportGetStackTracesAction.HostEventCount> hostEventCounts;
23+
private List<TransportGetStackTracesAction.ExecutableEventCount> executableEventCounts;
2324
private double samplingRate;
2425
private long totalSamples;
2526
private Double requestedDuration;
@@ -75,10 +76,18 @@ public void setHostEventCounts(List<TransportGetStackTracesAction.HostEventCount
7576
this.hostEventCounts = hostEventCounts;
7677
}
7778

79+
/**
 * Stores the given per-executable event counts on this builder.
 * The list reference is kept as-is (no copy is made).
 *
 * @param executableEventCounts the executable event counts to hold
 */
public void setExecutableEventCounts(List<TransportGetStackTracesAction.ExecutableEventCount> executableEventCounts) {
80+
this.executableEventCounts = executableEventCounts;
81+
}
82+
7883
/**
 * Returns the host event counts currently held by this builder.
 * The stored list reference is returned directly, not a copy.
 *
 * @return the value of the {@code hostEventCounts} field
 */
public List<TransportGetStackTracesAction.HostEventCount> getHostEventCounts() {
7984
return hostEventCounts;
8085
}
8186

87+
/**
 * Returns the per-executable event counts currently held by this builder.
 * The stored list reference is returned directly, not a copy.
 *
 * @return the value of the {@code executableEventCounts} field
 */
public List<TransportGetStackTracesAction.ExecutableEventCount> getExecutableEventCounts() {
88+
return executableEventCounts;
89+
}
90+
8291
/**
 * Returns the stack trace events map currently held by this builder.
 * The stored map reference is returned directly, not a copy.
 *
 * @return the value of the {@code stackTraceEvents} field
 */
public Map<String, TraceEvent> getStackTraceEvents() {
8392
return stackTraceEvents;
8493
}

x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/TransportGetStackTracesAction.java

Lines changed: 57 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,9 @@ private void searchEventGroupedByStackTrace(
317317
GetStackTracesResponseBuilder responseBuilder,
318318
EventsIndex eventsIndex
319319
) {
320+
// We have three levels of nested aggregations, which in theory might blow up to MAX_TRACE_EVENTS_RESULT_SIZE^3 items
321+
// reported. But we know that the total number of items is limited by our down-sampling to
322+
// a maximum of ~100k (MAX_TRACE_EVENTS_RESULT_SIZE is higher to be on the safe side).
320323
responseBuilder.setSamplingRate(eventsIndex.getSampleRate());
321324
TermsAggregationBuilder groupByStackTraceId = new TermsAggregationBuilder("group_by")
322325
// 'size' should be max 100k, but might be slightly more. Better be on the safe side.
@@ -326,6 +329,14 @@ private void searchEventGroupedByStackTrace(
326329
// Especially with high cardinality fields, this makes aggregations really slow.
327330
.executionHint("map")
328331
.subAggregation(new SumAggregationBuilder("count").field("Stacktrace.count"));
332+
TermsAggregationBuilder groupByHostId = new TermsAggregationBuilder("group_by")
333+
// 'size' specifies the max number of host IDs we support per request.
334+
.size(MAX_TRACE_EVENTS_RESULT_SIZE)
335+
.field("host.id")
336+
// 'execution_hint: map' skips the slow building of ordinals that we don't need.
337+
// Especially with high cardinality fields, this makes aggregations really slow.
338+
.executionHint("map")
339+
.subAggregation(groupByStackTraceId);
329340
SubGroupCollector subGroups = SubGroupCollector.attach(
330341
groupByStackTraceId,
331342
request.getAggregationFields(),
@@ -341,62 +352,74 @@ private void searchEventGroupedByStackTrace(
341352
.addAggregation(new MinAggregationBuilder("min_time").field("@timestamp"))
342353
.addAggregation(new MaxAggregationBuilder("max_time").field("@timestamp"))
343354
.addAggregation(
344-
// We have nested aggregations, which in theory might blow up to MAX_TRACE_EVENTS_RESULT_SIZE^2 items
345-
// reported. But we know that the total number of items is limited by our down-sampling to
346-
// a maximum of ~100k (MAX_TRACE_EVENTS_RESULT_SIZE is higher to be on the safe side).
347355
new TermsAggregationBuilder("group_by")
348356
// 'size' specifies the max number of executable names we support per request.
349357
.size(MAX_TRACE_EVENTS_RESULT_SIZE)
350-
.field("host.id")
358+
.field("process.executable.name")
351359
// 'execution_hint: map' skips the slow building of ordinals that we don't need.
352360
// Especially with high cardinality fields, this makes aggregations really slow.
353361
.executionHint("map")
354-
.subAggregation(groupByStackTraceId)
362+
.subAggregation(groupByHostId)
355363
)
356364
.addAggregation(new SumAggregationBuilder("total_count").field("Stacktrace.count"))
357365
.execute(handleEventsGroupedByStackTrace(submitTask, client, responseBuilder, submitListener, searchResponse -> {
358366
long totalCount = getAggValueAsLong(searchResponse, "total_count");
359367

360368
Resampler resampler = new Resampler(request, responseBuilder.getSamplingRate(), totalCount);
361-
Terms hosts = searchResponse.getAggregations().get("group_by");
362369

363370
// Sort items lexicographically to access Lucene's term dictionary more efficiently when issuing an mget request.
364371
// The term dictionary is lexicographically sorted and using the same order reduces the number of page faults
365372
// needed to load it.
366373
long totalFinalCount = 0;
367374
List<HostEventCount> hostEventCounts = new ArrayList<>(MAX_TRACE_EVENTS_RESULT_SIZE);
375+
List<ExecutableEventCount> executableEventCounts = new ArrayList<>(MAX_TRACE_EVENTS_RESULT_SIZE);
368376
Map<String, TraceEvent> stackTraceEvents = new TreeMap<>();
369-
for (Terms.Bucket hostBucket : hosts.getBuckets()) {
370-
String hostid = hostBucket.getKeyAsString();
371-
372-
Terms stacktraces = hostBucket.getAggregations().get("group_by");
373-
for (Terms.Bucket stacktraceBucket : stacktraces.getBuckets()) {
374-
Sum count = stacktraceBucket.getAggregations().get("count");
375-
int finalCount = resampler.adjustSampleCount((int) count.value());
376-
if (finalCount <= 0) {
377-
continue;
378-
}
379-
totalFinalCount += finalCount;
380-
381-
/*
382-
The same stacktraces may come from different hosts (eventually from different datacenters).
383-
We make a list of the triples here. As soon as we have the host metadata, we can calculate
384-
the CO2 emission and the costs for each TraceEvent.
385-
*/
386-
String stackTraceID = stacktraceBucket.getKeyAsString();
387-
hostEventCounts.add(new HostEventCount(hostid, stackTraceID, finalCount));
388-
389-
TraceEvent event = stackTraceEvents.get(stackTraceID);
390-
if (event == null) {
391-
event = new TraceEvent(stackTraceID);
392-
stackTraceEvents.put(stackTraceID, event);
377+
378+
Terms executableNames = searchResponse.getAggregations().get("group_by");
379+
for (Terms.Bucket executableBucket : executableNames.getBuckets()) {
380+
String executableName = executableBucket.getKeyAsString();
381+
382+
Terms hosts = executableBucket.getAggregations().get("group_by");
383+
for (Terms.Bucket hostBucket : hosts.getBuckets()) {
384+
String hostid = hostBucket.getKeyAsString();
385+
386+
Terms stacktraces = hostBucket.getAggregations().get("group_by");
387+
for (Terms.Bucket stacktraceBucket : stacktraces.getBuckets()) {
388+
Sum count = stacktraceBucket.getAggregations().get("count");
389+
int finalCount = resampler.adjustSampleCount((int) count.value());
390+
if (finalCount <= 0) {
391+
continue;
392+
}
393+
totalFinalCount += finalCount;
394+
395+
String stackTraceID = stacktraceBucket.getKeyAsString();
396+
397+
/*
398+
The same stacktraces may come from different executables.
399+
We make a list of the triples here.
400+
*/
401+
executableEventCounts.add(new ExecutableEventCount(executableName, stackTraceID, finalCount));
402+
403+
/*
404+
The same stacktraces may come from different hosts (possibly from different datacenters).
405+
We make a list of the triples here. As soon as we have the host metadata, we can calculate
406+
the CO2 emission and the costs for each TraceEvent.
407+
*/
408+
hostEventCounts.add(new HostEventCount(hostid, stackTraceID, finalCount));
409+
410+
TraceEvent event = stackTraceEvents.get(stackTraceID);
411+
if (event == null) {
412+
event = new TraceEvent(stackTraceID);
413+
stackTraceEvents.put(stackTraceID, event);
414+
}
415+
event.count += finalCount;
416+
subGroups.collectResults(stacktraceBucket, event);
393417
}
394-
event.count += finalCount;
395-
subGroups.collectResults(stacktraceBucket, event);
396418
}
397419
}
398420
responseBuilder.setTotalSamples(totalFinalCount);
399421
responseBuilder.setHostEventCounts(hostEventCounts);
422+
responseBuilder.setExecutableEventCounts(executableEventCounts);
400423
log.debug(
401424
"Found [{}] stacktrace events, resampled with sample rate [{}] to [{}] events ([{}] unique stack traces).",
402425
totalCount,
@@ -834,4 +857,6 @@ private void mget(Client client, List<Index> indices, List<String> slice, Action
834857
}
835858

836859
/**
 * A (host ID, stack trace ID, count) triple collected while walking the aggregation buckets.
 * The count is the resampled sample count of one stack trace bucket. Because buckets are
 * walked per executable name, then per host, the same host/stack trace pair may be added
 * more than once.
 */
record HostEventCount(String hostID, String stacktraceID, int count) {}
860+
861+
/**
 * An (executable name, stack trace ID, count) triple collected while walking the aggregation
 * buckets. The count is the resampled sample count of one stack trace bucket. One entry is
 * added per host bucket, so the same executable/stack trace pair may be added more than once.
 */
record ExecutableEventCount(String executableName, String stacktraceID, int count) {}
837862
}

0 commit comments

Comments
 (0)