@@ -317,6 +317,9 @@ private void searchEventGroupedByStackTrace(
317317 GetStackTracesResponseBuilder responseBuilder ,
318318 EventsIndex eventsIndex
319319 ) {
320+ // We have nested aggregations, which in theory might blow up to MAX_TRACE_EVENTS_RESULT_SIZE^2 items
321+ // reported. But we know that the total number of items is limited by our down-sampling to
322+ // a maximum of ~100k (MAX_TRACE_EVENTS_RESULT_SIZE is higher to be on the safe side).
320323 responseBuilder .setSamplingRate (eventsIndex .getSampleRate ());
321324 TermsAggregationBuilder groupByStackTraceId = new TermsAggregationBuilder ("group_by" )
322325 // 'size' should be max 100k, but might be slightly more. Better be on the safe side.
@@ -326,6 +329,14 @@ private void searchEventGroupedByStackTrace(
326329 // Especially with high cardinality fields, this makes aggregations really slow.
327330 .executionHint ("map" )
328331 .subAggregation (new SumAggregationBuilder ("count" ).field ("Stacktrace.count" ));
332+ TermsAggregationBuilder groupByHostId = new TermsAggregationBuilder ("group_by" )
333+ // 'size' specifies the max number of host IDs we support per request.
334+ .size (MAX_TRACE_EVENTS_RESULT_SIZE )
335+ .field ("host.id" )
336+ // 'execution_hint: map' skips the slow building of ordinals that we don't need.
337+ // Especially with high cardinality fields, this makes aggregations really slow.
338+ .executionHint ("map" )
339+ .subAggregation (groupByStackTraceId );
329340 SubGroupCollector subGroups = SubGroupCollector .attach (
330341 groupByStackTraceId ,
331342 request .getAggregationFields (),
@@ -341,62 +352,74 @@ private void searchEventGroupedByStackTrace(
341352 .addAggregation (new MinAggregationBuilder ("min_time" ).field ("@timestamp" ))
342353 .addAggregation (new MaxAggregationBuilder ("max_time" ).field ("@timestamp" ))
343354 .addAggregation (
344- // We have nested aggregations, which in theory might blow up to MAX_TRACE_EVENTS_RESULT_SIZE^2 items
345- // reported. But we know that the total number of items is limited by our down-sampling to
346- // a maximum of ~100k (MAX_TRACE_EVENTS_RESULT_SIZE is higher to be on the safe side).
347355 new TermsAggregationBuilder ("group_by" )
348356 // 'size' specifies the max number of executable names we support per request.
349357 .size (MAX_TRACE_EVENTS_RESULT_SIZE )
350- .field ("host.id" )
358+ .field ("process.executable.name" )
351359 // 'execution_hint: map' skips the slow building of ordinals that we don't need.
352360 // Especially with high cardinality fields, this makes aggregations really slow.
353361 .executionHint ("map" )
354- .subAggregation (groupByStackTraceId )
362+ .subAggregation (groupByHostId )
355363 )
356364 .addAggregation (new SumAggregationBuilder ("total_count" ).field ("Stacktrace.count" ))
357365 .execute (handleEventsGroupedByStackTrace (submitTask , client , responseBuilder , submitListener , searchResponse -> {
358366 long totalCount = getAggValueAsLong (searchResponse , "total_count" );
359367
360368 Resampler resampler = new Resampler (request , responseBuilder .getSamplingRate (), totalCount );
361- Terms hosts = searchResponse .getAggregations ().get ("group_by" );
362369
363370 // Sort items lexicographically to access Lucene's term dictionary more efficiently when issuing an mget request.
364371 // The term dictionary is lexicographically sorted and using the same order reduces the number of page faults
365372 // needed to load it.
366373 long totalFinalCount = 0 ;
367374 List <HostEventCount > hostEventCounts = new ArrayList <>(MAX_TRACE_EVENTS_RESULT_SIZE );
375+ List <ExecutableEventCount > executableEventCounts = new ArrayList <>(MAX_TRACE_EVENTS_RESULT_SIZE );
368376 Map <String , TraceEvent > stackTraceEvents = new TreeMap <>();
369- for (Terms .Bucket hostBucket : hosts .getBuckets ()) {
370- String hostid = hostBucket .getKeyAsString ();
371-
372- Terms stacktraces = hostBucket .getAggregations ().get ("group_by" );
373- for (Terms .Bucket stacktraceBucket : stacktraces .getBuckets ()) {
374- Sum count = stacktraceBucket .getAggregations ().get ("count" );
375- int finalCount = resampler .adjustSampleCount ((int ) count .value ());
376- if (finalCount <= 0 ) {
377- continue ;
378- }
379- totalFinalCount += finalCount ;
380-
381- /*
382- The same stacktraces may come from different hosts (eventually from different datacenters).
383- We make a list of the triples here. As soon as we have the host metadata, we can calculate
384- the CO2 emission and the costs for each TraceEvent.
385- */
386- String stackTraceID = stacktraceBucket .getKeyAsString ();
387- hostEventCounts .add (new HostEventCount (hostid , stackTraceID , finalCount ));
388-
389- TraceEvent event = stackTraceEvents .get (stackTraceID );
390- if (event == null ) {
391- event = new TraceEvent (stackTraceID );
392- stackTraceEvents .put (stackTraceID , event );
377+
378+ Terms executableNames = searchResponse .getAggregations ().get ("group_by" );
379+ for (Terms .Bucket executableBucket : executableNames .getBuckets ()) {
380+ String executableName = executableBucket .getKeyAsString ();
381+
382+ Terms hosts = executableBucket .getAggregations ().get ("group_by" );
383+ for (Terms .Bucket hostBucket : hosts .getBuckets ()) {
384+ String hostid = hostBucket .getKeyAsString ();
385+
386+ Terms stacktraces = hostBucket .getAggregations ().get ("group_by" );
387+ for (Terms .Bucket stacktraceBucket : stacktraces .getBuckets ()) {
388+ Sum count = stacktraceBucket .getAggregations ().get ("count" );
389+ int finalCount = resampler .adjustSampleCount ((int ) count .value ());
390+ if (finalCount <= 0 ) {
391+ continue ;
392+ }
393+ totalFinalCount += finalCount ;
394+
395+ String stackTraceID = stacktraceBucket .getKeyAsString ();
396+
397+ /*
398+ The same stacktraces may come from different executables.
399+ We make a list of the triples here.
400+ */
401+ executableEventCounts .add (new ExecutableEventCount (executableName , stackTraceID , finalCount ));
402+
403+ /*
404+ The same stacktraces may come from different hosts (eventually from different datacenters).
405+ We make a list of the triples here. As soon as we have the host metadata, we can calculate
406+ the CO2 emission and the costs for each TraceEvent.
407+ */
408+ hostEventCounts .add (new HostEventCount (hostid , stackTraceID , finalCount ));
409+
410+ TraceEvent event = stackTraceEvents .get (stackTraceID );
411+ if (event == null ) {
412+ event = new TraceEvent (stackTraceID );
413+ stackTraceEvents .put (stackTraceID , event );
414+ }
415+ event .count += finalCount ;
416+ subGroups .collectResults (stacktraceBucket , event );
393417 }
394- event .count += finalCount ;
395- subGroups .collectResults (stacktraceBucket , event );
396418 }
397419 }
398420 responseBuilder .setTotalSamples (totalFinalCount );
399421 responseBuilder .setHostEventCounts (hostEventCounts );
422+ responseBuilder .setExecutableEventCounts (executableEventCounts );
400423 log .debug (
401424 "Found [{}] stacktrace events, resampled with sample rate [{}] to [{}] events ([{}] unique stack traces)." ,
402425 totalCount ,
@@ -834,4 +857,6 @@ private void mget(Client client, List<Index> indices, List<String> slice, Action
834857 }
835858
836859 record HostEventCount (String hostID , String stacktraceID , int count ) {}
860+
861+ record ExecutableEventCount (String executableName , String stacktraceID , int count ) {}
837862}
0 commit comments