Skip to content

Commit 431c13d

Browse files
rockdaboot, elasticsearchmachine, jakelandis
authored and committed
[Profiling] Aggregate flamegraph by process name and thread name (elastic#119115)
* Add field process.executable.name to profiling-events
* Amend query to aggregate events by executable name
* Send flamegraph row with grouping by executable name
* Flamegraph sub-aggregation by thread name
* Rework internal data model
* Cleanups
* Fix building tests
* Fix GetStackTracesResponseTests
* Fix unit tests
* Fix remaining unit tests
* [CI] Auto commit changes from spotless
* Fix flamegraph yaml tests
* Fix yaml REST tests
* Increase INDEX_TEMPLATE_VERSION for profiling.executable.name
* Fix yamlRestCompatTest
  Co-authored-by: Jake Landis <[email protected]>
* Rename executable name to process name
* Remove warnings meant for testing
* Replace ChunkedToXContentHelper.wrapWithObject() with .object()
* Fix comment in ProfilingIndexTemplateRegistry.java
* Simplify sorting of unique stacktrace and host IDs
* [CI] Auto commit changes from spotless
* Add 'missing()' to aggregations
* Fix syntax error after resolving merge conflicts
* Revert "Rename executable name to process name"
  This reverts commit e514874.
* Set FRAMETYPE_EXECUTABLE to 0x103
* Fix TransportGetFlamegraphActionTests

---------

Co-authored-by: elasticsearchmachine <[email protected]>
Co-authored-by: Jake Landis <[email protected]>
1 parent c9e72dc commit 431c13d

File tree

19 files changed

+321
-275
lines changed

19 files changed

+321
-275
lines changed

x-pack/plugin/build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ tasks.named("yamlRestCompatTestTransform").configure({ task ->
111111
task.skipTest("esql/40_tsdb/from index pattern unsupported counter", "TODO: support for subset of metric fields")
112112
task.skipTest("esql/40_unsupported_types/unsupported", "TODO: support for subset of metric fields")
113113
task.skipTest("esql/40_unsupported_types/unsupported with sort", "TODO: support for subset of metric fields")
114+
task.replaceValueInMatch("Size", 49, "Test flamegraph from profiling-events")
115+
task.replaceValueInMatch("Size", 49, "Test flamegraph from test-events")
114116
})
115117

116118
tasks.named('yamlRestCompatTest').configure {

x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-events.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"service.name",
1616
"host.name",
1717
"container.name",
18+
"process.executable.name",
1819
"process.thread.name",
1920
"@timestamp"
2021
],
@@ -26,6 +27,7 @@
2627
"asc",
2728
"asc",
2829
"asc",
30+
"asc",
2931
"desc"
3032
]
3133
}
@@ -67,6 +69,9 @@
6769
"process.thread.name": {
6870
"type": "keyword"
6971
},
72+
"process.executable.name": {
73+
"type": "keyword"
74+
},
7075
"Stacktrace.count": {
7176
"type": "short",
7277
"index": false

x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetFlameGraphActionIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ public void testGetStackTracesUnfiltered() throws Exception {
2626
);
2727
GetFlamegraphResponse response = client().execute(GetFlamegraphAction.INSTANCE, request).get();
2828
// only spot-check top level properties - detailed tests are done in unit tests
29-
assertEquals(994, response.getSize());
29+
assertEquals(1010, response.getSize());
3030
assertEquals(1.0d, response.getSamplingRate(), 0.001d);
3131
assertEquals(46, response.getSelfCPU());
32-
assertEquals(1903, response.getTotalCPU());
32+
assertEquals(1995, response.getTotalCPU());
3333
assertEquals(46, response.getTotalSamples());
3434

3535
// The root node's values are the same as the top-level values.

x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.index.query.TermQueryBuilder;
1313

1414
import java.util.List;
15+
import java.util.Map;
1516

1617
public class GetStackTracesActionIT extends ProfilingTestCase {
1718
public void testGetStackTracesUnfiltered() throws Exception {
@@ -36,7 +37,11 @@ public void testGetStackTracesUnfiltered() throws Exception {
3637
assertEquals(1821, response.getTotalFrames());
3738

3839
assertNotNull(response.getStackTraceEvents());
39-
assertEquals(3L, response.getStackTraceEvents().get("L7kj7UvlKbT-vN73el4faQ").count);
40+
41+
Map<TraceEventID, TraceEvent> traceEvents = response.getStackTraceEvents();
42+
43+
TraceEventID traceEventID = new TraceEventID("", "497295213074376", "8457605156473051743", "L7kj7UvlKbT-vN73el4faQ");
44+
assertEquals(3L, response.getStackTraceEvents().get(traceEventID).count);
4045

4146
assertNotNull(response.getStackTraces());
4247
// just do a high-level spot check. Decoding is tested in unit-tests
@@ -45,8 +50,6 @@ public void testGetStackTracesUnfiltered() throws Exception {
4550
assertEquals(18, stackTrace.fileIds.length);
4651
assertEquals(18, stackTrace.frameIds.length);
4752
assertEquals(18, stackTrace.typeIds.length);
48-
assertEquals(0.0000051026469d, stackTrace.annualCO2Tons, 0.0000000001d);
49-
assertEquals(0.19825d, stackTrace.annualCostsUSD, 0.00001d);
5053
// not determined by default
5154
assertNull(stackTrace.subGroups);
5255

@@ -80,7 +83,10 @@ public void testGetStackTracesGroupedByServiceName() throws Exception {
8083
assertEquals(1821, response.getTotalFrames());
8184

8285
assertNotNull(response.getStackTraceEvents());
83-
assertEquals(3L, response.getStackTraceEvents().get("L7kj7UvlKbT-vN73el4faQ").count);
86+
87+
TraceEventID traceEventID = new TraceEventID("", "497295213074376", "8457605156473051743", "L7kj7UvlKbT-vN73el4faQ");
88+
assertEquals(3L, response.getStackTraceEvents().get(traceEventID).count);
89+
assertEquals(Long.valueOf(2L), response.getStackTraceEvents().get(traceEventID).subGroups.getCount("basket"));
8490

8591
assertNotNull(response.getStackTraces());
8692
// just do a high-level spot check. Decoding is tested in unit-tests
@@ -89,9 +95,6 @@ public void testGetStackTracesGroupedByServiceName() throws Exception {
8995
assertEquals(18, stackTrace.fileIds.length);
9096
assertEquals(18, stackTrace.frameIds.length);
9197
assertEquals(18, stackTrace.typeIds.length);
92-
assertEquals(0.0000051026469d, stackTrace.annualCO2Tons, 0.0000000001d);
93-
assertEquals(0.19825d, stackTrace.annualCostsUSD, 0.00001d);
94-
assertEquals(Long.valueOf(2L), stackTrace.subGroups.getCount("basket"));
9598

9699
assertNotNull(response.getStackFrames());
97100
StackFrame stackFrame = response.getStackFrames().get("8NlMClggx8jaziUTJXlmWAAAAAAAAIYI");
@@ -127,8 +130,13 @@ public void testGetStackTracesFromAPMWithMatchNoDownsampling() throws Exception
127130
assertEquals(1.0d, response.getSamplingRate(), 0.001d);
128131

129132
assertNotNull(response.getStackTraceEvents());
130-
assertEquals(3L, response.getStackTraceEvents().get("Ce77w10WeIDow3kd1jowlA").count);
131-
assertEquals(2L, response.getStackTraceEvents().get("JvISdnJ47BQ01489cwF9DA").count);
133+
134+
TraceEventID traceEventID = new TraceEventID("", "", "", "Ce77w10WeIDow3kd1jowlA");
135+
assertEquals(3L, response.getStackTraceEvents().get(traceEventID).count);
136+
assertEquals(Long.valueOf(3L), response.getStackTraceEvents().get(traceEventID).subGroups.getCount("encodeSha1"));
137+
138+
traceEventID = new TraceEventID("", "", "", "JvISdnJ47BQ01489cwF9DA");
139+
assertEquals(2L, response.getStackTraceEvents().get(traceEventID).count);
132140

133141
assertNotNull(response.getStackTraces());
134142
// just do a high-level spot check. Decoding is tested in unit-tests
@@ -137,9 +145,6 @@ public void testGetStackTracesFromAPMWithMatchNoDownsampling() throws Exception
137145
assertEquals(39, stackTrace.fileIds.length);
138146
assertEquals(39, stackTrace.frameIds.length);
139147
assertEquals(39, stackTrace.typeIds.length);
140-
assertTrue(stackTrace.annualCO2Tons > 0.0d);
141-
assertTrue(stackTrace.annualCostsUSD > 0.0d);
142-
assertEquals(Long.valueOf(3L), stackTrace.subGroups.getCount("encodeSha1"));
143148

144149
assertNotNull(response.getStackFrames());
145150
StackFrame stackFrame = response.getStackFrames().get("fhsEKXDuxJ-jIJrZpdRuSAAAAAAAAFtj");
@@ -175,9 +180,13 @@ public void testGetStackTracesFromAPMWithMatchAndDownsampling() throws Exception
175180
assertEquals(0.2d, response.getSamplingRate(), 0.001d);
176181

177182
assertNotNull(response.getStackTraceEvents());
183+
178184
// as the sampling rate is 0.2, we see 5 times more samples (random sampler agg automatically adjusts sample count)
179-
assertEquals(5 * 3L, response.getStackTraceEvents().get("Ce77w10WeIDow3kd1jowlA").count);
180-
assertEquals(5 * 2L, response.getStackTraceEvents().get("JvISdnJ47BQ01489cwF9DA").count);
185+
TraceEventID traceEventID = new TraceEventID("", "", "", "Ce77w10WeIDow3kd1jowlA");
186+
assertEquals(5 * 3L, response.getStackTraceEvents().get(traceEventID).count);
187+
188+
traceEventID = new TraceEventID("", "", "", "JvISdnJ47BQ01489cwF9DA");
189+
assertEquals(5 * 2L, response.getStackTraceEvents().get(traceEventID).count);
181190

182191
assertNotNull(response.getStackTraces());
183192
// just do a high-level spot check. Decoding is tested in unit-tests
@@ -186,8 +195,6 @@ public void testGetStackTracesFromAPMWithMatchAndDownsampling() throws Exception
186195
assertEquals(39, stackTrace.fileIds.length);
187196
assertEquals(39, stackTrace.frameIds.length);
188197
assertEquals(39, stackTrace.typeIds.length);
189-
assertTrue(stackTrace.annualCO2Tons > 0.0d);
190-
assertTrue(stackTrace.annualCostsUSD > 0.0d);
191198
// not determined by default
192199
assertNull(stackTrace.subGroups);
193200

x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/GetStackTracesResponse.java

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public class GetStackTracesResponse extends ActionResponse implements ChunkedToX
2929
@Nullable
3030
private final Map<String, String> executables;
3131
@Nullable
32-
private final Map<String, TraceEvent> stackTraceEvents;
32+
private final Map<TraceEventID, TraceEvent> stackTraceEvents;
3333
private final int totalFrames;
3434
private final double samplingRate;
3535
private final long totalSamples;
@@ -38,7 +38,7 @@ public GetStackTracesResponse(
3838
Map<String, StackTrace> stackTraces,
3939
Map<String, StackFrame> stackFrames,
4040
Map<String, String> executables,
41-
Map<String, TraceEvent> stackTraceEvents,
41+
Map<TraceEventID, TraceEvent> stackTraceEvents,
4242
int totalFrames,
4343
double samplingRate,
4444
long totalSamples
@@ -69,7 +69,7 @@ public Map<String, String> getExecutables() {
6969
return executables;
7070
}
7171

72-
public Map<String, TraceEvent> getStackTraceEvents() {
72+
public Map<TraceEventID, TraceEvent> getStackTraceEvents() {
7373
return stackTraceEvents;
7474
}
7575

@@ -96,22 +96,24 @@ public Iterator<? extends ToXContent> toXContentChunked(ToXContent.Params params
9696
optional(
9797
"stack_trace_events",
9898
stackTraceEvents,
99-
(n, v) -> ChunkedToXContentHelper.object(n, v, entry -> (b, p) -> b.field(entry.getKey(), entry.getValue().count))
99+
(n, v) -> ChunkedToXContentHelper.object(
100+
n,
101+
Iterators.map(v.entrySet().iterator(), e -> (b, p) -> b.field(e.getKey().stacktraceID(), e.getValue().count))
102+
)
100103
),
101-
Iterators.single((b, p) -> b.field("sampling_rate", samplingRate)),
104+
Iterators.single((b, p) -> b.field("sampling_rate", samplingRate).endObject())
102105
// the following fields are intentionally not written to the XContent representation (only needed on the transport layer):
103106
//
104107
// * start
105108
// * end
106109
// * totalSamples
107-
ChunkedToXContentHelper.endObject()
108110
);
109111
}
110112

111-
private static <T> Iterator<? extends ToXContent> optional(
113+
private static <K, T> Iterator<? extends ToXContent> optional(
112114
String name,
113-
Map<String, T> values,
114-
BiFunction<String, Map<String, T>, Iterator<? extends ToXContent>> supplier
115+
Map<K, T> values,
116+
BiFunction<String, Map<K, T>, Iterator<? extends ToXContent>> supplier
115117
) {
116118
return (values != null) ? supplier.apply(name, values) : Collections.emptyIterator();
117119
}

x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/GetStackTracesResponseBuilder.java

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
package org.elasticsearch.xpack.profiling.action;
99

1010
import java.time.Instant;
11-
import java.util.List;
1211
import java.util.Map;
1312

1413
class GetStackTracesResponseBuilder {
@@ -18,8 +17,7 @@ class GetStackTracesResponseBuilder {
1817
private int totalFrames;
1918
private Map<String, StackFrame> stackFrames;
2019
private Map<String, String> executables;
21-
private Map<String, TraceEvent> stackTraceEvents;
22-
private List<TransportGetStackTracesAction.HostEventCount> hostEventCounts;
20+
private Map<TraceEventID, TraceEvent> stackTraceEvents;
2321
private double samplingRate;
2422
private long totalSamples;
2523
private Double requestedDuration;
@@ -67,19 +65,11 @@ public void setExecutables(Map<String, String> executables) {
6765
this.executables = executables;
6866
}
6967

70-
public void setStackTraceEvents(Map<String, TraceEvent> stackTraceEvents) {
68+
public void setStackTraceEvents(Map<TraceEventID, TraceEvent> stackTraceEvents) {
7169
this.stackTraceEvents = stackTraceEvents;
7270
}
7371

74-
public void setHostEventCounts(List<TransportGetStackTracesAction.HostEventCount> hostEventCounts) {
75-
this.hostEventCounts = hostEventCounts;
76-
}
77-
78-
public List<TransportGetStackTracesAction.HostEventCount> getHostEventCounts() {
79-
return hostEventCounts;
80-
}
81-
82-
public Map<String, TraceEvent> getStackTraceEvents() {
72+
public Map<TraceEventID, TraceEvent> getStackTraceEvents() {
8373
return stackTraceEvents;
8474
}
8575

@@ -149,17 +139,17 @@ public void setTotalSamples(long totalSamples) {
149139
public GetStackTracesResponse build() {
150140
// Merge the TraceEvent data into the StackTraces.
151141
if (stackTraces != null) {
152-
for (Map.Entry<String, StackTrace> entry : stackTraces.entrySet()) {
153-
String stacktraceID = entry.getKey();
154-
TraceEvent event = stackTraceEvents.get(stacktraceID);
155-
if (event != null) {
156-
StackTrace stackTrace = entry.getValue();
157-
stackTrace.count = event.count;
142+
for (Map.Entry<TraceEventID, TraceEvent> entry : stackTraceEvents.entrySet()) {
143+
TraceEventID traceEventID = entry.getKey();
144+
StackTrace stackTrace = stackTraces.get(traceEventID.stacktraceID());
145+
if (stackTrace != null) {
146+
TraceEvent event = entry.getValue();
158147
if (event.subGroups != null) {
159148
stackTrace.subGroups = event.subGroups;
160149
}
161-
stackTrace.annualCO2Tons = event.annualCO2Tons;
162-
stackTrace.annualCostsUSD = event.annualCostsUSD;
150+
stackTrace.count += event.count;
151+
stackTrace.annualCO2Tons += event.annualCO2Tons;
152+
stackTrace.annualCostsUSD += event.annualCostsUSD;
163153
}
164154
}
165155
}

x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/StackTrace.java

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,14 @@ final class StackTrace implements ToXContentObject {
3131
double annualCostsUSD;
3232
long count;
3333

34-
StackTrace(
35-
int[] addressOrLines,
36-
String[] fileIds,
37-
String[] frameIds,
38-
int[] typeIds,
39-
double annualCO2Tons,
40-
double annualCostsUSD,
41-
long count
42-
) {
34+
StackTrace(int[] addressOrLines, String[] fileIds, String[] frameIds, int[] typeIds) {
4335
this.addressOrLines = addressOrLines;
4436
this.fileIds = fileIds;
4537
this.frameIds = frameIds;
4638
this.typeIds = typeIds;
47-
this.annualCO2Tons = annualCO2Tons;
48-
this.annualCostsUSD = annualCostsUSD;
49-
this.count = count;
39+
annualCO2Tons = 0.0d;
40+
annualCostsUSD = 0.0d;
41+
count = 0;
5042
}
5143

5244
private static final int BASE64_FRAME_ID_LENGTH = 32;
@@ -218,7 +210,7 @@ public static StackTrace fromSource(Map<String, Object> source) {
218210
// Step 2: Convert the run-length byte encoding into a list of uint8s.
219211
int[] typeIDs = runLengthDecodeBase64Url(inputFrameTypes, inputFrameTypes.length(), countsFrameIDs);
220212

221-
return new StackTrace(addressOrLines, fileIDs, frameIDs, typeIDs, 0, 0, 0);
213+
return new StackTrace(addressOrLines, fileIDs, frameIDs, typeIDs);
222214
}
223215

224216
public void forNativeAndKernelFrames(Consumer<String> consumer) {
@@ -232,15 +224,15 @@ public void forNativeAndKernelFrames(Consumer<String> consumer) {
232224

233225
@Override
234226
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
235-
builder.startObject();
236-
builder.field("address_or_lines", this.addressOrLines);
237-
builder.field("file_ids", this.fileIds);
238-
builder.field("frame_ids", this.frameIds);
239-
builder.field("type_ids", this.typeIds);
240-
builder.field("annual_co2_tons", this.annualCO2Tons);
241-
builder.field("annual_costs_usd", this.annualCostsUSD);
242-
builder.field("count", this.count);
243-
builder.endObject();
227+
builder.startObject()
228+
.field("address_or_lines", this.addressOrLines)
229+
.field("file_ids", this.fileIds)
230+
.field("frame_ids", this.frameIds)
231+
.field("type_ids", this.typeIds)
232+
.field("annual_co2_tons", this.annualCO2Tons)
233+
.field("annual_costs_usd", this.annualCostsUSD)
234+
.field("count", this.count)
235+
.endObject();
244236
return builder;
245237
}
246238

x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/TraceEvent.java

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,53 +7,29 @@
77

88
package org.elasticsearch.xpack.profiling.action;
99

10-
import java.util.Objects;
11-
1210
final class TraceEvent {
13-
final String stacktraceID;
11+
long count;
1412
double annualCO2Tons;
1513
double annualCostsUSD;
16-
long count;
1714
SubGroup subGroups;
1815

19-
TraceEvent(String stacktraceID) {
20-
this(stacktraceID, 0);
16+
TraceEvent() {
17+
this(0);
2118
}
2219

23-
TraceEvent(String stacktraceID, long count) {
24-
this.stacktraceID = stacktraceID;
20+
TraceEvent(long count) {
2521
this.count = count;
2622
}
2723

28-
@Override
29-
public boolean equals(Object o) {
30-
if (this == o) {
31-
return true;
32-
}
33-
if (o == null || getClass() != o.getClass()) {
34-
return false;
35-
}
36-
TraceEvent event = (TraceEvent) o;
37-
return count == event.count && Objects.equals(stacktraceID, event.stacktraceID);
38-
}
39-
40-
@Override
41-
public int hashCode() {
42-
return Objects.hash(stacktraceID, count);
43-
}
44-
4524
@Override
4625
public String toString() {
4726
return "TraceEvent{"
48-
+ "stacktraceID='"
49-
+ stacktraceID
50-
+ '\''
27+
+ "count="
28+
+ count
5129
+ ", annualCO2Tons="
5230
+ annualCO2Tons
5331
+ ", annualCostsUSD="
5432
+ annualCostsUSD
55-
+ ", count="
56-
+ count
5733
+ ", subGroups="
5834
+ subGroups
5935
+ '}';

0 commit comments

Comments
 (0)