Skip to content

Commit 8d5eabe

Browse files
authored
Minor fixes for APM tracing (#138587)
* If running a recent version of APM server, it's not necessary to track an unsampled root span (aka transaction) to report a correct duration. APM agent only reports these for servers < v8. * Cleanup parent trace headers if not tracing (recording), to avoid any further unnecessary attempts to create spans that are immediately discarded. This might be due to sampling or exceeding max transaction spans. Relates to ES-13389
1 parent 65426e8 commit 8d5eabe

File tree

5 files changed

+63
-18
lines changed

5 files changed

+63
-18
lines changed

build-tools/src/main/java/org/elasticsearch/gradle/testclusters/MockApmServer.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,26 @@ public void stop() {
114114
}
115115

116116
class RootHandler implements HttpHandler {
117+
// checked by APM agent to identify the APM server version to adjust its behavior accordingly
118+
private static final String FAKE_VERSION = """
119+
{
120+
"build_date": "2021-12-18T19:59:06Z",
121+
"build_sha": "24fe620eeff5a19e2133c940c7e5ce1ceddb1445",
122+
"publish_ready": true,
123+
"version": "9.0.0"
124+
}
125+
""";
126+
117127
public void handle(HttpExchange t) {
118128
try {
129+
if ("GET".equals(t.getRequestMethod()) && "/".equals(t.getRequestURI().getPath())) {
130+
t.sendResponseHeaders(200, FAKE_VERSION.length());
131+
try (OutputStream os = t.getResponseBody()) {
132+
os.write(FAKE_VERSION.getBytes());
133+
}
134+
return;
135+
}
136+
119137
InputStream body = t.getRequestBody();
120138
if (metricFilter == null && transactionFilter == null) {
121139
logRequestBody(body);

modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracer.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,10 +192,14 @@ public void startTrace(TraceContext traceContext, Traceable traceable, String sp
192192
}
193193

194194
final Span span = spanBuilder.startSpan();
195-
// If not a root span (meaning a local parent exists) and the agent decided not to record the span, discard it immediately.
196-
// Root spans (transactions), however, have to be kept to correctly report their duration.
197-
if (localParentContext != null && span.isRecording() == false) {
198-
logger.trace("Span [{}] [{}] will not be recorded due to transaction_max_spans reached", spanId, spanName);
195+
if (span.isRecording() == false) {
196+
if (localParentContext == null) {
197+
// this root span (transactions) is dropped due to sampling; the agent might report these when connected to
198+
// very old versions of apm server, however (with an incorrect duration)
199+
logger.trace("Root span [{}] [{}] will not be recorded due to sampling", spanId, spanName);
200+
} else {
201+
logger.trace("Span [{}] [{}] will not be recorded due to transaction_max_spans reached", spanId, spanName);
202+
}
199203
span.end(); // end span immediately to release any resources.
200204
return null; // return null to discard and not record in map of spans
201205
}

modules/apm/src/test/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracerTests.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,7 @@ public void test_onTraceStarted_ifNotRecorded_doesNotStartTracing() {
109109
apmTracer.startTrace(traceContext, TRACEABLE1, "name1_discard", null);
110110

111111
assertThat(traceContext.getTransient(Task.APM_TRACE_CONTEXT), nullValue());
112-
// the root span (transaction) is tracked
113-
assertThat(apmTracer.getSpans(), aMapWithSize(1));
114-
assertThat(apmTracer.getSpans(), hasKey(TRACEABLE1.getSpanId()));
112+
assertThat(apmTracer.getSpans(), anEmptyMap());
115113
}
116114

117115
/**

server/src/main/java/org/elasticsearch/common/util/concurrent/ThreadContext.java

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -205,16 +205,41 @@ public StoredContext newTraceContext() {
205205
// this is the context when this method returns
206206
final ThreadContextStruct newContext;
207207

208-
final boolean hasTraceHeaders = originalContext.requestHeaders.containsKey(Task.TRACE_PARENT_HTTP_HEADER)
209-
|| originalContext.requestHeaders.containsKey(Task.TRACE_STATE)
210-
|| originalContext.transientHeaders.containsKey(Task.APM_TRACE_CONTEXT);
208+
final var requestHeaders = originalContext.requestHeaders;
209+
final var transientHeaders = originalContext.transientHeaders;
210+
211+
final boolean hasTraceHeaders = requestHeaders.containsKey(Task.TRACE_PARENT_HTTP_HEADER)
212+
|| requestHeaders.containsKey(Task.TRACE_STATE)
213+
|| transientHeaders.containsKey(Task.APM_TRACE_CONTEXT);
211214

212215
if (hasTraceHeaders == false) {
213-
// no need to copy if no trace headers are present
214-
newContext = originalContext;
216+
final boolean hasParentTraceHeaders = (transientHeaders.containsKey(Task.PARENT_TRACE_PARENT_HEADER)
217+
|| transientHeaders.containsKey(Task.PARENT_TRACE_STATE)
218+
|| transientHeaders.containsKey(Task.PARENT_APM_TRACE_CONTEXT));
219+
220+
if (hasParentTraceHeaders == false) {
221+
// no need to copy if no trace headers are present
222+
newContext = originalContext;
223+
} else {
224+
// tracing was stopped (e.g. after reaching max spans);
225+
// remove parent trace headers to not attempt creating any further, immediately discarded spans
226+
final Map<String, Object> newTransientHeaders = new HashMap<>(transientHeaders);
227+
newTransientHeaders.remove(Task.PARENT_TRACE_PARENT_HEADER);
228+
newTransientHeaders.remove(Task.PARENT_TRACE_STATE);
229+
newTransientHeaders.remove(Task.PARENT_APM_TRACE_CONTEXT);
230+
231+
newContext = new ThreadContextStruct(
232+
requestHeaders,
233+
originalContext.responseHeaders,
234+
newTransientHeaders,
235+
originalContext.isSystemContext,
236+
originalContext.warningHeadersSize
237+
);
238+
threadLocal.set(newContext);
239+
}
215240
} else {
216-
final Map<String, String> newRequestHeaders = new HashMap<>(originalContext.requestHeaders);
217-
final Map<String, Object> newTransientHeaders = new HashMap<>(originalContext.transientHeaders);
241+
final Map<String, String> newRequestHeaders = new HashMap<>(requestHeaders);
242+
final Map<String, Object> newTransientHeaders = new HashMap<>(transientHeaders);
218243

219244
final String previousTraceParent = newRequestHeaders.remove(Task.TRACE_PARENT_HTTP_HEADER);
220245
if (previousTraceParent != null) {

server/src/test/java/org/elasticsearch/tasks/TaskManagerTests.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -287,8 +287,8 @@ public void testRegisterTaskStartsTracingIfTraceParentExists() {
287287
final Tracer mockTracer = mock(Tracer.class);
288288
final TaskManager taskManager = new TaskManager(Settings.EMPTY, threadPool, Set.of(), mockTracer);
289289

290-
// fake a parent APM trace context
291-
threadPool.getThreadContext().putTransient(Task.PARENT_APM_TRACE_CONTEXT, null);
290+
// fake an APM trace context
291+
threadPool.getThreadContext().putTransient(Task.APM_TRACE_CONTEXT, new Object());
292292
final boolean hasParentTask = randomBoolean();
293293
final TaskId parentTask = hasParentTask ? new TaskId("parentNode", 1) : TaskId.EMPTY_TASK_ID;
294294

@@ -408,8 +408,8 @@ public void testRegisterAndExecuteStartsTracingIfTraceParentExists() {
408408
final Tracer mockTracer = mock(Tracer.class);
409409
final TaskManager taskManager = new TaskManager(Settings.EMPTY, threadPool, Set.of(), mockTracer);
410410

411-
// fake a parent APM trace context
412-
threadPool.getThreadContext().putTransient(Task.PARENT_APM_TRACE_CONTEXT, null);
411+
// fake an APM trace context
412+
threadPool.getThreadContext().putTransient(Task.APM_TRACE_CONTEXT, new Object());
413413

414414
final Task task = taskManager.registerAndExecute(
415415
"testType",

0 commit comments

Comments
 (0)