From 540ef47fc061657bfd6a737b2b9fc924139906e4 Mon Sep 17 00:00:00 2001 From: Moritz Mack Date: Sun, 26 Oct 2025 09:59:16 +0100 Subject: [PATCH] Fix APM tracer to respect recording status of spans (#137015) Co-authored-by: Jack Shirazi --- .../apm/internal/tracing/APMTracer.java | 11 +++ .../apm/internal/tracing/APMTracerTests.java | 69 ++++++++++++++----- 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracer.java b/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracer.java index 4c2f1f740693a..3b6d931964a4d 100644 --- a/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracer.java +++ b/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracer.java @@ -189,6 +189,17 @@ public void startTrace(TraceContext traceContext, Traceable traceable, String sp spanBuilder.setStartTimestamp(startTime); } final Span span = spanBuilder.startSpan(); + // If the agent decided not to record this span (e.g., due to transaction_max_spans), isRecording() will be false. + if (span.isRecording() == false) { + logger.trace("Span [{}] [{}] will not be recorded, e.g. due to transaction_max_spans reached", spanId, spanName); + // It's good practice to end the no-op span immediately to release any resources. + span.end(); + // Returning null from computeIfAbsent means no value will be inserted into the map. + return null; + } + + // If we are here, the span is real and being recorded. + logger.trace("Successfully started tracing [{}] [{}]", spanId, spanName); final Context contextForNewSpan = Context.current().with(span); updateThreadContext(traceContext, services, contextForNewSpan); diff --git a/modules/apm/src/test/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracerTests.java b/modules/apm/src/test/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracerTests.java index 3a82aa129ed87..60f4c80b1d7fd 100644 --- a/modules/apm/src/test/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracerTests.java +++ b/modules/apm/src/test/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracerTests.java @@ -25,6 +25,8 @@ import org.elasticsearch.telemetry.apm.internal.APMAgentSettings; import org.elasticsearch.telemetry.tracing.Traceable; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.junit.annotations.TestLogging; +import org.mockito.Mockito; import java.time.Instant; import java.util.HashMap; @@ -44,6 +46,7 @@ import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +@TestLogging(reason = "improved visibility", value = "org.elasticsearch.telemetry.apm.internal.tracing:TRACE") public class APMTracerTests extends ESTestCase { private static final Traceable TRACEABLE1 = new TestTraceable("id1"); @@ -90,6 +93,37 @@ public void test_onTraceStarted_startsTrace() { assertThat(apmTracer.getSpans(), hasKey(TRACEABLE1.getSpanId())); } + /** + * Check that when a trace is started, but it is not recorded, e.g. due to sampling, the tracer does not record it either. + */ + public void test_onTraceStarted_ifNotRecorded_doesNotStartTrace() { + Settings settings = Settings.builder().put(APMAgentSettings.TELEMETRY_TRACING_ENABLED_SETTING.getKey(), true).build(); + APMTracer apmTracer = buildTracer(settings); + + apmTracer.startTrace(new ThreadContext(settings), TRACEABLE1, "name1_discard", null); + + assertThat(apmTracer.getSpans(), anEmptyMap()); + } + + /** + * Check that when a nested trace is discarded e.g.g due to transaction_max_spans exceeded, the tracer does not record it. + */ + public void test_onNestedTraceStarted_ifNotRecorded_doesNotStartTrace() { + Settings settings = Settings.builder().put(APMAgentSettings.TELEMETRY_TRACING_ENABLED_SETTING.getKey(), true).build(); + APMTracer apmTracer = buildTracer(settings); + + ThreadContext traceContext = new ThreadContext(settings); + apmTracer.startTrace(traceContext, TRACEABLE1, "name1", null); + try (var ignore1 = traceContext.newTraceContext()) { + apmTracer.startTrace(traceContext, TRACEABLE2, "name2_discard", null); + try (var ignore2 = traceContext.newTraceContext()) { + apmTracer.startTrace(traceContext, TRACEABLE3, "name3_discard", null); + } + } + assertThat(apmTracer.getSpans(), aMapWithSize(1)); + assertThat(apmTracer.getSpans(), hasKey(TRACEABLE1.getSpanId())); + } + /** * Checks that when a trace is started with a specific start time, the tracer starts a span and records it. */ @@ -266,8 +300,7 @@ APMServices createApmServices() { Tracer mockTracer = mock(Tracer.class); doAnswer(invocation -> { String spanName = (String) invocation.getArguments()[0]; - // spy the spanBuilder - return new SpySpanBuilder(apmServices.tracer(), spanName); + return new MockSpanBuilder(spanName); }).when(mockTracer).spanBuilder(anyString()); return new APMServices(mockTracer, apmServices.openTelemetry()); } @@ -276,81 +309,81 @@ Instant getSpanStartTime(String spanName) { return spanStartTimeMap.get(spanName); } - class SpySpanBuilder implements SpanBuilder { + /** + * There's no APM agent in unit tests. Spans created by the default span builder would be NOOP spans that are not recorded. + * This builder simulates recorded spans so that we can test the tracer behavior. + */ + class MockSpanBuilder implements SpanBuilder { - SpanBuilder delegatedSpanBuilder; + Span span; Instant startTime; String spanName; - SpySpanBuilder(Tracer tracer, String spanName) { - this.delegatedSpanBuilder = tracer.spanBuilder(spanName); + MockSpanBuilder(String spanName) { this.spanName = spanName; + this.span = Mockito.mock(Span.class, spanName); + // simulate discarded span due to transaction_max_spans exceeded + Mockito.when(span.isRecording()).thenReturn(spanName.endsWith("_discard") == false); + Mockito.when(span.storeInContext(Mockito.any(Context.class))).thenCallRealMethod(); } @Override public SpanBuilder setParent(Context context) { - delegatedSpanBuilder.setParent(context); + SpanContext spanContext = Span.fromContext(context).getSpanContext(); + Mockito.when(span.getSpanContext()).thenReturn(spanContext); return this; } @Override public SpanBuilder setNoParent() { - delegatedSpanBuilder.setNoParent(); + SpanContext invalid = SpanContext.getInvalid(); + Mockito.when(span.getSpanContext()).thenReturn(invalid); return this; } @Override public SpanBuilder addLink(SpanContext spanContext) { - delegatedSpanBuilder.addLink(spanContext); return this; } @Override public SpanBuilder addLink(SpanContext spanContext, Attributes attributes) { - delegatedSpanBuilder.addLink(spanContext, attributes); return this; } @Override public SpanBuilder setAttribute(String key, String value) { - delegatedSpanBuilder.setAttribute(key, value); return this; } @Override public SpanBuilder setAttribute(String key, long value) { - delegatedSpanBuilder.setAttribute(key, value); return this; } @Override public SpanBuilder setAttribute(String key, double value) { - delegatedSpanBuilder.setAttribute(key, value); return this; } @Override public SpanBuilder setAttribute(String key, boolean value) { - delegatedSpanBuilder.setAttribute(key, value); return this; } @Override public SpanBuilder setAttribute(AttributeKey key, T value) { - delegatedSpanBuilder.setAttribute(key, value); return this; } @Override public SpanBuilder setSpanKind(SpanKind spanKind) { - delegatedSpanBuilder.setSpanKind(spanKind); return this; } @Override public SpanBuilder setStartTimestamp(long startTimestamp, TimeUnit unit) { startTime = Instant.ofEpochMilli(TimeUnit.MILLISECONDS.convert(startTimestamp, unit)); - delegatedSpanBuilder.setStartTimestamp(startTimestamp, unit); return this; } @@ -358,7 +391,7 @@ public SpanBuilder setStartTimestamp(long startTimestamp, TimeUnit unit) { public Span startSpan() { // finally record the spanName-startTime association when the span is actually started spanStartTimeMap.put(spanName, startTime); - return delegatedSpanBuilder.startSpan(); + return span; } } }