From 92f0a5adf69f21d6fd30d1194e6e5a5b861378da Mon Sep 17 00:00:00 2001 From: Mahad Janjua Date: Tue, 26 Aug 2025 12:00:10 -0700 Subject: [PATCH 1/7] [AdaptiveSampling] Capture all spans in anomaly traces + use cache with TTL for saving traceIDs --- .../patches/opentelemetry-java-contrib.patch | 124 +++++++++++++----- 1 file changed, 90 insertions(+), 34 deletions(-) diff --git a/.github/patches/opentelemetry-java-contrib.patch b/.github/patches/opentelemetry-java-contrib.patch index afcd9ede47..cece598c36 100644 --- a/.github/patches/opentelemetry-java-contrib.patch +++ b/.github/patches/opentelemetry-java-contrib.patch @@ -1,5 +1,5 @@ diff --git a/aws-xray/build.gradle.kts b/aws-xray/build.gradle.kts -index ccec9d52..f764bba9 100644 +index ccec9d52..fddbad18 100644 --- a/aws-xray/build.gradle.kts +++ b/aws-xray/build.gradle.kts @@ -11,6 +11,7 @@ dependencies { @@ -10,6 +10,14 @@ index ccec9d52..f764bba9 100644 implementation("com.squareup.okhttp3:okhttp") implementation("io.opentelemetry:opentelemetry-semconv") +@@ -24,6 +25,7 @@ dependencies { + + implementation("com.fasterxml.jackson.core:jackson-core") + implementation("com.fasterxml.jackson.core:jackson-databind") ++ implementation("com.github.ben-manes.caffeine:caffeine:2.9.3") + + testImplementation("com.linecorp.armeria:armeria-junit5") + testImplementation("io.opentelemetry:opentelemetry-sdk-extension-autoconfigure") diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsSamplingResult.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsSamplingResult.java new file mode 100644 index 00000000..41f22f90 @@ -894,15 +902,17 @@ index 1ef8abf5..328e63dd 100644 } } diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java -index 75977dc0..a60fec96 100644 +index 75977dc0..7cd9bcc5 100644 --- a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java +++ b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java -@@ -5,42 +5,78 @@ +@@ -5,42 +5,81 @@ package io.opentelemetry.contrib.awsxray; +import static io.opentelemetry.semconv.HttpAttributes.HTTP_RESPONSE_STATUS_CODE; + ++import com.github.benmanes.caffeine.cache.Cache; ++import com.github.benmanes.caffeine.cache.Caffeine; +import io.opentelemetry.api.common.AttributeKey; import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.trace.Span; @@ -922,6 +932,7 @@ index 75977dc0..a60fec96 100644 +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; ++import java.time.Duration; import java.util.Arrays; import java.util.Comparator; import java.util.Date; @@ -930,7 +941,6 @@ index 75977dc0..a60fec96 100644 import java.util.Map; import java.util.Objects; import java.util.Set; -+import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Consumer; import java.util.logging.Level; import java.util.logging.Logger; @@ -961,7 +971,8 @@ index 75977dc0..a60fec96 100644 + private final Map hashToRuleMap; + + private final boolean adaptiveSamplingRuleExists; -+ private final Set anomalyTracesSet; ++ private final Cache anomalyTracesCache; ++ private final Cache capturedTraceIdCache; + + @Nullable private AwsXrayAdaptiveSamplingConfig adaptiveSamplingConfig; + @Nullable private RateLimiter anomalyCaptureRateLimiter; @@ -977,7 +988,7 @@ index 75977dc0..a60fec96 100644 this( clientId, resource, -@@ -49,8 +85,17 @@ final class XrayRulesSampler implements Sampler { +@@ -49,8 +88,17 @@ final class XrayRulesSampler implements Sampler { rules.stream() // Lower priority value takes precedence so normal ascending sort. .sorted(Comparator.comparingInt(GetSamplingRulesResponse.SamplingRule::getPriority)) @@ -997,7 +1008,7 @@ index 75977dc0..a60fec96 100644 } private XrayRulesSampler( -@@ -58,12 +103,32 @@ final class XrayRulesSampler implements Sampler { +@@ -58,12 +106,42 @@ final class XrayRulesSampler implements Sampler { Resource resource, Clock clock, Sampler fallbackSampler, @@ -1018,8 +1029,18 @@ index 75977dc0..a60fec96 100644 + } + this.adaptiveSamplingRuleExists = adaptiveSamplingRuleExists; + this.adaptiveSamplingConfig = adaptiveSamplingConfig; -+ // The set is self-clearing, when spans close they are removed from the set -+ this.anomalyTracesSet = ConcurrentHashMap.newKeySet(100_000); ++ this.anomalyTracesCache = ++ Caffeine.newBuilder() ++ .maximumSize(100_000) ++ .ticker(clock::nanoTime) ++ .expireAfterWrite(Duration.ofMinutes(10)) ++ .build(); ++ this.capturedTraceIdCache = ++ Caffeine.newBuilder() ++ .maximumSize(10_000) ++ .ticker(clock::nanoTime) ++ .expireAfterWrite(Duration.ofMinutes(1)) ++ .build(); + + // Initialize anomaly capture rate limiter if error capture limit is configured + if (adaptiveSamplingConfig != null && adaptiveSamplingConfig.getErrorCaptureLimit() != null) { @@ -1031,7 +1052,7 @@ index 75977dc0..a60fec96 100644 } @Override -@@ -74,10 +139,36 @@ final class XrayRulesSampler implements Sampler { +@@ -74,10 +152,36 @@ final class XrayRulesSampler implements Sampler { SpanKind spanKind, Attributes attributes, List parentLinks) { @@ -1070,7 +1091,7 @@ index 75977dc0..a60fec96 100644 } } -@@ -96,7 +187,161 @@ final class XrayRulesSampler implements Sampler { +@@ -96,7 +200,164 @@ final class XrayRulesSampler implements Sampler { return "XrayRulesSampler{" + Arrays.toString(ruleAppliers) + "}"; } @@ -1175,10 +1196,12 @@ index 75977dc0..a60fec96 100644 + parentContext == null || !parentContext.isValid() || parentContext.isRemote(); + + // Anomaly Capture -+ if (shouldCaptureAnomalySpan -+ && !span.getSpanContext().isSampled() -+ && anomalyCaptureRateLimiter != null -+ && anomalyCaptureRateLimiter.trySpend(1)) { ++ if (capturedTraceIdCache.getIfPresent(traceId) != null ++ || (shouldCaptureAnomalySpan ++ && !span.getSpanContext().isSampled() ++ && anomalyCaptureRateLimiter != null ++ && anomalyCaptureRateLimiter.trySpend(1))) { ++ capturedTraceIdCache.put(traceId, true); + spanBatcher.accept(span); + } + @@ -1217,14 +1240,15 @@ index 75977dc0..a60fec96 100644 + if (shouldBoostSampling + && ruleToReportTo != null + && ruleToReportTo.hasBoost() -+ && this.anomalyTracesSet.add(traceId)) { ++ && this.anomalyTracesCache.getIfPresent(traceId) == null) { ++ this.anomalyTracesCache.put(traceId, true); + ruleToReportTo.countAnomalyTrace(span); + } + if (isLocalRootSpan) { + if (ruleToReportTo != null && ruleToReportTo.hasBoost()) { + ruleToReportTo.countTrace(); + } -+ this.anomalyTracesSet.remove(traceId); ++ this.anomalyTracesCache.invalidate(traceId); + } + } + } @@ -1233,7 +1257,7 @@ index 75977dc0..a60fec96 100644 return Arrays.stream(ruleAppliers) .map(rule -> rule.snapshot(now)) .filter(Objects::nonNull) -@@ -115,15 +360,16 @@ final class XrayRulesSampler implements Sampler { +@@ -115,15 +376,16 @@ final class XrayRulesSampler implements Sampler { Map ruleTargets, Set requestedTargetRuleNames, Date now) { @@ -1252,7 +1276,7 @@ index 75977dc0..a60fec96 100644 } if (requestedTargetRuleNames.contains(rule.getRuleName())) { // In practice X-Ray should return a target for any rule we requested but -@@ -135,6 +381,90 @@ final class XrayRulesSampler implements Sampler { +@@ -135,6 +397,90 @@ final class XrayRulesSampler implements Sampler { return rule; }) .toArray(SamplingRuleApplier[]::new); @@ -1340,8 +1364,8 @@ index 75977dc0..a60fec96 100644 + } + + // For testing -+ Set getAnomalyTracesSet() { -+ return anomalyTracesSet; ++ Cache getAnomalyTracesCache() { ++ return anomalyTracesCache; } } diff --git a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/AwsXrayRemoteSamplerTest.java b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/AwsXrayRemoteSamplerTest.java @@ -1981,7 +2005,7 @@ index 6bb6e82a..6d71711b 100644 return applier.shouldSample( Context.current(), diff --git a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java -index 1ca8df34..3c1dfda6 100644 +index 1ca8df34..f6a24af8 100644 --- a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java +++ b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java @@ -5,17 +5,28 @@ @@ -2201,7 +2225,7 @@ index 1ca8df34..3c1dfda6 100644 // Minimum is batTarget, 5s from now assertThat(sampler.nextTargetFetchTimeNanos()) -@@ -169,6 +251,731 @@ class XrayRulesSamplerTest { +@@ -169,6 +251,763 @@ class XrayRulesSamplerTest { assertThat(sampler.snapshot(Date.from(now))).hasSize(4); } @@ -2474,7 +2498,7 @@ index 1ca8df34..3c1dfda6 100644 + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); -+ assertThat(sampler.getAnomalyTracesSet().isEmpty()).isEqualTo(true); ++ assertThat(sampler.getAnomalyTracesCache().asMap().size()).isEqualTo(0); + } + + @Test @@ -2545,15 +2569,17 @@ index 1ca8df34..3c1dfda6 100644 + "TRACE_ID", "SPAN_ID", TraceFlags.getDefault(), TraceState.getDefault())); + SpanData spanDataMock = mock(SpanData.class); + Attributes attributesMock = mock(Attributes.class); -+ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID"); + when(spanDataMock.getAttributes()).thenReturn(attributesMock); + when(attributesMock.get(HTTP_RESPONSE_STATUS_CODE)).thenReturn(500L); + LongAdder exportCounter = new LongAdder(); + Consumer stubbedConsumer = x -> exportCounter.increment(); + + // First span should be captured, second should be rate limited ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID1"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID2"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID3"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + // Only first span captured due to rate limiting + assertThat(exportCounter.sumThenReset()).isEqualTo(2L); @@ -2595,7 +2621,7 @@ index 1ca8df34..3c1dfda6 100644 + assertThat(snapshot.get(1).getBoostStatisticsDocument().getTotalCount()).isEqualTo(0); + assertThat(snapshot.get(1).getBoostStatisticsDocument().getAnomalyCount()).isEqualTo(0); + assertThat(snapshot.get(1).getBoostStatisticsDocument().getSampledAnomalyCount()).isEqualTo(0); -+ assertThat(sampler.getAnomalyTracesSet().isEmpty()).isEqualTo(true); ++ assertThat(sampler.getAnomalyTracesCache().asMap().size()).isEqualTo(0); + } + + @Test @@ -2686,15 +2712,17 @@ index 1ca8df34..3c1dfda6 100644 + + SpanData spanDataMock = mock(SpanData.class); + Attributes attributesMock = mock(Attributes.class); -+ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID"); + when(spanDataMock.getAttributes()).thenReturn(attributesMock); + when(attributesMock.get(HTTP_RESPONSE_STATUS_CODE)).thenReturn(456L); + + LongAdder exportCounter = new LongAdder(); + Consumer stubbedConsumer = x -> exportCounter.increment(); + ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID1"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID2"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID3"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + assertThat(exportCounter.sum()).isEqualTo(2L); + } @@ -2752,15 +2780,17 @@ index 1ca8df34..3c1dfda6 100644 + + SpanData spanDataMock = mock(SpanData.class); + Attributes attributesMock = mock(Attributes.class); -+ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID"); + when(spanDataMock.getAttributes()).thenReturn(attributesMock); + when(attributesMock.get(HTTP_RESPONSE_STATUS_CODE)).thenReturn(200L); + + LongAdder exportCounter = new LongAdder(); + Consumer stubbedConsumer = x -> exportCounter.add(1); + ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID1"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID2"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID3"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + assertThat(exportCounter.sum()).isEqualTo(2L); + } @@ -2826,22 +2856,23 @@ index 1ca8df34..3c1dfda6 100644 + LongAdder exportCounter = new LongAdder(); + Consumer stubbedConsumer = x -> exportCounter.add(1); + -+ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); -+ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID1"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + assertThat(exportCounter.sum()).isEqualTo(0L); + + when(attributesMock.get(HTTP_RESPONSE_STATUS_CODE)).thenReturn(456L); + when(readableSpanMock.getLatencyNanos()).thenReturn(1L); -+ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); -+ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID2"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + assertThat(exportCounter.sum()).isEqualTo(0L); + + when(attributesMock.get(HTTP_RESPONSE_STATUS_CODE)).thenReturn(456L); + when(readableSpanMock.getLatencyNanos()).thenReturn(300_000_000L); // 300 ms ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID3"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID4"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID5"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + assertThat(exportCounter.sum()).isEqualTo(2L); + } @@ -2916,18 +2947,43 @@ index 1ca8df34..3c1dfda6 100644 + assertThat(exportCounter.sumThenReset()).isEqualTo(2L); + + // Test non-matching operation ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID_2"); + when(attributesMock.get(URL_PATH)).thenReturn("/api1/ext"); + when(attributesMock.get(HTTP_METHOD)).thenReturn("POST"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + when(attributesMock.get(URL_PATH)).thenReturn("/non-matching"); + when(attributesMock.get(HTTP_METHOD)).thenReturn("GET"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); -+ assertThat(exportCounter.sum()).isEqualTo(0L); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(0L); + + // Test aws.local.operation takes priority + when(attributesMock.get(AwsAttributeKeys.AWS_LOCAL_OPERATION)).thenReturn("GET /api1"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); -+ assertThat(exportCounter.sum()).isEqualTo(1L); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(1L); ++ ++ // Test sending previously matched traceIDs gets captured ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID"); ++ when(attributesMock.get(AwsAttributeKeys.AWS_LOCAL_OPERATION)).thenReturn("GET /non-matching"); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID_2"); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(2L); ++ ++ // Test sending previously matched traceIDs gets captured as long as trace is active ++ clock.advance(Duration.ofSeconds(45)); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(1L); ++ clock.advance(Duration.ofSeconds(45)); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(1L); ++ clock.advance(Duration.ofSeconds(45)); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(1L); ++ ++ // Test sending non-matching trace after expire-time elapses ++ clock.advance(Duration.ofMinutes(100)); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(0L); + } + private static SamplingResult doSample(Sampler sampler, String name) { From 2bee3f307c6121175c57b9ffccec9c2676eaad04 Mon Sep 17 00:00:00 2001 From: Mahad Janjua Date: Tue, 26 Aug 2025 14:33:03 -0700 Subject: [PATCH 2/7] Fix naming of local config --- .../patches/opentelemetry-java-contrib.patch | 36 +++++++++---------- ...licationSignalsCustomizerProviderTest.java | 2 +- .../adaptive-sampling-config-invalid.yaml | 2 +- .../adaptive-sampling-config-valid.yaml | 2 +- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/patches/opentelemetry-java-contrib.patch b/.github/patches/opentelemetry-java-contrib.patch index cece598c36..9c8229c2d4 100644 --- a/.github/patches/opentelemetry-java-contrib.patch +++ b/.github/patches/opentelemetry-java-contrib.patch @@ -80,7 +80,7 @@ index 00000000..41f22f90 +} diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsXrayAdaptiveSamplingConfig.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsXrayAdaptiveSamplingConfig.java new file mode 100644 -index 00000000..836ebf98 +index 00000000..b2146f13 --- /dev/null +++ b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsXrayAdaptiveSamplingConfig.java @@ -0,0 +1,139 @@ @@ -207,8 +207,8 @@ index 00000000..836ebf98 + @JsonDeserialize( + builder = AutoValue_AwsXrayAdaptiveSamplingConfig_ErrorCaptureLimit.Builder.class) + public abstract static class ErrorCaptureLimit { -+ @JsonProperty("errorSpansPerSecond") -+ public abstract int getErrorSpansPerSecond(); ++ @JsonProperty("errorTracesPerSecond") ++ public abstract int getErrorTracesPerSecond(); + + public static Builder builder() { + return new AutoValue_AwsXrayAdaptiveSamplingConfig_ErrorCaptureLimit.Builder(); @@ -216,8 +216,8 @@ index 00000000..836ebf98 + + @AutoValue.Builder + public abstract static class Builder { -+ @JsonProperty("errorSpansPerSecond") -+ public abstract Builder setErrorSpansPerSecond(int value); ++ @JsonProperty("errorTracesPerSecond") ++ public abstract Builder setErrorTracesPerSecond(int value); + + public abstract ErrorCaptureLimit build(); + } @@ -902,7 +902,7 @@ index 1ef8abf5..328e63dd 100644 } } diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java -index 75977dc0..7cd9bcc5 100644 +index 75977dc0..23d3d490 100644 --- a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java +++ b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java @@ -5,42 +5,81 @@ @@ -1044,10 +1044,10 @@ index 75977dc0..7cd9bcc5 100644 + + // Initialize anomaly capture rate limiter if error capture limit is configured + if (adaptiveSamplingConfig != null && adaptiveSamplingConfig.getErrorCaptureLimit() != null) { -+ int errorSpansPerSecond = -+ adaptiveSamplingConfig.getErrorCaptureLimit().getErrorSpansPerSecond(); ++ int errorTracesPerSecond = ++ adaptiveSamplingConfig.getErrorCaptureLimit().getErrorTracesPerSecond(); + this.anomalyCaptureRateLimiter = -+ new RateLimiter(errorSpansPerSecond, errorSpansPerSecond, clock); ++ new RateLimiter(errorTracesPerSecond, errorTracesPerSecond, clock); + } } @@ -1104,9 +1104,9 @@ index 75977dc0..7cd9bcc5 100644 + + // Initialize anomaly capture rate limiter if error capture limit is configured + if (config.getErrorCaptureLimit() != null) { -+ int errorSpansPerSecond = config.getErrorCaptureLimit().getErrorSpansPerSecond(); ++ int errorTracesPerSecond = config.getErrorCaptureLimit().getErrorTracesPerSecond(); + this.anomalyCaptureRateLimiter = -+ new RateLimiter(errorSpansPerSecond, errorSpansPerSecond, clock); ++ new RateLimiter(errorTracesPerSecond, errorTracesPerSecond, clock); + } + } + } @@ -2005,7 +2005,7 @@ index 6bb6e82a..6d71711b 100644 return applier.shouldSample( Context.current(), diff --git a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java -index 1ca8df34..f6a24af8 100644 +index 1ca8df34..2f25c8af 100644 --- a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java +++ b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java @@ -5,17 +5,28 @@ @@ -2300,7 +2300,7 @@ index 1ca8df34..f6a24af8 100644 + .setVersion(1.0) + .setErrorCaptureLimit( + AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorSpansPerSecond(2) ++ .setErrorTracesPerSecond(2) + .build()) + .build(); + @@ -2540,7 +2540,7 @@ index 1ca8df34..f6a24af8 100644 + .setVersion(1.0) + .setErrorCaptureLimit( + AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorSpansPerSecond(2) ++ .setErrorTracesPerSecond(2) + .build()) + .setAnomalyConditions( + Arrays.asList( @@ -2684,7 +2684,7 @@ index 1ca8df34..f6a24af8 100644 + .setVersion(1.0) + .setErrorCaptureLimit( + AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorSpansPerSecond(2) ++ .setErrorTracesPerSecond(2) + .build()) + .setAnomalyConditions( + Arrays.asList( @@ -2752,7 +2752,7 @@ index 1ca8df34..f6a24af8 100644 + .setVersion(1.0) + .setErrorCaptureLimit( + AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorSpansPerSecond(2) ++ .setErrorTracesPerSecond(2) + .build()) + .setAnomalyConditions( + Arrays.asList( @@ -2820,7 +2820,7 @@ index 1ca8df34..f6a24af8 100644 + .setVersion(1.0) + .setErrorCaptureLimit( + AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorSpansPerSecond(2) ++ .setErrorTracesPerSecond(2) + .build()) + .setAnomalyConditions( + Arrays.asList( @@ -2902,7 +2902,7 @@ index 1ca8df34..f6a24af8 100644 + .setVersion(1.0) + .setErrorCaptureLimit( + AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorSpansPerSecond(10) ++ .setErrorTracesPerSecond(10) + .build()) + .setAnomalyConditions( + Arrays.asList( diff --git a/awsagentprovider/src/test/java/software/amazon/opentelemetry/javaagent/providers/AwsApplicationSignalsCustomizerProviderTest.java b/awsagentprovider/src/test/java/software/amazon/opentelemetry/javaagent/providers/AwsApplicationSignalsCustomizerProviderTest.java index 308e11ab26..2a3c2ee831 100644 --- a/awsagentprovider/src/test/java/software/amazon/opentelemetry/javaagent/providers/AwsApplicationSignalsCustomizerProviderTest.java +++ b/awsagentprovider/src/test/java/software/amazon/opentelemetry/javaagent/providers/AwsApplicationSignalsCustomizerProviderTest.java @@ -81,7 +81,7 @@ void setAdaptiveSamplingConfigFromFile_validYaml() // Assert the configuration was parsed correctly assertThat(config).isNotNull(); assertThat(config.getVersion()).isEqualTo(1); - assertThat(config.getErrorCaptureLimit().getErrorSpansPerSecond()).isEqualTo(10); + assertThat(config.getErrorCaptureLimit().getErrorTracesPerSecond()).isEqualTo(10); } @Test diff --git a/awsagentprovider/src/test/resources/adaptive-sampling-config-invalid.yaml b/awsagentprovider/src/test/resources/adaptive-sampling-config-invalid.yaml index 6091921955..6d5b5a119f 100644 --- a/awsagentprovider/src/test/resources/adaptive-sampling-config-invalid.yaml +++ b/awsagentprovider/src/test/resources/adaptive-sampling-config-invalid.yaml @@ -10,4 +10,4 @@ anomalyConditions: operations: invalid part of config usage: both errorCaptureLimit: - errorSpansPerSecond: 10 \ No newline at end of file + errorTracesPerSecond: 10 \ No newline at end of file diff --git a/awsagentprovider/src/test/resources/adaptive-sampling-config-valid.yaml b/awsagentprovider/src/test/resources/adaptive-sampling-config-valid.yaml index 34545a0c6a..2fa380ccf0 100644 --- a/awsagentprovider/src/test/resources/adaptive-sampling-config-valid.yaml +++ b/awsagentprovider/src/test/resources/adaptive-sampling-config-valid.yaml @@ -9,4 +9,4 @@ anomalyConditions: - errorCodeRegex: "^2\\d\\d$" usage: both errorCaptureLimit: - errorSpansPerSecond: 10 \ No newline at end of file + errorTracesPerSecond: 10 \ No newline at end of file From 66d5762304e2ac56ae57e5574a4e52e085d513ac Mon Sep 17 00:00:00 2001 From: Mahad Janjua Date: Tue, 26 Aug 2025 14:53:46 -0700 Subject: [PATCH 3/7] Make both caches the same size for consistency --- .github/patches/opentelemetry-java-contrib.patch | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/patches/opentelemetry-java-contrib.patch b/.github/patches/opentelemetry-java-contrib.patch index 9c8229c2d4..45cc0498e8 100644 --- a/.github/patches/opentelemetry-java-contrib.patch +++ b/.github/patches/opentelemetry-java-contrib.patch @@ -902,7 +902,7 @@ index 1ef8abf5..328e63dd 100644 } } diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java -index 75977dc0..23d3d490 100644 +index 75977dc0..b933a044 100644 --- a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java +++ b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java @@ -5,42 +5,81 @@ @@ -1037,7 +1037,7 @@ index 75977dc0..23d3d490 100644 + .build(); + this.capturedTraceIdCache = + Caffeine.newBuilder() -+ .maximumSize(10_000) ++ .maximumSize(100_000) + .ticker(clock::nanoTime) + .expireAfterWrite(Duration.ofMinutes(1)) + .build(); From 0084f706a78a830f79aae2098267aebe8f427a73 Mon Sep 17 00:00:00 2001 From: Mahad Janjua Date: Tue, 26 Aug 2025 16:58:58 -0700 Subject: [PATCH 4/7] Make default error capture rate 1 trace/second --- .../patches/opentelemetry-java-contrib.patch | 51 ++++++++++++------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/.github/patches/opentelemetry-java-contrib.patch b/.github/patches/opentelemetry-java-contrib.patch index 45cc0498e8..8e2cd17da8 100644 --- a/.github/patches/opentelemetry-java-contrib.patch +++ b/.github/patches/opentelemetry-java-contrib.patch @@ -902,7 +902,7 @@ index 1ef8abf5..328e63dd 100644 } } diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java -index 75977dc0..b933a044 100644 +index 75977dc0..1edf4c10 100644 --- a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java +++ b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java @@ -5,42 +5,81 @@ @@ -1008,7 +1008,7 @@ index 75977dc0..b933a044 100644 } private XrayRulesSampler( -@@ -58,12 +106,42 @@ final class XrayRulesSampler implements Sampler { +@@ -58,12 +106,46 @@ final class XrayRulesSampler implements Sampler { Resource resource, Clock clock, Sampler fallbackSampler, @@ -1042,8 +1042,12 @@ index 75977dc0..b933a044 100644 + .expireAfterWrite(Duration.ofMinutes(1)) + .build(); + -+ // Initialize anomaly capture rate limiter if error capture limit is configured -+ if (adaptiveSamplingConfig != null && adaptiveSamplingConfig.getErrorCaptureLimit() != null) { ++ // Initialize anomaly capture rate limiter ++ if (this.adaptiveSamplingConfig != null ++ && this.adaptiveSamplingConfig.getErrorCaptureLimit() == null) { ++ this.anomalyCaptureRateLimiter = new RateLimiter(1, 1, clock); ++ } else if (adaptiveSamplingConfig != null ++ && adaptiveSamplingConfig.getErrorCaptureLimit() != null) { + int errorTracesPerSecond = + adaptiveSamplingConfig.getErrorCaptureLimit().getErrorTracesPerSecond(); + this.anomalyCaptureRateLimiter = @@ -1052,7 +1056,7 @@ index 75977dc0..b933a044 100644 } @Override -@@ -74,10 +152,36 @@ final class XrayRulesSampler implements Sampler { +@@ -74,10 +156,36 @@ final class XrayRulesSampler implements Sampler { SpanKind spanKind, Attributes attributes, List parentLinks) { @@ -1091,7 +1095,7 @@ index 75977dc0..b933a044 100644 } } -@@ -96,7 +200,164 @@ final class XrayRulesSampler implements Sampler { +@@ -96,7 +204,164 @@ final class XrayRulesSampler implements Sampler { return "XrayRulesSampler{" + Arrays.toString(ruleAppliers) + "}"; } @@ -1257,7 +1261,7 @@ index 75977dc0..b933a044 100644 return Arrays.stream(ruleAppliers) .map(rule -> rule.snapshot(now)) .filter(Objects::nonNull) -@@ -115,15 +376,16 @@ final class XrayRulesSampler implements Sampler { +@@ -115,15 +380,16 @@ final class XrayRulesSampler implements Sampler { Map ruleTargets, Set requestedTargetRuleNames, Date now) { @@ -1276,7 +1280,7 @@ index 75977dc0..b933a044 100644 } if (requestedTargetRuleNames.contains(rule.getRuleName())) { // In practice X-Ray should return a target for any rule we requested but -@@ -135,6 +397,90 @@ final class XrayRulesSampler implements Sampler { +@@ -135,6 +401,90 @@ final class XrayRulesSampler implements Sampler { return rule; }) .toArray(SamplingRuleApplier[]::new); @@ -2005,7 +2009,7 @@ index 6bb6e82a..6d71711b 100644 return applier.shouldSample( Context.current(), diff --git a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java -index 1ca8df34..2f25c8af 100644 +index 1ca8df34..5830a083 100644 --- a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java +++ b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java @@ -5,17 +5,28 @@ @@ -2225,7 +2229,7 @@ index 1ca8df34..2f25c8af 100644 // Minimum is batTarget, 5s from now assertThat(sampler.nextTargetFetchTimeNanos()) -@@ -169,6 +251,763 @@ class XrayRulesSamplerTest { +@@ -169,6 +251,774 @@ class XrayRulesSamplerTest { assertThat(sampler.snapshot(Date.from(now))).hasSize(4); } @@ -2897,13 +2901,10 @@ index 1ca8df34..2f25c8af 100644 + SamplingRateBoost.create(1, 300)); + + TestClock clock = TestClock.create(); ++ // Error span capture should default to 1/s + AwsXrayAdaptiveSamplingConfig config = + AwsXrayAdaptiveSamplingConfig.builder() + .setVersion(1.0) -+ .setErrorCaptureLimit( -+ AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorTracesPerSecond(10) -+ .build()) + .setAnomalyConditions( + Arrays.asList( + AwsXrayAdaptiveSamplingConfig.AnomalyConditions.builder() @@ -2930,7 +2931,6 @@ index 1ca8df34..2f25c8af 100644 + + SpanData spanDataMock = mock(SpanData.class); + Attributes attributesMock = mock(Attributes.class); -+ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID"); + when(spanDataMock.getAttributes()).thenReturn(attributesMock); + when(attributesMock.get(HTTP_RESPONSE_STATUS_CODE)).thenReturn(500L); + @@ -2938,34 +2938,49 @@ index 1ca8df34..2f25c8af 100644 + Consumer stubbedConsumer = x -> exportCounter.increment(); + + // Test matching operations ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID1"); + when(attributesMock.get(URL_PATH)).thenReturn("/api1/ext"); + when(attributesMock.get(HTTP_METHOD)).thenReturn("GET"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ ++ clock.advance(Duration.ofSeconds(5)); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID2"); + when(attributesMock.get(URL_PATH)).thenReturn("/api2"); + when(attributesMock.get(HTTP_METHOD)).thenReturn("GET"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + assertThat(exportCounter.sumThenReset()).isEqualTo(2L); + ++ // Not enough time elapsed, error rate limit was hit ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID3"); ++ when(attributesMock.get(URL_PATH)).thenReturn("/api2"); ++ when(attributesMock.get(HTTP_METHOD)).thenReturn("GET"); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(0L); ++ + // Test non-matching operation -+ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID_2"); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID4"); + when(attributesMock.get(URL_PATH)).thenReturn("/api1/ext"); + when(attributesMock.get(HTTP_METHOD)).thenReturn("POST"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID5"); + when(attributesMock.get(URL_PATH)).thenReturn("/non-matching"); + when(attributesMock.get(HTTP_METHOD)).thenReturn("GET"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + assertThat(exportCounter.sumThenReset()).isEqualTo(0L); + + // Test aws.local.operation takes priority ++ clock.advance(Duration.ofSeconds(5)); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID6"); + when(attributesMock.get(AwsAttributeKeys.AWS_LOCAL_OPERATION)).thenReturn("GET /api1"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + assertThat(exportCounter.sumThenReset()).isEqualTo(1L); + + // Test sending previously matched traceIDs gets captured -+ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID"); ++ clock.advance(Duration.ofSeconds(5)); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID1"); + when(attributesMock.get(AwsAttributeKeys.AWS_LOCAL_OPERATION)).thenReturn("GET /non-matching"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); -+ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID_2"); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID2"); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + assertThat(exportCounter.sumThenReset()).isEqualTo(2L); + From a585fb4fa20fab4c8220e10cd3b1b035c72f389b Mon Sep 17 00:00:00 2001 From: Mahad Janjua Date: Tue, 26 Aug 2025 17:16:06 -0700 Subject: [PATCH 5/7] More naming updates --- .../patches/opentelemetry-java-contrib.patch | 90 +++++++++---------- ...licationSignalsCustomizerProviderTest.java | 2 +- .../adaptive-sampling-config-valid.yaml | 4 +- 3 files changed, 48 insertions(+), 48 deletions(-) diff --git a/.github/patches/opentelemetry-java-contrib.patch b/.github/patches/opentelemetry-java-contrib.patch index 8e2cd17da8..cf7a5e6011 100644 --- a/.github/patches/opentelemetry-java-contrib.patch +++ b/.github/patches/opentelemetry-java-contrib.patch @@ -80,7 +80,7 @@ index 00000000..41f22f90 +} diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsXrayAdaptiveSamplingConfig.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsXrayAdaptiveSamplingConfig.java new file mode 100644 -index 00000000..b2146f13 +index 00000000..8c2e8fe2 --- /dev/null +++ b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsXrayAdaptiveSamplingConfig.java @@ -0,0 +1,139 @@ @@ -112,9 +112,9 @@ index 00000000..b2146f13 + @Nullable + public abstract List getAnomalyConditions(); + -+ @JsonProperty("errorCaptureLimit") ++ @JsonProperty("anomalyCaptureLimit") + @Nullable -+ public abstract ErrorCaptureLimit getErrorCaptureLimit(); ++ public abstract AnomalyCaptureLimit getAnomalyCaptureLimit(); + + public static Builder builder() { + return new AutoValue_AwsXrayAdaptiveSamplingConfig.Builder(); @@ -128,8 +128,8 @@ index 00000000..b2146f13 + @JsonProperty("anomalyConditions") + public abstract Builder setAnomalyConditions(List value); + -+ @JsonProperty("errorCaptureLimit") -+ public abstract Builder setErrorCaptureLimit(ErrorCaptureLimit value); ++ @JsonProperty("anomalyCaptureLimit") ++ public abstract Builder setAnomalyCaptureLimit(AnomalyCaptureLimit value); + + public abstract AwsXrayAdaptiveSamplingConfig build(); + } @@ -179,7 +179,7 @@ index 00000000..b2146f13 + public enum UsageType { + BOTH("both"), + SAMPLING_BOOST("sampling-boost"), -+ ERROR_SPAN_CAPTURE("error-span-capture"); ++ ANOMALY_SPAN_CAPTURE("anomaly-span-capture"); + + private final String value; + @@ -205,21 +205,21 @@ index 00000000..b2146f13 + + @AutoValue + @JsonDeserialize( -+ builder = AutoValue_AwsXrayAdaptiveSamplingConfig_ErrorCaptureLimit.Builder.class) -+ public abstract static class ErrorCaptureLimit { -+ @JsonProperty("errorTracesPerSecond") -+ public abstract int getErrorTracesPerSecond(); ++ builder = AutoValue_AwsXrayAdaptiveSamplingConfig_AnomalyCaptureLimit.Builder.class) ++ public abstract static class AnomalyCaptureLimit { ++ @JsonProperty("anomalyTracesPerSecond") ++ public abstract int getAnomalyTracesPerSecond(); + + public static Builder builder() { -+ return new AutoValue_AwsXrayAdaptiveSamplingConfig_ErrorCaptureLimit.Builder(); ++ return new AutoValue_AwsXrayAdaptiveSamplingConfig_AnomalyCaptureLimit.Builder(); + } + + @AutoValue.Builder + public abstract static class Builder { -+ @JsonProperty("errorTracesPerSecond") -+ public abstract Builder setErrorTracesPerSecond(int value); ++ @JsonProperty("anomalyTracesPerSecond") ++ public abstract Builder setAnomalyTracesPerSecond(int value); + -+ public abstract ErrorCaptureLimit build(); ++ public abstract AnomalyCaptureLimit build(); + } + } +} @@ -902,7 +902,7 @@ index 1ef8abf5..328e63dd 100644 } } diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java -index 75977dc0..1edf4c10 100644 +index 75977dc0..b86426dc 100644 --- a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java +++ b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java @@ -5,42 +5,81 @@ @@ -1044,14 +1044,14 @@ index 75977dc0..1edf4c10 100644 + + // Initialize anomaly capture rate limiter + if (this.adaptiveSamplingConfig != null -+ && this.adaptiveSamplingConfig.getErrorCaptureLimit() == null) { ++ && this.adaptiveSamplingConfig.getAnomalyCaptureLimit() == null) { + this.anomalyCaptureRateLimiter = new RateLimiter(1, 1, clock); + } else if (adaptiveSamplingConfig != null -+ && adaptiveSamplingConfig.getErrorCaptureLimit() != null) { -+ int errorTracesPerSecond = -+ adaptiveSamplingConfig.getErrorCaptureLimit().getErrorTracesPerSecond(); ++ && adaptiveSamplingConfig.getAnomalyCaptureLimit() != null) { ++ int anomalyTracesPerSecond = ++ adaptiveSamplingConfig.getAnomalyCaptureLimit().getAnomalyTracesPerSecond(); + this.anomalyCaptureRateLimiter = -+ new RateLimiter(errorTracesPerSecond, errorTracesPerSecond, clock); ++ new RateLimiter(anomalyTracesPerSecond, anomalyTracesPerSecond, clock); + } } @@ -1082,7 +1082,7 @@ index 75977dc0..1edf4c10 100644 + } + String hashedRule = ruleToHashMap.getOrDefault(ruleToPropagate, ruleToPropagate); + if (this.adaptiveSamplingConfig != null -+ && this.adaptiveSamplingConfig.getErrorCaptureLimit() != null) { ++ && this.adaptiveSamplingConfig.getAnomalyCaptureLimit() != null) { + // If the span is capturable based on local SDK config, add sampling rule attribute + return AwsSamplingResult.create( + result.getDecision(), @@ -1107,10 +1107,10 @@ index 75977dc0..1edf4c10 100644 + this.adaptiveSamplingConfig = config; + + // Initialize anomaly capture rate limiter if error capture limit is configured -+ if (config.getErrorCaptureLimit() != null) { -+ int errorTracesPerSecond = config.getErrorCaptureLimit().getErrorTracesPerSecond(); ++ if (config.getAnomalyCaptureLimit() != null) { ++ int anomalyTracesPerSecond = config.getAnomalyCaptureLimit().getAnomalyTracesPerSecond(); + this.anomalyCaptureRateLimiter = -+ new RateLimiter(errorTracesPerSecond, errorTracesPerSecond, clock); ++ new RateLimiter(anomalyTracesPerSecond, anomalyTracesPerSecond, clock); + } + } + } @@ -1138,7 +1138,7 @@ index 75977dc0..1edf4c10 100644 + && AwsXrayAdaptiveSamplingConfig.UsageType.SAMPLING_BOOST.equals( + condition.getUsage())) + || (shouldCaptureAnomalySpan -+ && AwsXrayAdaptiveSamplingConfig.UsageType.ERROR_SPAN_CAPTURE.equals( ++ && AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_SPAN_CAPTURE.equals( + condition.getUsage()))) { + continue; + } @@ -1173,7 +1173,7 @@ index 75977dc0..1edf4c10 100644 + case SAMPLING_BOOST: + shouldBoostSampling = true; + break; -+ case ERROR_SPAN_CAPTURE: ++ case ANOMALY_SPAN_CAPTURE: + shouldCaptureAnomalySpan = true; + break; + } @@ -2009,7 +2009,7 @@ index 6bb6e82a..6d71711b 100644 return applier.shouldSample( Context.current(), diff --git a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java -index 1ca8df34..5830a083 100644 +index 1ca8df34..0464a304 100644 --- a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java +++ b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java @@ -5,17 +5,28 @@ @@ -2302,9 +2302,9 @@ index 1ca8df34..5830a083 100644 + AwsXrayAdaptiveSamplingConfig config = + AwsXrayAdaptiveSamplingConfig.builder() + .setVersion(1.0) -+ .setErrorCaptureLimit( -+ AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorTracesPerSecond(2) ++ .setAnomalyCaptureLimit( ++ AwsXrayAdaptiveSamplingConfig.AnomalyCaptureLimit.builder() ++ .setAnomalyTracesPerSecond(2) + .build()) + .build(); + @@ -2542,9 +2542,9 @@ index 1ca8df34..5830a083 100644 + AwsXrayAdaptiveSamplingConfig config = + AwsXrayAdaptiveSamplingConfig.builder() + .setVersion(1.0) -+ .setErrorCaptureLimit( -+ AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorTracesPerSecond(2) ++ .setAnomalyCaptureLimit( ++ AwsXrayAdaptiveSamplingConfig.AnomalyCaptureLimit.builder() ++ .setAnomalyTracesPerSecond(2) + .build()) + .setAnomalyConditions( + Arrays.asList( @@ -2686,9 +2686,9 @@ index 1ca8df34..5830a083 100644 + AwsXrayAdaptiveSamplingConfig config = + AwsXrayAdaptiveSamplingConfig.builder() + .setVersion(1.0) -+ .setErrorCaptureLimit( -+ AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorTracesPerSecond(2) ++ .setAnomalyCaptureLimit( ++ AwsXrayAdaptiveSamplingConfig.AnomalyCaptureLimit.builder() ++ .setAnomalyTracesPerSecond(2) + .build()) + .setAnomalyConditions( + Arrays.asList( @@ -2754,15 +2754,15 @@ index 1ca8df34..5830a083 100644 + AwsXrayAdaptiveSamplingConfig config = + AwsXrayAdaptiveSamplingConfig.builder() + .setVersion(1.0) -+ .setErrorCaptureLimit( -+ AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorTracesPerSecond(2) ++ .setAnomalyCaptureLimit( ++ AwsXrayAdaptiveSamplingConfig.AnomalyCaptureLimit.builder() ++ .setAnomalyTracesPerSecond(2) + .build()) + .setAnomalyConditions( + Arrays.asList( + AwsXrayAdaptiveSamplingConfig.AnomalyConditions.builder() + .setHighLatencyMs(100L) -+ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ERROR_SPAN_CAPTURE) ++ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_SPAN_CAPTURE) + .build())) + .build(); + XrayRulesSampler sampler = @@ -2822,16 +2822,16 @@ index 1ca8df34..5830a083 100644 + AwsXrayAdaptiveSamplingConfig config = + AwsXrayAdaptiveSamplingConfig.builder() + .setVersion(1.0) -+ .setErrorCaptureLimit( -+ AwsXrayAdaptiveSamplingConfig.ErrorCaptureLimit.builder() -+ .setErrorTracesPerSecond(2) ++ .setAnomalyCaptureLimit( ++ AwsXrayAdaptiveSamplingConfig.AnomalyCaptureLimit.builder() ++ .setAnomalyTracesPerSecond(2) + .build()) + .setAnomalyConditions( + Arrays.asList( + AwsXrayAdaptiveSamplingConfig.AnomalyConditions.builder() + .setErrorCodeRegex("^456$") + .setHighLatencyMs(100L) -+ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ERROR_SPAN_CAPTURE) ++ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_SPAN_CAPTURE) + .build())) + .build(); + XrayRulesSampler sampler = @@ -2910,7 +2910,7 @@ index 1ca8df34..5830a083 100644 + AwsXrayAdaptiveSamplingConfig.AnomalyConditions.builder() + .setOperations(Arrays.asList("GET /api1", "GET /api2")) + .setErrorCodeRegex("^500$") -+ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ERROR_SPAN_CAPTURE) ++ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_SPAN_CAPTURE) + .build())) + .build(); + XrayRulesSampler sampler = diff --git a/awsagentprovider/src/test/java/software/amazon/opentelemetry/javaagent/providers/AwsApplicationSignalsCustomizerProviderTest.java b/awsagentprovider/src/test/java/software/amazon/opentelemetry/javaagent/providers/AwsApplicationSignalsCustomizerProviderTest.java index 2a3c2ee831..93d6a97f11 100644 --- a/awsagentprovider/src/test/java/software/amazon/opentelemetry/javaagent/providers/AwsApplicationSignalsCustomizerProviderTest.java +++ b/awsagentprovider/src/test/java/software/amazon/opentelemetry/javaagent/providers/AwsApplicationSignalsCustomizerProviderTest.java @@ -81,7 +81,7 @@ void setAdaptiveSamplingConfigFromFile_validYaml() // Assert the configuration was parsed correctly assertThat(config).isNotNull(); assertThat(config.getVersion()).isEqualTo(1); - assertThat(config.getErrorCaptureLimit().getErrorTracesPerSecond()).isEqualTo(10); + assertThat(config.getAnomalyCaptureLimit().getAnomalyTracesPerSecond()).isEqualTo(10); } @Test diff --git a/awsagentprovider/src/test/resources/adaptive-sampling-config-valid.yaml b/awsagentprovider/src/test/resources/adaptive-sampling-config-valid.yaml index 2fa380ccf0..dcfd187628 100644 --- a/awsagentprovider/src/test/resources/adaptive-sampling-config-valid.yaml +++ b/awsagentprovider/src/test/resources/adaptive-sampling-config-valid.yaml @@ -8,5 +8,5 @@ anomalyConditions: usage: both - errorCodeRegex: "^2\\d\\d$" usage: both -errorCaptureLimit: - errorTracesPerSecond: 10 \ No newline at end of file +anomalyCaptureLimit: + anomalyTracesPerSecond: 10 \ No newline at end of file From 4337fe4f825717d88238c16c6cc01b6b86ff2f04 Mon Sep 17 00:00:00 2001 From: Mahad Janjua Date: Wed, 27 Aug 2025 14:29:24 -0700 Subject: [PATCH 6/7] Merge caches and reuse enum as value --- .../patches/opentelemetry-java-contrib.patch | 218 ++++++++++++++---- .../adaptive-sampling-config-invalid.yaml | 4 +- 2 files changed, 169 insertions(+), 53 deletions(-) diff --git a/.github/patches/opentelemetry-java-contrib.patch b/.github/patches/opentelemetry-java-contrib.patch index cf7a5e6011..27559888eb 100644 --- a/.github/patches/opentelemetry-java-contrib.patch +++ b/.github/patches/opentelemetry-java-contrib.patch @@ -80,10 +80,10 @@ index 00000000..41f22f90 +} diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsXrayAdaptiveSamplingConfig.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsXrayAdaptiveSamplingConfig.java new file mode 100644 -index 00000000..8c2e8fe2 +index 00000000..dc5b7a01 --- /dev/null +++ b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsXrayAdaptiveSamplingConfig.java -@@ -0,0 +1,139 @@ +@@ -0,0 +1,148 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 @@ -179,7 +179,8 @@ index 00000000..8c2e8fe2 + public enum UsageType { + BOTH("both"), + SAMPLING_BOOST("sampling-boost"), -+ ANOMALY_SPAN_CAPTURE("anomaly-span-capture"); ++ ANOMALY_TRACE_CAPTURE("anomaly-trace-capture"), ++ NEITHER("neither"); // Not meant to be used by customers + + private final String value; + @@ -201,6 +202,14 @@ index 00000000..8c2e8fe2 + } + throw new IllegalArgumentException("Invalid usage value: " + value); + } ++ ++ public static boolean isUsedForBoost(UsageType usage) { ++ return BOTH.equals(usage) || SAMPLING_BOOST.equals(usage); ++ } ++ ++ public static boolean isUsedForAnomalyTraceCapture(UsageType usage) { ++ return BOTH.equals(usage) || ANOMALY_TRACE_CAPTURE.equals(usage); ++ } + } + + @AutoValue @@ -902,10 +911,10 @@ index 1ef8abf5..328e63dd 100644 } } diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java -index 75977dc0..b86426dc 100644 +index 75977dc0..cb4a09b9 100644 --- a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java +++ b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java -@@ -5,42 +5,81 @@ +@@ -5,42 +5,79 @@ package io.opentelemetry.contrib.awsxray; @@ -916,7 +925,6 @@ index 75977dc0..b86426dc 100644 +import io.opentelemetry.api.common.AttributeKey; import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.trace.Span; -+import io.opentelemetry.api.trace.SpanContext; import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.api.trace.StatusCode; import io.opentelemetry.context.Context; @@ -971,8 +979,7 @@ index 75977dc0..b86426dc 100644 + private final Map hashToRuleMap; + + private final boolean adaptiveSamplingRuleExists; -+ private final Cache anomalyTracesCache; -+ private final Cache capturedTraceIdCache; ++ private final Cache traceUsageCache; + + @Nullable private AwsXrayAdaptiveSamplingConfig adaptiveSamplingConfig; + @Nullable private RateLimiter anomalyCaptureRateLimiter; @@ -988,7 +995,7 @@ index 75977dc0..b86426dc 100644 this( clientId, resource, -@@ -49,8 +88,17 @@ final class XrayRulesSampler implements Sampler { +@@ -49,8 +86,17 @@ final class XrayRulesSampler implements Sampler { rules.stream() // Lower priority value takes precedence so normal ascending sort. .sorted(Comparator.comparingInt(GetSamplingRulesResponse.SamplingRule::getPriority)) @@ -1008,7 +1015,7 @@ index 75977dc0..b86426dc 100644 } private XrayRulesSampler( -@@ -58,12 +106,46 @@ final class XrayRulesSampler implements Sampler { +@@ -58,12 +104,40 @@ final class XrayRulesSampler implements Sampler { Resource resource, Clock clock, Sampler fallbackSampler, @@ -1029,18 +1036,12 @@ index 75977dc0..b86426dc 100644 + } + this.adaptiveSamplingRuleExists = adaptiveSamplingRuleExists; + this.adaptiveSamplingConfig = adaptiveSamplingConfig; -+ this.anomalyTracesCache = ++ this.traceUsageCache = + Caffeine.newBuilder() + .maximumSize(100_000) + .ticker(clock::nanoTime) + .expireAfterWrite(Duration.ofMinutes(10)) + .build(); -+ this.capturedTraceIdCache = -+ Caffeine.newBuilder() -+ .maximumSize(100_000) -+ .ticker(clock::nanoTime) -+ .expireAfterWrite(Duration.ofMinutes(1)) -+ .build(); + + // Initialize anomaly capture rate limiter + if (this.adaptiveSamplingConfig != null @@ -1056,7 +1057,7 @@ index 75977dc0..b86426dc 100644 } @Override -@@ -74,10 +156,36 @@ final class XrayRulesSampler implements Sampler { +@@ -74,10 +148,36 @@ final class XrayRulesSampler implements Sampler { SpanKind spanKind, Attributes attributes, List parentLinks) { @@ -1095,7 +1096,7 @@ index 75977dc0..b86426dc 100644 } } -@@ -96,7 +204,164 @@ final class XrayRulesSampler implements Sampler { +@@ -96,7 +196,185 @@ final class XrayRulesSampler implements Sampler { return "XrayRulesSampler{" + Arrays.toString(ruleAppliers) + "}"; } @@ -1116,7 +1117,7 @@ index 75977dc0..b86426dc 100644 + } + + void adaptSampling(ReadableSpan span, SpanData spanData, Consumer spanBatcher) { -+ if (!adaptiveSamplingRuleExists) { ++ if (!adaptiveSamplingRuleExists && this.adaptiveSamplingConfig == null) { + return; + } + Long statusCode = spanData.getAttributes().get(HTTP_RESPONSE_STATUS_CODE); @@ -1138,7 +1139,7 @@ index 75977dc0..b86426dc 100644 + && AwsXrayAdaptiveSamplingConfig.UsageType.SAMPLING_BOOST.equals( + condition.getUsage())) + || (shouldCaptureAnomalySpan -+ && AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_SPAN_CAPTURE.equals( ++ && AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_TRACE_CAPTURE.equals( + condition.getUsage()))) { + continue; + } @@ -1173,9 +1174,10 @@ index 75977dc0..b86426dc 100644 + case SAMPLING_BOOST: + shouldBoostSampling = true; + break; -+ case ANOMALY_SPAN_CAPTURE: ++ case ANOMALY_TRACE_CAPTURE: + shouldCaptureAnomalySpan = true; + break; ++ default: // do nothing + } + } else { + shouldBoostSampling = true; @@ -1195,22 +1197,23 @@ index 75977dc0..b86426dc 100644 + } + + String traceId = spanData.getTraceId(); -+ SpanContext parentContext = spanData.getParentSpanContext(); -+ boolean isLocalRootSpan = -+ parentContext == null || !parentContext.isValid() || parentContext.isRemote(); ++ AwsXrayAdaptiveSamplingConfig.UsageType existingUsage = traceUsageCache.getIfPresent(traceId); ++ boolean isNewTrace = existingUsage == null; + + // Anomaly Capture -+ if (capturedTraceIdCache.getIfPresent(traceId) != null ++ boolean isSpanCaptured = false; ++ if (AwsXrayAdaptiveSamplingConfig.UsageType.isUsedForAnomalyTraceCapture(existingUsage) + || (shouldCaptureAnomalySpan + && !span.getSpanContext().isSampled() + && anomalyCaptureRateLimiter != null + && anomalyCaptureRateLimiter.trySpend(1))) { -+ capturedTraceIdCache.put(traceId, true); + spanBatcher.accept(span); ++ isSpanCaptured = true; + } + + // Sampling Boost -+ if (shouldBoostSampling || isLocalRootSpan) { ++ boolean isCountedAsAnomalyForBoost = false; ++ if (shouldBoostSampling || isNewTrace) { + String traceStateValue = + span.getSpanContext() + .getTraceState() @@ -1244,16 +1247,35 @@ index 75977dc0..b86426dc 100644 + if (shouldBoostSampling + && ruleToReportTo != null + && ruleToReportTo.hasBoost() -+ && this.anomalyTracesCache.getIfPresent(traceId) == null) { -+ this.anomalyTracesCache.put(traceId, true); ++ && !AwsXrayAdaptiveSamplingConfig.UsageType.isUsedForBoost(existingUsage)) { + ruleToReportTo.countAnomalyTrace(span); ++ isCountedAsAnomalyForBoost = true; + } -+ if (isLocalRootSpan) { -+ if (ruleToReportTo != null && ruleToReportTo.hasBoost()) { -+ ruleToReportTo.countTrace(); -+ } -+ this.anomalyTracesCache.invalidate(traceId); ++ if (isNewTrace && ruleToReportTo != null && ruleToReportTo.hasBoost()) { ++ ruleToReportTo.countTrace(); ++ } ++ } ++ ++ // Any interaction with a cache entry will reset the expiration timer of that entry ++ if (isSpanCaptured && isCountedAsAnomalyForBoost) { ++ this.traceUsageCache.put(traceId, AwsXrayAdaptiveSamplingConfig.UsageType.BOTH); ++ } else if (isSpanCaptured) { ++ if (AwsXrayAdaptiveSamplingConfig.UsageType.isUsedForBoost(existingUsage)) { ++ this.traceUsageCache.put(traceId, AwsXrayAdaptiveSamplingConfig.UsageType.BOTH); ++ } else { ++ this.traceUsageCache.put( ++ traceId, AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_TRACE_CAPTURE); ++ } ++ } else if (isCountedAsAnomalyForBoost) { ++ if (AwsXrayAdaptiveSamplingConfig.UsageType.isUsedForAnomalyTraceCapture(existingUsage)) { ++ this.traceUsageCache.put(traceId, AwsXrayAdaptiveSamplingConfig.UsageType.BOTH); ++ } else { ++ this.traceUsageCache.put(traceId, AwsXrayAdaptiveSamplingConfig.UsageType.SAMPLING_BOOST); + } ++ } else if (existingUsage != null) { ++ this.traceUsageCache.put(traceId, existingUsage); ++ } else { ++ this.traceUsageCache.put(traceId, AwsXrayAdaptiveSamplingConfig.UsageType.NEITHER); + } + } + @@ -1261,7 +1283,7 @@ index 75977dc0..b86426dc 100644 return Arrays.stream(ruleAppliers) .map(rule -> rule.snapshot(now)) .filter(Objects::nonNull) -@@ -115,15 +380,16 @@ final class XrayRulesSampler implements Sampler { +@@ -115,15 +393,16 @@ final class XrayRulesSampler implements Sampler { Map ruleTargets, Set requestedTargetRuleNames, Date now) { @@ -1280,7 +1302,7 @@ index 75977dc0..b86426dc 100644 } if (requestedTargetRuleNames.contains(rule.getRuleName())) { // In practice X-Ray should return a target for any rule we requested but -@@ -135,6 +401,90 @@ final class XrayRulesSampler implements Sampler { +@@ -135,6 +414,91 @@ final class XrayRulesSampler implements Sampler { return rule; }) .toArray(SamplingRuleApplier[]::new); @@ -1368,8 +1390,9 @@ index 75977dc0..b86426dc 100644 + } + + // For testing -+ Cache getAnomalyTracesCache() { -+ return anomalyTracesCache; ++ Cache getTraceUsageCache() { ++ traceUsageCache.cleanUp(); ++ return traceUsageCache; } } diff --git a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/AwsXrayRemoteSamplerTest.java b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/AwsXrayRemoteSamplerTest.java @@ -2009,7 +2032,7 @@ index 6bb6e82a..6d71711b 100644 return applier.shouldSample( Context.current(), diff --git a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java -index 1ca8df34..0464a304 100644 +index 1ca8df34..72ec524b 100644 --- a/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java +++ b/aws-xray/src/test/java/io/opentelemetry/contrib/awsxray/XrayRulesSamplerTest.java @@ -5,17 +5,28 @@ @@ -2229,7 +2252,7 @@ index 1ca8df34..0464a304 100644 // Minimum is batTarget, 5s from now assertThat(sampler.nextTargetFetchTimeNanos()) -@@ -169,6 +251,774 @@ class XrayRulesSamplerTest { +@@ -169,6 +251,867 @@ class XrayRulesSamplerTest { assertThat(sampler.snapshot(Date.from(now))).hasSize(4); } @@ -2502,7 +2525,7 @@ index 1ca8df34..0464a304 100644 + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); -+ assertThat(sampler.getAnomalyTracesCache().asMap().size()).isEqualTo(0); ++ assertThat(sampler.getTraceUsageCache().asMap().size()).isEqualTo(0); + } + + @Test @@ -2602,18 +2625,18 @@ index 1ca8df34..0464a304 100644 + assertThat(snapshot.get(1).getBoostStatisticsDocument().getSampledAnomalyCount()).isEqualTo(0); + + // Mock trace coming from upstream service where it was sampled by cat-rule ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID4"); + when(readableSpanMock.getSpanContext()) + .thenReturn( + SpanContext.create( -+ "TRACE_ID", ++ "TRACE_ID4", + "SPAN_ID", + TraceFlags.getDefault(), + TraceState.builder() -+ .put(AwsSamplingResult.AWS_XRAY_SAMPLING_RULE_TRACE_STATE_KEY, "cat-rule") ++ .put( ++ AwsSamplingResult.AWS_XRAY_SAMPLING_RULE_TRACE_STATE_KEY, ++ XrayRulesSampler.hashRuleName("cat-rule")) + .build())); -+ -+ // When we adapt sampling, we should see this rule and report to it even though it doesn't -+ // match + sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); + + // Ensure snapshot shows correctly saved statistics @@ -2625,7 +2648,11 @@ index 1ca8df34..0464a304 100644 + assertThat(snapshot.get(1).getBoostStatisticsDocument().getTotalCount()).isEqualTo(0); + assertThat(snapshot.get(1).getBoostStatisticsDocument().getAnomalyCount()).isEqualTo(0); + assertThat(snapshot.get(1).getBoostStatisticsDocument().getSampledAnomalyCount()).isEqualTo(0); -+ assertThat(sampler.getAnomalyTracesCache().asMap().size()).isEqualTo(0); ++ ++ // Assert the trace ID cache is filled with appropriate data and is cleared after TTL passes ++ assertThat(sampler.getTraceUsageCache().asMap().size()).isEqualTo(4); ++ clock.advance(Duration.ofMinutes(100)); ++ assertThat(sampler.getTraceUsageCache().asMap().size()).isEqualTo(0); + } + + @Test @@ -2762,7 +2789,7 @@ index 1ca8df34..0464a304 100644 + Arrays.asList( + AwsXrayAdaptiveSamplingConfig.AnomalyConditions.builder() + .setHighLatencyMs(100L) -+ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_SPAN_CAPTURE) ++ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_TRACE_CAPTURE) + .build())) + .build(); + XrayRulesSampler sampler = @@ -2831,7 +2858,7 @@ index 1ca8df34..0464a304 100644 + AwsXrayAdaptiveSamplingConfig.AnomalyConditions.builder() + .setErrorCodeRegex("^456$") + .setHighLatencyMs(100L) -+ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_SPAN_CAPTURE) ++ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_TRACE_CAPTURE) + .build())) + .build(); + XrayRulesSampler sampler = @@ -2882,6 +2909,95 @@ index 1ca8df34..0464a304 100644 + } + + @Test ++ void recordAndCaptureErrorBasedOnSeparateConditions() { ++ SamplingRule rule1 = ++ SamplingRule.create( ++ Collections.emptyMap(), ++ 0.0, ++ "*", ++ "*", ++ 1, ++ 0, ++ "*", ++ "*", ++ "test-rule", ++ "*", ++ "*", ++ "*", ++ 1, ++ SamplingRateBoost.create(1, 300)); ++ ++ TestClock clock = TestClock.create(); ++ AwsXrayAdaptiveSamplingConfig config = ++ AwsXrayAdaptiveSamplingConfig.builder() ++ .setVersion(1.0) ++ .setAnomalyCaptureLimit( ++ AwsXrayAdaptiveSamplingConfig.AnomalyCaptureLimit.builder() ++ .setAnomalyTracesPerSecond(10) ++ .build()) ++ .setAnomalyConditions( ++ Arrays.asList( ++ AwsXrayAdaptiveSamplingConfig.AnomalyConditions.builder() ++ .setErrorCodeRegex("^5\\d\\d$") ++ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.SAMPLING_BOOST) ++ .build(), ++ AwsXrayAdaptiveSamplingConfig.AnomalyConditions.builder() ++ .setErrorCodeRegex("^4\\d\\d$") ++ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_TRACE_CAPTURE) ++ .build())) ++ .build(); ++ XrayRulesSampler sampler = ++ new XrayRulesSampler( ++ "CLIENT_ID", ++ Resource.getDefault(), ++ clock, ++ Sampler.alwaysOn(), ++ Arrays.asList(rule1), ++ config); ++ ++ ReadableSpan readableSpanMock = mock(ReadableSpan.class); ++ when(readableSpanMock.getSpanContext()) ++ .thenReturn( ++ SpanContext.create( ++ "TRACE_ID", "SPAN_ID", TraceFlags.getDefault(), TraceState.getDefault())); ++ ++ SpanData spanDataMock = mock(SpanData.class); ++ Attributes attributesMock = mock(Attributes.class); ++ when(spanDataMock.getAttributes()).thenReturn(attributesMock); ++ when(spanDataMock.getTraceId()).thenReturn("TRACE_ID"); ++ LongAdder exportCounter = new LongAdder(); ++ Consumer stubbedConsumer = x -> exportCounter.add(1); ++ ++ // Boost condition triggered - count new trace + count anomaly ++ when(attributesMock.get(HTTP_RESPONSE_STATUS_CODE)).thenReturn(511L); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ assertThat(sampler.getTraceUsageCache().getIfPresent("TRACE_ID")) ++ .isEqualTo(AwsXrayAdaptiveSamplingConfig.UsageType.SAMPLING_BOOST); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(0L); ++ ++ // Anomaly capture triggered - capture and update cache value ++ when(attributesMock.get(HTTP_RESPONSE_STATUS_CODE)).thenReturn(411L); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ assertThat(sampler.getTraceUsageCache().getIfPresent("TRACE_ID")) ++ .isEqualTo(AwsXrayAdaptiveSamplingConfig.UsageType.BOTH); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(1L); ++ ++ // Boost condition triggered - capture span even though anomaly capture not included ++ when(attributesMock.get(HTTP_RESPONSE_STATUS_CODE)).thenReturn(511L); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ assertThat(sampler.getTraceUsageCache().getIfPresent("TRACE_ID")) ++ .isEqualTo(AwsXrayAdaptiveSamplingConfig.UsageType.BOTH); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(1L); ++ ++ // Non-anomaly span - should still be captured since trace is anomalous overall ++ when(attributesMock.get(HTTP_RESPONSE_STATUS_CODE)).thenReturn(200L); ++ sampler.adaptSampling(readableSpanMock, spanDataMock, stubbedConsumer); ++ assertThat(sampler.getTraceUsageCache().getIfPresent("TRACE_ID")) ++ .isEqualTo(AwsXrayAdaptiveSamplingConfig.UsageType.BOTH); ++ assertThat(exportCounter.sumThenReset()).isEqualTo(1L); ++ } ++ ++ @Test + void operationFilteringInAdaptSampling() { + SamplingRule rule1 = + SamplingRule.create( @@ -2910,7 +3026,7 @@ index 1ca8df34..0464a304 100644 + AwsXrayAdaptiveSamplingConfig.AnomalyConditions.builder() + .setOperations(Arrays.asList("GET /api1", "GET /api2")) + .setErrorCodeRegex("^500$") -+ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_SPAN_CAPTURE) ++ .setUsage(AwsXrayAdaptiveSamplingConfig.UsageType.ANOMALY_TRACE_CAPTURE) + .build())) + .build(); + XrayRulesSampler sampler = diff --git a/awsagentprovider/src/test/resources/adaptive-sampling-config-invalid.yaml b/awsagentprovider/src/test/resources/adaptive-sampling-config-invalid.yaml index 6d5b5a119f..888ae7ee3e 100644 --- a/awsagentprovider/src/test/resources/adaptive-sampling-config-invalid.yaml +++ b/awsagentprovider/src/test/resources/adaptive-sampling-config-invalid.yaml @@ -9,5 +9,5 @@ anomalyConditions: - errorCodeRegex: "^2\\d\\d$" operations: invalid part of config usage: both -errorCaptureLimit: - errorTracesPerSecond: 10 \ No newline at end of file +anomalyCaptureLimit: + anomalyTracesPerSecond: 10 \ No newline at end of file From c6c8138d282ddd054a4896fce83a210147015fb6 Mon Sep 17 00:00:00 2001 From: Mahad Janjua Date: Wed, 27 Aug 2025 15:13:35 -0700 Subject: [PATCH 7/7] Ensure cache is kept between GST calls --- .../patches/opentelemetry-java-contrib.patch | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/.github/patches/opentelemetry-java-contrib.patch b/.github/patches/opentelemetry-java-contrib.patch index 27559888eb..95fe329942 100644 --- a/.github/patches/opentelemetry-java-contrib.patch +++ b/.github/patches/opentelemetry-java-contrib.patch @@ -911,7 +911,7 @@ index 1ef8abf5..328e63dd 100644 } } diff --git a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java -index 75977dc0..cb4a09b9 100644 +index 75977dc0..406941ba 100644 --- a/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java +++ b/aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/XrayRulesSampler.java @@ -5,42 +5,79 @@ @@ -995,7 +995,7 @@ index 75977dc0..cb4a09b9 100644 this( clientId, resource, -@@ -49,8 +86,17 @@ final class XrayRulesSampler implements Sampler { +@@ -49,8 +86,22 @@ final class XrayRulesSampler implements Sampler { rules.stream() // Lower priority value takes precedence so normal ascending sort. .sorted(Comparator.comparingInt(GetSamplingRulesResponse.SamplingRule::getPriority)) @@ -1011,11 +1011,16 @@ index 75977dc0..cb4a09b9 100644 + .toArray(SamplingRuleApplier[]::new), + createRuleHashMaps(rules), + rules.stream().anyMatch(r -> r.getSamplingRateBoost() != null), -+ adaptiveSamplingConfig); ++ adaptiveSamplingConfig, ++ Caffeine.newBuilder() ++ .maximumSize(100_000) ++ .ticker(clock::nanoTime) ++ .expireAfterWrite(Duration.ofMinutes(10)) ++ .build()); } private XrayRulesSampler( -@@ -58,12 +104,40 @@ final class XrayRulesSampler implements Sampler { +@@ -58,12 +109,36 @@ final class XrayRulesSampler implements Sampler { Resource resource, Clock clock, Sampler fallbackSampler, @@ -1023,7 +1028,8 @@ index 75977dc0..cb4a09b9 100644 + SamplingRuleApplier[] ruleAppliers, + Map ruleToHashMap, + boolean adaptiveSamplingRuleExists, -+ @Nullable AwsXrayAdaptiveSamplingConfig adaptiveSamplingConfig) { ++ @Nullable AwsXrayAdaptiveSamplingConfig adaptiveSamplingConfig, ++ Cache traceUsageCache) { this.clientId = clientId; this.resource = resource; this.clock = clock; @@ -1036,12 +1042,7 @@ index 75977dc0..cb4a09b9 100644 + } + this.adaptiveSamplingRuleExists = adaptiveSamplingRuleExists; + this.adaptiveSamplingConfig = adaptiveSamplingConfig; -+ this.traceUsageCache = -+ Caffeine.newBuilder() -+ .maximumSize(100_000) -+ .ticker(clock::nanoTime) -+ .expireAfterWrite(Duration.ofMinutes(10)) -+ .build(); ++ this.traceUsageCache = traceUsageCache; + + // Initialize anomaly capture rate limiter + if (this.adaptiveSamplingConfig != null @@ -1057,7 +1058,7 @@ index 75977dc0..cb4a09b9 100644 } @Override -@@ -74,10 +148,36 @@ final class XrayRulesSampler implements Sampler { +@@ -74,10 +149,36 @@ final class XrayRulesSampler implements Sampler { SpanKind spanKind, Attributes attributes, List parentLinks) { @@ -1096,7 +1097,7 @@ index 75977dc0..cb4a09b9 100644 } } -@@ -96,7 +196,185 @@ final class XrayRulesSampler implements Sampler { +@@ -96,7 +197,185 @@ final class XrayRulesSampler implements Sampler { return "XrayRulesSampler{" + Arrays.toString(ruleAppliers) + "}"; } @@ -1283,7 +1284,7 @@ index 75977dc0..cb4a09b9 100644 return Arrays.stream(ruleAppliers) .map(rule -> rule.snapshot(now)) .filter(Objects::nonNull) -@@ -115,15 +393,16 @@ final class XrayRulesSampler implements Sampler { +@@ -115,15 +394,16 @@ final class XrayRulesSampler implements Sampler { Map ruleTargets, Set requestedTargetRuleNames, Date now) { @@ -1302,7 +1303,7 @@ index 75977dc0..cb4a09b9 100644 } if (requestedTargetRuleNames.contains(rule.getRuleName())) { // In practice X-Ray should return a target for any rule we requested but -@@ -135,6 +414,91 @@ final class XrayRulesSampler implements Sampler { +@@ -135,6 +415,92 @@ final class XrayRulesSampler implements Sampler { return rule; }) .toArray(SamplingRuleApplier[]::new); @@ -1315,7 +1316,8 @@ index 75977dc0..cb4a09b9 100644 + newAppliers, + ruleToHashMap, + adaptiveSamplingRuleExists, -+ adaptiveSamplingConfig); ++ adaptiveSamplingConfig, ++ traceUsageCache); + } + + static boolean isKeyPresent(SpanData span, AttributeKey key) {