Skip to content

Commit fd57874

Browse files
authored
Rate limited sampling (#2456)
* Rate limited sampling * Comment * Comment * Feedback * Private * Link * Fix merge * Feedback * Fix * Fix * Fix * Add test * Spotless
1 parent 9a5747a commit fd57874

File tree

85 files changed

+1898
-290
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+1898
-290
lines changed

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/configuration/Configuration.java

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public void validate() {
7474
preview.validate();
7575
}
7676

77-
// TODO (trask) investigate options for mapping lowercase values to otel enum directly
77+
@Deprecated
7878
public enum SpanKind {
7979
@JsonProperty("server")
8080
SERVER(io.opentelemetry.api.trace.SpanKind.SERVER),
@@ -94,6 +94,18 @@ public enum SpanKind {
9494
}
9595
}
9696

97+
/**
 * Kinds of telemetry that a sampling override can be scoped to through {@code
 * SamplingOverride.telemetryKind}.
 *
 * <p>Each constant carries a {@code @JsonProperty} annotation giving it a lowercase name
 * ({@code "request"}, {@code "dependency"}, {@code "trace"}, {@code "exception"}).
 */
public enum SamplingTelemetryKind {
98+
// restricted to telemetry kinds that are supported by SamplingOverrides
99+
@JsonProperty("request")
100+
REQUEST,
101+
@JsonProperty("dependency")
102+
DEPENDENCY,
103+
@JsonProperty("trace")
104+
TRACE,
105+
@JsonProperty("exception")
106+
EXCEPTION
107+
}
108+
97109
public enum MatchType {
98110
@JsonProperty("strict")
99111
STRICT,
@@ -151,7 +163,12 @@ public static class Role {
151163

152164
public static class Sampling {
153165

154-
public float percentage = 100;
166+
// fixed percentage of requests
167+
@Nullable public Double percentage;
168+
169+
// default is 5 requests per second (set in ConfigurationBuilder if neither percentage nor
170+
// limitPerSecond was configured)
171+
@Nullable public Double limitPerSecond;
155172
}
156173

157174
public static class SamplingPreview {
@@ -175,6 +192,8 @@ public static class SamplingPreview {
175192
// Another (lesser) reason is because .NET SDK always propagates trace flags "00" (not
176193
// sampled)
177194
//
195+
// future goal: make parentBased sampling the default if item count is received via tracestate
196+
//
178197
// IMPORTANT if changing this default, we need to keep it at least on Azure Functions
179198
public boolean parentBased;
180199

@@ -352,7 +371,7 @@ public static class PreviewConfiguration {
352371
new HashSet<>(asList("b3", "b3multi"));
353372

354373
public void validate() {
355-
for (Configuration.SamplingOverride samplingOverride : sampling.overrides) {
374+
for (SamplingOverride samplingOverride : sampling.overrides) {
356375
samplingOverride.validate();
357376
}
358377
for (Configuration.InstrumentationKeyOverride instrumentationKeyOverride :
@@ -578,22 +597,36 @@ private static boolean isRuntimeAttached() {
578597
}
579598

580599
public static class SamplingOverride {
581-
// TODO (trask) consider making this required when moving out of preview
582-
@Nullable public SpanKind spanKind;
600+
@Deprecated @Nullable public SpanKind spanKind;
601+
602+
// TODO (trask) make this required when moving out of preview
603+
// for now the default is both "request" and "dependency" for backwards compatibility
604+
@Nullable public SamplingTelemetryKind telemetryKind;
605+
606+
// TODO (trask) add test for this
607+
// this is primarily useful for batch jobs
608+
public boolean includeStandaloneTelemetry;
609+
583610
// not using include/exclude, because you can still get exclude with this by adding a second
584611
// (exclude) override above it
585612
// (since only the first matching override is used)
586613
public List<SamplingOverrideAttribute> attributes = new ArrayList<>();
587-
public Float percentage;
614+
public Double percentage;
588615
public String id; // optional, used for debugging purposes only
589616

617+
/**
 * Reports whether this override applies to request telemetry.
 *
 * @return {@code true} when {@code telemetryKind} is {@code REQUEST}, or when {@code
 *     telemetryKind} is not set and the deprecated {@code spanKind} is anything other than
 *     {@code CLIENT} (including not set at all)
 */
public boolean isForRequestTelemetry() {
618+
return telemetryKind == SamplingTelemetryKind.REQUEST
619+
// this part is for backwards compatibility:
620+
|| (telemetryKind == null && spanKind != SpanKind.CLIENT);
621+
}
622+
623+
/**
 * Reports whether this override applies to dependency telemetry.
 *
 * @return {@code true} when {@code telemetryKind} is {@code DEPENDENCY}, or when {@code
 *     telemetryKind} is not set and the deprecated {@code spanKind} is anything other than
 *     {@code SERVER} (including not set at all)
 */
public boolean isForDependencyTelemetry() {
624+
return telemetryKind == SamplingTelemetryKind.DEPENDENCY
625+
// this part is for backwards compatibility:
626+
|| (telemetryKind == null && spanKind != SpanKind.SERVER);
627+
}
628+
590629
public void validate() {
591-
if (spanKind == null && attributes.isEmpty()) {
592-
// TODO add doc and go link, similar to telemetry processors
593-
throw new FriendlyException(
594-
"A sampling override configuration is missing \"spanKind\" and has no attributes.",
595-
"Please provide at least one of \"spanKind\" or \"attributes\" for the sampling override configuration.");
596-
}
597630
if (percentage == null) {
598631
// TODO add doc and go link, similar to telemetry processors
599632
throw new FriendlyException(

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/configuration/ConfigurationBuilder.java

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ public class ConfigurationBuilder {
8484
private static final String APPLICATIONINSIGHTS_SAMPLING_PERCENTAGE =
8585
"APPLICATIONINSIGHTS_SAMPLING_PERCENTAGE";
8686

87+
private static final String APPLICATIONINSIGHTS_SAMPLING_LIMIT_PER_SECOND =
88+
"APPLICATIONINSIGHTS_SAMPLING_LIMIT_PER_SECOND";
89+
8790
private static final String APPLICATIONINSIGHTS_INSTRUMENTATION_LOGGING_LEVEL =
8891
"APPLICATIONINSIGHTS_INSTRUMENTATION_LOGGING_LEVEL";
8992

@@ -190,6 +193,15 @@ private static void logConfigurationWarnings(Configuration config) {
190193
+ " and it is now enabled by default,"
191194
+ " so no need to enable it under preview configuration");
192195
}
196+
for (SamplingOverride override : config.preview.sampling.overrides) {
197+
if (override.spanKind != null) {
198+
configurationLogger.warn(
199+
"Sampling overrides \"spanKind\" has been deprecated,"
200+
+ " and support for it will be removed in a future release, please transition from"
201+
+ " \"spanKind\" to \"telemetryKind\".");
202+
}
203+
}
204+
193205
logWarningIfUsingInternalAttributes(config);
194206
}
195207

@@ -206,6 +218,10 @@ private static void overlayConfiguration(
206218
overlayFromEnv(rpConfiguration);
207219
overlayRpConfiguration(config, rpConfiguration);
208220
}
221+
// only fall back to default sampling configuration after all overlays have been performed
222+
if (config.sampling.limitPerSecond == null && config.sampling.percentage == null) {
223+
config.sampling.limitPerSecond = 5.0;
224+
}
209225
// only set role instance to host name as a last resort
210226
if (config.role.instance == null) {
211227
String hostname = HostName.get();
@@ -478,6 +494,10 @@ static void overlayFromEnv(Configuration config, Path baseDir) throws IOExceptio
478494
config.sampling.percentage =
479495
overlayWithEnvVar(APPLICATIONINSIGHTS_SAMPLING_PERCENTAGE, config.sampling.percentage);
480496

497+
config.sampling.limitPerSecond =
498+
overlayWithEnvVar(
499+
APPLICATIONINSIGHTS_SAMPLING_LIMIT_PER_SECOND, config.sampling.limitPerSecond);
500+
481501
config.proxy = overlayProxyFromEnv(config.proxy);
482502

483503
config.selfDiagnostics.level =
@@ -487,10 +507,9 @@ static void overlayFromEnv(Configuration config, Path baseDir) throws IOExceptio
487507
APPLICATIONINSIGHTS_SELF_DIAGNOSTICS_FILE_PATH, config.selfDiagnostics.file.path);
488508

489509
config.preview.metricIntervalSeconds =
490-
(int)
491-
overlayWithEnvVar(
492-
APPLICATIONINSIGHTS_PREVIEW_METRIC_INTERVAL_SECONDS,
493-
config.preview.metricIntervalSeconds);
510+
overlayWithEnvVar(
511+
APPLICATIONINSIGHTS_PREVIEW_METRIC_INTERVAL_SECONDS,
512+
config.preview.metricIntervalSeconds);
494513

495514
config.preview.instrumentation.springIntegration.enabled =
496515
overlayWithEnvVar(
@@ -597,6 +616,7 @@ static void overlayRpConfiguration(Configuration config, RpConfiguration rpConfi
597616
}
598617
if (rpConfiguration.sampling != null) {
599618
config.sampling.percentage = rpConfiguration.sampling.percentage;
619+
config.sampling.limitPerSecond = rpConfiguration.sampling.limitPerSecond;
600620
}
601621
if (isTrimEmpty(config.role.name)) {
602622
// only use rp configuration role name as a fallback, similar to WEBSITE_SITE_NAME
@@ -646,13 +666,25 @@ public static String overlayWithEnvVar(String name, String defaultValue) {
646666
return defaultValue;
647667
}
648668

649-
static float overlayWithEnvVar(String name, float defaultValue) {
669+
@Nullable
670+
static Double overlayWithEnvVar(String name, @Nullable Double defaultValue) {
650671
String value = getEnvVar(name);
651672
if (value != null) {
652673
configurationLogger.debug("applying environment variable: {}={}", name, value);
653674
// intentionally allowing NumberFormatException to bubble up as invalid configuration and
654675
// prevent agent from starting
655-
return Float.parseFloat(value);
676+
return Double.parseDouble(value);
677+
}
678+
return defaultValue;
679+
}
680+
681+
/**
 * Returns the integer value of an environment variable, or the supplied default when the
 * variable is not set.
 *
 * <p>The variable is looked up through {@code getEnvVar}. When a value is present, only the
 * variable name (not its value) is written to the debug log before the value is parsed.
 *
 * @param name name of the environment variable to look up
 * @param defaultValue value returned when {@code getEnvVar} yields {@code null}
 * @return the parsed environment variable value, or {@code defaultValue}
 * @throws NumberFormatException if the variable is set but is not a parsable integer
 */
static int overlayWithEnvVar(String name, int defaultValue) {
682+
String value = getEnvVar(name);
683+
if (value != null) {
684+
configurationLogger.debug("using environment variable: {}", name);
685+
// intentionally allowing NumberFormatException to bubble up as invalid configuration and
686+
// prevent agent from starting
687+
return Integer.parseInt(value);
656688
}
657689
return defaultValue;
658690
}
@@ -827,17 +859,21 @@ static String getJsonEncodingExceptionMessage(String message, JsonOrigin jsonOri
827859
}
828860

829861
// this is for external callers, where logging is ok
830-
public static float roundToNearest(float samplingPercentage) {
862+
public static double roundToNearest(double samplingPercentage) {
831863
return roundToNearest(samplingPercentage, false);
832864
}
833865

834-
// visible for testing
835-
private static float roundToNearest(float samplingPercentage, boolean doNotLogWarnMessages) {
866+
@Nullable
867+
private static Double roundToNearest(
868+
@Nullable Double samplingPercentage, boolean doNotLogWarnMessages) {
869+
if (samplingPercentage == null) {
870+
return null;
871+
}
836872
if (samplingPercentage == 0) {
837-
return 0;
873+
return 0.0;
838874
}
839875
double itemCount = 100 / samplingPercentage;
840-
float rounded = 100.0f / Math.round(itemCount);
876+
double rounded = 100.0 / Math.round(itemCount);
841877

842878
if (Math.abs(samplingPercentage - rounded) >= 1) {
843879
// TODO include link to docs in this warning message

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/exporter/AgentLogExporter.java

Lines changed: 65 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,22 @@
2828
import com.azure.monitor.opentelemetry.exporter.implementation.logging.OperationLogger;
2929
import com.azure.monitor.opentelemetry.exporter.implementation.models.TelemetryItem;
3030
import com.azure.monitor.opentelemetry.exporter.implementation.quickpulse.QuickPulse;
31+
import com.microsoft.applicationinsights.agent.internal.configuration.Configuration.SamplingOverride;
32+
import com.microsoft.applicationinsights.agent.internal.sampling.AiSampler;
33+
import com.microsoft.applicationinsights.agent.internal.sampling.SamplingOverrides;
3134
import com.microsoft.applicationinsights.agent.internal.telemetry.BatchItemProcessor;
3235
import com.microsoft.applicationinsights.agent.internal.telemetry.TelemetryClient;
3336
import com.microsoft.applicationinsights.agent.internal.telemetry.TelemetryObservers;
37+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
3438
import io.opentelemetry.api.trace.SpanContext;
3539
import io.opentelemetry.sdk.common.CompletableResultCode;
3640
import io.opentelemetry.sdk.logs.data.LogData;
3741
import io.opentelemetry.sdk.logs.data.Severity;
3842
import io.opentelemetry.sdk.logs.export.LogExporter;
43+
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
3944
import java.util.Collection;
45+
import java.util.List;
46+
import java.util.concurrent.ThreadLocalRandom;
4047
import java.util.function.Consumer;
4148
import javax.annotation.Nullable;
4249
import org.slf4j.Logger;
@@ -52,15 +59,21 @@ public class AgentLogExporter implements LogExporter {
5259
// TODO (trask) could implement this in a filtering LogExporter instead
5360
private volatile Severity threshold;
5461

62+
private final SamplingOverrides logSamplingOverrides;
63+
private final SamplingOverrides exceptionSamplingOverrides;
5564
private final LogDataMapper mapper;
5665
private final Consumer<TelemetryItem> telemetryItemConsumer;
5766

5867
public AgentLogExporter(
5968
Severity threshold,
69+
List<SamplingOverride> logSamplingOverrides,
70+
List<SamplingOverride> exceptionSamplingOverrides,
6071
LogDataMapper mapper,
6172
@Nullable QuickPulse quickPulse,
6273
BatchItemProcessor batchItemProcessor) {
6374
this.threshold = threshold;
75+
this.logSamplingOverrides = new SamplingOverrides(logSamplingOverrides);
76+
this.exceptionSamplingOverrides = new SamplingOverrides(exceptionSamplingOverrides);
6477
this.mapper = mapper;
6578
telemetryItemConsumer =
6679
telemetryItem -> {
@@ -86,18 +99,46 @@ public CompletableResultCode export(Collection<LogData> logs) {
8699
return CompletableResultCode.ofFailure();
87100
}
88101
for (LogData log : logs) {
89-
SpanContext spanContext = log.getSpanContext();
90-
if (spanContext.isValid() && !spanContext.getTraceFlags().isSampled()) {
91-
continue;
92-
}
93102
logger.debug("exporting log: {}", log);
94103
try {
95104
int severity = log.getSeverity().getSeverityNumber();
96105
int threshold = this.threshold.getSeverityNumber();
97106
if (severity < threshold) {
98107
continue;
99108
}
100-
mapper.map(log, telemetryItemConsumer);
109+
110+
String stack = log.getAttributes().get(SemanticAttributes.EXCEPTION_STACKTRACE);
111+
112+
SamplingOverrides samplingOverrides =
113+
stack != null ? exceptionSamplingOverrides : logSamplingOverrides;
114+
115+
SpanContext spanContext = log.getSpanContext();
116+
117+
boolean standaloneLog = !spanContext.isValid();
118+
Double samplingPercentage =
119+
samplingOverrides.getOverridePercentage(standaloneLog, log.getAttributes());
120+
121+
if (samplingPercentage != null && !shouldSample(spanContext, samplingPercentage)) {
122+
continue;
123+
}
124+
125+
if (samplingPercentage == null
126+
&& !standaloneLog
127+
&& !spanContext.getTraceFlags().isSampled()) {
128+
// if there is no sampling override, and the log is part of an unsampled trace, then don't
129+
// capture it
130+
continue;
131+
}
132+
133+
Long itemCount = null;
134+
if (samplingPercentage != null) {
135+
// samplingPercentage cannot be 0 here
136+
itemCount = Math.round(100.0 / samplingPercentage);
137+
}
138+
139+
TelemetryItem telemetryItem = mapper.map(log, stack, itemCount);
140+
telemetryItemConsumer.accept(telemetryItem);
141+
101142
exportingLogLogger.recordSuccess();
102143
} catch (Throwable t) {
103144
exportingLogLogger.recordFailure(t.getMessage(), t, EXPORTER_MAPPING_ERROR);
@@ -116,4 +157,23 @@ public CompletableResultCode flush() {
116157
public CompletableResultCode shutdown() {
117158
return CompletableResultCode.ofSuccess();
118159
}
160+
161+
/**
 * Decides whether a log record should be kept at the given sampling percentage.
 *
 * <p>The percentage is on a 0-100 scale: exactly 100 always keeps the record and exactly 0
 * always drops it. Otherwise, a record with a valid span context is decided by {@code
 * AiSampler.shouldRecordAndSample} using the trace id, and a record without one is decided by
 * a random draw.
 *
 * <p>NOTE(review): using the trace id presumably keeps the decision consistent for every item
 * in the same trace; confirm against {@code AiSampler}.
 *
 * @param spanContext span context attached to the log record; may be invalid for standalone logs
 * @param percentage sampling percentage on a 0-100 scale
 * @return {@code true} if the record should be exported
 */
@SuppressFBWarnings(
162+
value = "SECPR", // Predictable pseudorandom number generator
163+
justification = "Predictable random is ok for sampling decision")
164+
private static boolean shouldSample(SpanContext spanContext, double percentage) {
165+
if (percentage == 100) {
166+
// optimization, no need to calculate score
167+
return true;
168+
}
169+
if (percentage == 0) {
170+
// optimization, no need to calculate score
171+
return false;
172+
}
173+
if (spanContext.isValid()) {
174+
return AiSampler.shouldRecordAndSample(spanContext.getTraceId(), percentage);
175+
}
176+
// this is a standalone log (not part of a trace), so randomly sample at the given percentage
177+
return ThreadLocalRandom.current().nextDouble() < percentage / 100;
178+
}
119179
}

0 commit comments

Comments
 (0)