Skip to content

Commit 75156f6

Browse files
committed
Add mechanism to remove exception probes
Add circuit breaker when evaluating exception probe to detect when an exception probe is executed in a hot path only waiting for exception to happen. when circuit breaker is open after 1000 calls in 10 seconds window, we check if there is no exception captured during this period too and remove the probe from the configuration and un-instrument it. This will prevent too much overhead for hot path
1 parent 0b03104 commit 75156f6

File tree

4 files changed

+106
-35
lines changed

4 files changed

+106
-35
lines changed

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/DefaultExceptionDebugger.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ public DefaultExceptionDebugger(
5757
ClassNameFilter classNameFiltering,
5858
int maxExceptionPerSecond) {
5959
this.exceptionProbeManager = exceptionProbeManager;
60+
this.exceptionProbeManager.setDefaultExceptionDebugger(this);
6061
this.configurationUpdater = configurationUpdater;
6162
this.classNameFiltering = classNameFiltering;
6263
this.circuitBreaker = new CircuitBreaker(maxExceptionPerSecond, Duration.ofSeconds(1));
@@ -102,7 +103,11 @@ public void handleException(Throwable t, AgentSpan span) {
102103
exceptionProbeManager.createProbesForException(
103104
throwable.getStackTrace(), chainedExceptionIdx);
104105
if (creationResult.probesCreated > 0) {
105-
AgentTaskScheduler.INSTANCE.execute(() -> applyExceptionConfiguration(fingerprint));
106+
AgentTaskScheduler.INSTANCE.execute(
107+
() -> {
108+
applyExceptionConfiguration();
109+
exceptionProbeManager.addFingerprint(fingerprint);
110+
});
106111
break;
107112
} else {
108113
if (LOGGER.isDebugEnabled()) {
@@ -118,9 +123,8 @@ public void handleException(Throwable t, AgentSpan span) {
118123
}
119124
}
120125

121-
private void applyExceptionConfiguration(String fingerprint) {
126+
void applyExceptionConfiguration() {
122127
configurationUpdater.accept(EXCEPTION, exceptionProbeManager.getProbes());
123-
exceptionProbeManager.addFingerprint(fingerprint);
124128
}
125129

126130
private static void processSnapshotsAndSetTags(

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/ExceptionProbeManager.java

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import datadog.trace.api.Config;
99
import datadog.trace.bootstrap.debugger.DebuggerContext.ClassNameFilter;
1010
import datadog.trace.bootstrap.debugger.ProbeId;
11+
import datadog.trace.util.AgentTaskScheduler;
1112
import java.time.Clock;
1213
import java.time.Duration;
1314
import java.time.Instant;
@@ -24,6 +25,7 @@
2425

2526
/** Manages the probes used for instrumentation of exception stacktraces. */
2627
public class ExceptionProbeManager {
28+
public static final int HOT_LOOP_TIME_WINDOW_IN_SECONDS = 10;
2729
private static final Logger LOGGER = LoggerFactory.getLogger(ExceptionProbeManager.class);
2830

2931
private final Map<String, Instant> fingerprints = new ConcurrentHashMap<>();
@@ -35,6 +37,7 @@ public class ExceptionProbeManager {
3537
private final long captureIntervalS;
3638
private final Clock clock;
3739
private final int maxCapturedFrames;
40+
private DefaultExceptionDebugger defaultExceptionDebugger;
3841

3942
public ExceptionProbeManager(ClassNameFilter classNameFiltering, Duration captureInterval) {
4043
this(
@@ -67,6 +70,19 @@ public ClassNameFilter getClassNameFilter() {
6770
return classNameFiltering;
6871
}
6972

73+
public void removeProbe(ExceptionProbe exceptionProbe) {
74+
probes.remove(exceptionProbe.getId());
75+
if (defaultExceptionDebugger != null) {
76+
AgentTaskScheduler.INSTANCE.execute(
77+
() -> defaultExceptionDebugger.applyExceptionConfiguration());
78+
}
79+
}
80+
81+
public boolean shouldRemoveProbe(Instant lastCaptureTime) {
82+
return ChronoUnit.SECONDS.between(lastCaptureTime, Instant.now(clock))
83+
>= HOT_LOOP_TIME_WINDOW_IN_SECONDS;
84+
}
85+
7086
static class CreationResult {
7187
final int probesCreated;
7288
final int thirdPartyFrames;
@@ -139,18 +155,21 @@ public Map<String, Instant> getFingerprints() {
139155
return fingerprints;
140156
}
141157

142-
public boolean shouldCaptureException(String fingerprint) {
143-
return shouldCaptureException(fingerprint, clock);
158+
public boolean shouldCaptureException(Instant lastCapture) {
159+
return shouldCaptureException(lastCapture, clock);
144160
}
145161

146-
boolean shouldCaptureException(String fingerprint, Clock clock) {
147-
Instant lastCapture = fingerprints.get(fingerprint);
162+
boolean shouldCaptureException(Instant lastCapture, Clock clock) {
148163
if (lastCapture == null) {
149164
return false;
150165
}
151166
return ChronoUnit.SECONDS.between(lastCapture, Instant.now(clock)) >= captureIntervalS;
152167
}
153168

169+
public Instant getLastCapture(String fingerprint) {
170+
return fingerprints.get(fingerprint);
171+
}
172+
154173
public void addSnapshot(Snapshot snapshot) {
155174
Throwable throwable = snapshot.getCaptures().getReturn().getCapturedThrowable().getThrowable();
156175
if (throwable == null) {
@@ -213,4 +232,8 @@ public void addSnapshot(Snapshot snapshot) {
213232
snapshots.add(snapshot);
214233
}
215234
}
235+
236+
void setDefaultExceptionDebugger(DefaultExceptionDebugger defaultExceptionDebugger) {
237+
this.defaultExceptionDebugger = defaultExceptionDebugger;
238+
}
216239
}

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/probe/ExceptionProbe.java

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.datadog.debugger.probe;
22

3+
import static com.datadog.debugger.exception.ExceptionProbeManager.HOT_LOOP_TIME_WINDOW_IN_SECONDS;
34
import static com.datadog.debugger.util.ExceptionHelper.getInnerMostThrowable;
45
import static java.util.Collections.emptyList;
56

@@ -8,12 +9,15 @@
89
import com.datadog.debugger.exception.Fingerprinter;
910
import com.datadog.debugger.instrumentation.InstrumentationResult;
1011
import com.datadog.debugger.sink.Snapshot;
12+
import com.datadog.debugger.util.CircuitBreaker;
1113
import datadog.trace.bootstrap.debugger.CapturedContext;
1214
import datadog.trace.bootstrap.debugger.MethodLocation;
1315
import datadog.trace.bootstrap.debugger.ProbeId;
1416
import datadog.trace.bootstrap.debugger.ProbeImplementation;
1517
import datadog.trace.bootstrap.debugger.ProbeLocation;
1618
import datadog.trace.bootstrap.debugger.el.ValueReferences;
19+
import java.time.Duration;
20+
import java.time.Instant;
1721
import java.util.List;
1822
import org.slf4j.Logger;
1923
import org.slf4j.LoggerFactory;
@@ -22,6 +26,9 @@ public class ExceptionProbe extends LogProbe implements ForceMethodInstrumentati
2226
private static final Logger LOGGER = LoggerFactory.getLogger(ExceptionProbe.class);
2327
private final transient ExceptionProbeManager exceptionProbeManager;
2428
private final transient int chainedExceptionIdx;
29+
private final transient CircuitBreaker circuitBreaker =
30+
new CircuitBreaker(1000, Duration.ofSeconds(HOT_LOOP_TIME_WINDOW_IN_SECONDS));
31+
private Instant lastCaptureTime = Instant.MIN;
2532

2633
public ExceptionProbe(
2734
ProbeId probeId,
@@ -61,6 +68,14 @@ public CapturedContext.Status createStatus() {
6168
@Override
6269
public void evaluate(
6370
CapturedContext context, CapturedContext.Status status, MethodLocation methodLocation) {
71+
if (!circuitBreaker.trip()) {
72+
if (exceptionProbeManager.shouldRemoveProbe(lastCaptureTime)) {
73+
// hot loop without exception captured => uninstrument this probe
74+
LOGGER.debug("Removing probe {}", id);
75+
exceptionProbeManager.removeProbe(this);
76+
return;
77+
}
78+
}
6479
if (!(status instanceof ExceptionProbeStatus)) {
6580
throw new IllegalStateException("Invalid status: " + status.getClass());
6681
}
@@ -78,7 +93,9 @@ public void evaluate(
7893
Throwable innerMostThrowable = getInnerMostThrowable(throwable);
7994
String fingerprint =
8095
Fingerprinter.fingerprint(innerMostThrowable, exceptionProbeManager.getClassNameFilter());
81-
if (exceptionProbeManager.shouldCaptureException(fingerprint)) {
96+
Instant lastCapture = exceptionProbeManager.getLastCapture(fingerprint);
97+
if (exceptionProbeManager.shouldCaptureException(lastCapture)) {
98+
8299
LOGGER.debug("Capturing exception matching fingerprint: {}", fingerprint);
83100
// capture only on uncaught exception matching the fingerprint
84101
ExceptionProbeStatus exceptionStatus = (ExceptionProbeStatus) status;

dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeManagerTest.java

Lines changed: 54 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,60 @@
11
package com.datadog.debugger.exception;
22

3+
import static com.datadog.debugger.util.TestHelper.assertWithTimeout;
4+
import static java.util.Arrays.asList;
35
import static org.junit.jupiter.api.Assertions.assertEquals;
46
import static org.junit.jupiter.api.Assertions.assertFalse;
57
import static org.junit.jupiter.api.Assertions.assertTrue;
8+
import static org.mockito.Mockito.doAnswer;
69
import static org.mockito.Mockito.mock;
710
import static org.mockito.Mockito.when;
811

912
import com.datadog.debugger.probe.ExceptionProbe;
1013
import com.datadog.debugger.util.ClassNameFiltering;
1114
import datadog.trace.api.Config;
15+
import datadog.trace.bootstrap.debugger.CapturedContext;
16+
import datadog.trace.bootstrap.debugger.MethodLocation;
1217
import java.time.Clock;
1318
import java.time.Duration;
1419
import java.time.Instant;
1520
import java.util.Collections;
21+
import java.util.HashSet;
22+
import java.util.Set;
23+
import java.util.concurrent.atomic.AtomicBoolean;
1624
import java.util.stream.Collectors;
1725
import java.util.stream.Stream;
26+
import org.jetbrains.annotations.NotNull;
1827
import org.junit.jupiter.api.Test;
1928

2029
class ExceptionProbeManagerTest {
30+
private static final @NotNull Set<String> FILTERED_PACKAGES =
31+
new HashSet<>(
32+
asList(
33+
"java.",
34+
"jdk.",
35+
"sun.",
36+
"com.sun.",
37+
"org.gradle.",
38+
"worker.org.gradle.",
39+
"org.junit."));
2140
private final RuntimeException exception = new RuntimeException("test");
2241

2342
@Test
2443
public void instrumentStackTrace() {
2544
ClassNameFiltering classNameFiltering = ClassNameFiltering.allowAll();
2645
ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering);
2746
RuntimeException exception = new RuntimeException("test");
28-
String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering);
2947
exceptionProbeManager.createProbesForException(exception.getStackTrace(), 0);
3048
assertFalse(exceptionProbeManager.getProbes().isEmpty());
3149
}
3250

3351
@Test
3452
void instrumentSingleFrame() {
35-
ClassNameFiltering classNameFiltering =
36-
new ClassNameFiltering(
37-
Stream.of(
38-
"java.",
39-
"jdk.",
40-
"sun.",
41-
"com.sun.",
42-
"org.gradle.",
43-
"worker.org.gradle.",
44-
"org.junit.")
45-
.collect(Collectors.toSet()));
53+
ClassNameFiltering classNameFiltering = new ClassNameFiltering(FILTERED_PACKAGES);
4654
ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering);
4755

4856
String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering);
49-
assertEquals("4974b2b4853e6152d8f218fb79a42a761a45335447e22e53d75f5325e742655", fingerprint);
57+
assertEquals("44cbdcfe32eea21c3523e4a5885daabf5b8c9428cc0f613be5ff29663465ff9", fingerprint);
5058
exceptionProbeManager.createProbesForException(exception.getStackTrace(), 0);
5159
assertEquals(1, exceptionProbeManager.getProbes().size());
5260
ExceptionProbe exceptionProbe = exceptionProbeManager.getProbes().iterator().next();
@@ -82,34 +90,53 @@ void lastCapture() {
8290
RuntimeException exception = new RuntimeException("test");
8391
String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering);
8492
exceptionProbeManager.addFingerprint(fingerprint);
85-
assertTrue(exceptionProbeManager.shouldCaptureException(fingerprint));
93+
Instant lastCapture = exceptionProbeManager.getLastCapture(fingerprint);
94+
assertTrue(exceptionProbeManager.shouldCaptureException(lastCapture));
8695
exceptionProbeManager.updateLastCapture(fingerprint);
87-
assertFalse(exceptionProbeManager.shouldCaptureException(fingerprint));
96+
lastCapture = exceptionProbeManager.getLastCapture(fingerprint);
97+
assertFalse(exceptionProbeManager.shouldCaptureException(lastCapture));
8898
Clock clock =
8999
Clock.fixed(Instant.now().plus(Duration.ofMinutes(61)), Clock.systemUTC().getZone());
90-
assertTrue(exceptionProbeManager.shouldCaptureException(fingerprint, clock));
100+
lastCapture = exceptionProbeManager.getLastCapture(fingerprint);
101+
assertTrue(exceptionProbeManager.shouldCaptureException(lastCapture, clock));
91102
}
92103

93104
@Test
94105
void maxFrames() {
95106
RuntimeException deepException = level1();
96-
ClassNameFiltering classNameFiltering =
97-
new ClassNameFiltering(
98-
Stream.of(
99-
"java.",
100-
"jdk.",
101-
"sun.",
102-
"com.sun.",
103-
"org.gradle.",
104-
"worker.org.gradle.",
105-
"org.junit.")
106-
.collect(Collectors.toSet()));
107+
ClassNameFiltering classNameFiltering = new ClassNameFiltering(FILTERED_PACKAGES);
107108
ExceptionProbeManager exceptionProbeManager =
108109
new ExceptionProbeManager(classNameFiltering, Duration.ofHours(1), Clock.systemUTC(), 3);
109110
exceptionProbeManager.createProbesForException(deepException.getStackTrace(), 0);
110111
assertEquals(3, exceptionProbeManager.getProbes().size());
111112
}
112113

114+
@Test
115+
public void removeExceptionProbeOnHotLoop() {
116+
ClassNameFiltering classNameFiltering = new ClassNameFiltering(FILTERED_PACKAGES);
117+
ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering);
118+
AtomicBoolean configApplied = new AtomicBoolean(false);
119+
DefaultExceptionDebugger defaultExceptionDebugger = mock(DefaultExceptionDebugger.class);
120+
doAnswer(
121+
invocationOnMock -> {
122+
configApplied.set(true);
123+
return null;
124+
})
125+
.when(defaultExceptionDebugger)
126+
.applyExceptionConfiguration();
127+
exceptionProbeManager.setDefaultExceptionDebugger(defaultExceptionDebugger);
128+
exceptionProbeManager.createProbesForException(exception.getStackTrace(), 0);
129+
assertEquals(1, exceptionProbeManager.getProbes().size());
130+
ExceptionProbe exceptionProbe = exceptionProbeManager.getProbes().iterator().next();
131+
// simulate a code hot loop
132+
for (int i = 0; i < 2000; i++) {
133+
CapturedContext context = mock(CapturedContext.class);
134+
exceptionProbe.evaluate(context, exceptionProbe.createStatus(), MethodLocation.EXIT);
135+
}
136+
assertEquals(0, exceptionProbeManager.getProbes().size());
137+
assertWithTimeout(configApplied::get, Duration.ofSeconds(30));
138+
}
139+
113140
RuntimeException level1() {
114141
return level2();
115142
}

0 commit comments

Comments
 (0)