Skip to content

Commit b294cf2

Browse files
tylfinP403n1x87
andauthored
chore(er): ensure max frame per trace (#12791) (#13576)
We make sure that the max frame settings is honoured for a whole trace instead of a single span to avoid capturing too many snapshots. (cherry picked from commit 560b7a6) (cherry picked from commit bb68a4f) ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Gabriele N. Tornetta <[email protected]>
1 parent fb2d052 commit b294cf2

File tree

4 files changed

+75
-2
lines changed

4 files changed

+75
-2
lines changed

ddtrace/debugging/_exception/replay.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ddtrace.internal.rate_limiter import BudgetRateLimiterWithJitter as RateLimiter
2121
from ddtrace.internal.rate_limiter import RateLimitExceeded
2222
from ddtrace.internal.utils.time import HourGlass
23+
from ddtrace.settings.exception_replay import config
2324
from ddtrace.trace import Span
2425

2526

@@ -38,6 +39,7 @@
3839

3940
# used to rate limit decision on the entire local trace (stored at the root span)
4041
CAPTURE_TRACE_TAG = "_dd.debug.error.trace_captured"
42+
SNAPSHOT_COUNT_TAG = "_dd.debug.error.snapshot_count"
4143

4244
# unique exception id
4345
EXCEPTION_HASH_TAG = "_dd.debug.error.exception_hash"
@@ -201,6 +203,18 @@ def can_capture(span: Span) -> bool:
201203
raise ValueError(msg)
202204

203205

206+
def get_snapshot_count(span: Span) -> int:
207+
root = span._local_root
208+
if root is None:
209+
return 0
210+
211+
count = root.get_metric(SNAPSHOT_COUNT_TAG)
212+
if count is None:
213+
return 0
214+
215+
return int(count)
216+
217+
204218
class SpanExceptionHandler:
205219
__uploader__ = LogsIntakeUploaderV1
206220

@@ -225,20 +239,22 @@ def on_span_exception(
225239

226240
seq = count(1) # 1-based sequence number
227241

228-
while chain:
242+
frames_captured = get_snapshot_count(span)
243+
while chain and frames_captured <= config.max_frames:
229244
exc, _tb = chain.pop() # LIFO: reverse the chain
230245

231246
if _tb is None or _tb.tb_frame is None:
232247
# If we don't have a traceback there isn't much we can do
233248
continue
234249

235250
# DEV: We go from the handler up to the root exception
236-
while _tb:
251+
while _tb and frames_captured < config.max_frames:
237252
frame = _tb.tb_frame
238253
code = frame.f_code
239254
seq_nr = next(seq)
240255

241256
if is_user_code(Path(frame.f_code.co_filename)):
257+
snapshot = None
242258
snapshot_id = frame.f_locals.get(SNAPSHOT_KEY, None)
243259
if snapshot_id is None:
244260
# We don't have a snapshot for the frame so we create one
@@ -263,6 +279,9 @@ def on_span_exception(
263279
# Memoize
264280
frame.f_locals[SNAPSHOT_KEY] = snapshot_id = snapshot.uuid
265281

282+
# Count
283+
frames_captured += 1
284+
266285
# Add correlation tags on the span
267286
span.set_tag_str(FRAME_SNAPSHOT_ID_TAG % seq_nr, snapshot_id)
268287
span.set_tag_str(FRAME_FUNCTION_TAG % seq_nr, code.co_name)
@@ -276,6 +295,11 @@ def on_span_exception(
276295
span.set_tag_str(EXCEPTION_HASH_TAG, str(exc_ident))
277296
span.set_tag_str(EXCEPTION_ID_TAG, str(exc_id))
278297

298+
# Update the snapshot count
299+
root = span._local_root
300+
if root is not None:
301+
root.set_metric(SNAPSHOT_COUNT_TAG, frames_captured)
302+
279303
@classmethod
280304
def enable(cls) -> None:
281305
if cls._instance is not None:

ddtrace/settings/exception_replay.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ class ExceptionReplayConfig(En):
1414
help="Enable automatic capturing of exception debugging information",
1515
deprecations=[("debugging.enabled", None, "3.0")],
1616
)
17+
max_frames = En.v(
18+
int,
19+
"replay.capture_max_frames",
20+
default=8,
21+
help_type="int",
22+
help="The maximum number of frames to capture for each exception",
23+
)
1724

1825

1926
config = ExceptionReplayConfig()

tests/debugging/exception/test_replay.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,44 @@ def a(v):
314314
assert all(
315315
s.line_capture["locals"]["nonloc"] == {"type": "int", "value": "4"} for s in uploader.collector.queue
316316
)
317+
318+
def test_debugger_max_frames(self):
319+
config = ExceptionReplayConfig()
320+
root = None
321+
322+
def r(n=config.max_frames * 2, c=None):
323+
if n == 0:
324+
if c is None:
325+
raise ValueError("hello")
326+
else:
327+
c()
328+
r(n - 1, c)
329+
330+
def a():
331+
with self.trace("a"):
332+
r()
333+
334+
def b():
335+
with self.trace("b"):
336+
r(10, a)
337+
338+
def c():
339+
nonlocal root
340+
341+
with self.trace("c") as root:
342+
r(10, b)
343+
344+
with exception_replay() as uploader:
345+
rate_limiter = RateLimiter(
346+
limit_rate=float("inf"), # no rate limit
347+
raise_on_exceed=False,
348+
)
349+
with with_rate_limiter(rate_limiter):
350+
with pytest.raises(ValueError):
351+
c()
352+
353+
self.assert_span_count(3)
354+
n = uploader.collector.queue
355+
assert len(n) == config.max_frames
356+
357+
assert root.get_metric(replay.SNAPSHOT_COUNT_TAG) == config.max_frames

tests/telemetry/test_writer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ def test_app_started_event_configuration_override(test_agent_session, run_python
354354
{"name": "DD_DYNAMIC_INSTRUMENTATION_UPLOAD_FLUSH_INTERVAL", "origin": "default", "value": 1.0},
355355
{"name": "DD_DYNAMIC_INSTRUMENTATION_UPLOAD_TIMEOUT", "origin": "default", "value": 30},
356356
{"name": "DD_ENV", "origin": "default", "value": None},
357+
{"name": "DD_EXCEPTION_REPLAY_CAPTURE_MAX_FRAMES", "origin": "default", "value": 8},
357358
{"name": "DD_EXCEPTION_REPLAY_ENABLED", "origin": "env_var", "value": True},
358359
{"name": "DD_EXPERIMENTAL_APPSEC_STANDALONE_ENABLED", "origin": "default", "value": False},
359360
{"name": "DD_HTTP_CLIENT_TAG_QUERY_STRING", "origin": "default", "value": None},

0 commit comments

Comments
 (0)