Skip to content

Commit f0fca13

Browse files
fix(debugging): report error info on every probe error [backport 1.20] (#7187)
Backport a59919e from #7095 to 1.20. We make sure that we always have error information whenever an error occurs while handing or instrumenting a probe. Previously, we only added the information if a Python exception was available. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that. Co-authored-by: Gabriele N. Tornetta <[email protected]>
1 parent bf17e03 commit f0fca13

File tree

8 files changed

+59
-63
lines changed

8 files changed

+59
-63
lines changed

ddtrace/debugging/_debugger.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ def _probe_injection_hook(self, module):
458458
origin(module),
459459
)
460460
log.error(message)
461-
self._probe_registry.set_error(probe, message)
461+
self._probe_registry.set_error(probe, "NoFunctionsAtLine", message)
462462
continue
463463
for function in (cast(FullyNamedWrappedFunction, _) for _ in functions):
464464
probes_for_function[function].append(cast(LineProbe, probe))
@@ -470,7 +470,7 @@ def _probe_injection_hook(self, module):
470470

471471
for probe in probes:
472472
if probe.probe_id in failed:
473-
self._probe_registry.set_error(probe, "Failed to inject")
473+
self._probe_registry.set_error(probe, "InjectionFailure", "Failed to inject")
474474
else:
475475
self._probe_registry.set_installed(probe)
476476

@@ -497,7 +497,7 @@ def _inject_probes(self, probes):
497497
log.error(
498498
"Cannot inject probe %s: source file %s cannot be resolved", probe.probe_id, probe.source_file
499499
)
500-
self._probe_registry.set_error(probe, "Source file location cannot be resolved")
500+
self._probe_registry.set_error(probe, "NoSourceFile", "Source file location cannot be resolved")
501501
continue
502502

503503
for source in {probe.source_file for probe in probes if probe.source_file is not None}:
@@ -508,7 +508,10 @@ def _inject_probes(self, probes):
508508
for probe in probes:
509509
if probe.source_file != source:
510510
continue
511-
self._probe_registry.set_exc_info(probe, exc_info)
511+
exc_type, exc, _ = exc_info
512+
self._probe_registry.set_error(
513+
probe, exc_type.__name__ if exc_type is not None else type(exc).__name__, str(exc)
514+
)
512515
log.error("Cannot register probe injection hook on source '%s'", source, exc_info=True)
513516

514517
def _eject_probes(self, probes_to_eject):
@@ -578,7 +581,7 @@ def _probe_wrapping_hook(self, module):
578581
probe.func_qname,
579582
probe.module,
580583
)
581-
self._probe_registry.set_error(probe, message)
584+
self._probe_registry.set_error(probe, "NoFunctionInModule", message)
582585
log.error(message)
583586
continue
584587

@@ -614,7 +617,10 @@ def _wrap_functions(self, probes):
614617
assert probe.module is not None # nosec
615618
self.__watchdog__.register_module_hook(probe.module, self._probe_wrapping_hook)
616619
except Exception:
617-
self._probe_registry.set_exc_info(probe, sys.exc_info())
620+
exc_type, exc, _ = sys.exc_info()
621+
self._probe_registry.set_error(
622+
probe, exc_type.__name__ if exc_type is not None else type(exc).__name__, str(exc)
623+
)
618624
log.error("Cannot register probe wrapping hook on module '%s'", probe.module, exc_info=True)
619625

620626
def _unwrap_functions(self, probes):

ddtrace/debugging/_probe/registry.py

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,34 +8,29 @@
88
from ddtrace.debugging._probe.model import ProbeLocationMixin
99
from ddtrace.debugging._probe.status import ProbeStatusLogger
1010
from ddtrace.internal import forksafe
11-
from ddtrace.internal.compat import ExcInfoType
1211
from ddtrace.internal.logger import get_logger
1312

1413

1514
logger = get_logger(__name__)
1615

1716

1817
class ProbeRegistryEntry(object):
19-
20-
__slots__ = ("probe", "installed", "exc_info", "message")
18+
__slots__ = ("probe", "installed", "error_type", "message")
2119

2220
def __init__(self, probe):
2321
# type: (Probe) -> None
2422
self.probe = probe
2523
self.installed = False
26-
self.exc_info = None # type: Optional[ExcInfoType]
24+
self.error_type = None # type: Optional[str]
2725
self.message = None # type: Optional[str]
2826

2927
def set_installed(self):
3028
# type: () -> None
3129
self.installed = True
3230

33-
def set_exc_info(self, exc_info):
34-
# type: (ExcInfoType) -> None
35-
self.exc_info = exc_info
36-
37-
def set_message(self, message):
38-
# type: (str) -> None
31+
def set_error(self, error_type, message):
32+
# type: (str, str) -> None
33+
self.error_type = error_type
3934
self.message = message
4035

4136
def update(self, probe):
@@ -83,7 +78,11 @@ def register(self, *probes):
8378

8479
location = _get_probe_location(probe)
8580
if location is None:
86-
self.set_error(probe, "Unable to resolve location information for probe {}".format(probe.probe_id))
81+
self.set_error(
82+
probe,
83+
"UnresolvedLocation",
84+
"Unable to resolve location information for probe {}".format(probe.probe_id),
85+
)
8786
continue
8887

8988
self._pending[location].append(probe)
@@ -111,28 +110,20 @@ def set_installed(self, probe):
111110

112111
self.logger.installed(probe)
113112

114-
def set_exc_info(self, probe, exc_info):
115-
# type: (Probe, ExcInfoType) -> None
116-
"""Set the installed flag for a probe."""
117-
with self._lock:
118-
self[probe.probe_id].set_exc_info(exc_info)
119-
self.logger.error(probe, exc_info=exc_info)
120-
121-
def set_error(self, probe, message):
122-
# type: (Probe, str) -> None
113+
def set_error(self, probe, error_type, message):
114+
# type: (Probe, str, str) -> None
123115
"""Set the error message for a probe."""
124116
with self._lock:
125-
self[probe.probe_id].set_message(message)
126-
self.logger.error(probe, message)
117+
self[probe.probe_id].set_error(error_type, message)
118+
self.logger.error(probe, (error_type, message))
127119

128120
def _log_probe_status_unlocked(self, entry):
129121
# type: (ProbeRegistryEntry) -> None
130122
if entry.installed:
131123
self.logger.installed(entry.probe)
132-
elif entry.exc_info:
133-
self.logger.error(entry.probe, exc_info=entry.exc_info)
134-
elif entry.message:
135-
self.logger.error(entry.probe, message=entry.message)
124+
elif entry.error_type:
125+
assert entry.message is not None, entry # nosec
126+
self.logger.error(entry.probe, error=(entry.error_type, entry.message))
136127
else:
137128
self.logger.received(entry.probe)
138129

ddtrace/debugging/_probe/remoteconfig.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,7 @@ def get_probes(config, status_logger):
288288
except Exception as e:
289289
status_logger.error(
290290
probe=Probe(probe_id=config["id"], version=config["version"], tags={}),
291-
message=str(e),
292-
exc_info=sys.exc_info(),
291+
error=(type(e).__name__, str(e)),
293292
)
294293
return []
295294

ddtrace/debugging/_probe/status.py

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,26 @@
1010
from ddtrace.debugging._encoding import add_tags
1111
from ddtrace.debugging._metrics import metrics
1212
from ddtrace.debugging._probe.model import Probe
13-
from ddtrace.debugging._signal import utils
1413
from ddtrace.internal import runtime
15-
from ddtrace.internal.compat import ExcInfoType
1614
from ddtrace.internal.logger import get_logger
1715

1816

1917
log = get_logger(__name__)
2018
meter = metrics.get_meter("probe.status")
2119

2220

21+
ErrorInfo = Tuple[str, str]
22+
23+
2324
class ProbeStatusLogger(object):
2425
def __init__(self, service, encoder):
2526
# type: (str, BufferedEncoder) -> None
2627
self._service = service
2728
self._encoder = encoder
2829
self._retry_queue = deque() # type: deque[Tuple[str, float]]
2930

30-
def _payload(self, probe, status, message, timestamp, exc_info=None):
31-
# type: (Probe, str, str, float, Optional[ExcInfoType]) -> str
31+
def _payload(self, probe, status, message, timestamp, error=None):
32+
# type: (Probe, str, str, float, Optional[ErrorInfo]) -> str
3233
payload = {
3334
"service": self._service,
3435
"timestamp": int(timestamp * 1e3), # milliseconds
@@ -46,19 +47,17 @@ def _payload(self, probe, status, message, timestamp, exc_info=None):
4647

4748
add_tags(payload)
4849

49-
if exc_info is not None:
50-
exc_type, exc, tb = exc_info
51-
assert exc_type is not None and tb is not None, exc_info # nosec
50+
if error is not None:
51+
error_type, message = error
5252
payload["debugger"]["diagnostics"]["exception"] = { # type: ignore[index]
53-
"type": exc_type.__name__,
54-
"message": str(exc),
55-
"stacktrace": utils.capture_stack(tb.tb_frame),
53+
"type": error_type,
54+
"message": message,
5655
}
5756

5857
return json.dumps(payload)
5958

60-
def _write(self, probe, status, message, exc_info=None):
61-
# type: (Probe, str, str, Optional[ExcInfoType]) -> None
59+
def _write(self, probe, status, message, error=None):
60+
# type: (Probe, str, str, Optional[ErrorInfo]) -> None
6261
if self._retry_queue:
6362
meter.distribution("backlog.size", len(self._retry_queue))
6463

@@ -80,7 +79,7 @@ def _write(self, probe, status, message, exc_info=None):
8079
meter.increment("backlog.buffer_full")
8180
return
8281

83-
payload = self._payload(probe, status, message, now, exc_info)
82+
payload = self._payload(probe, status, message, now, error)
8483

8584
try:
8685
self._encoder.put(payload)
@@ -108,13 +107,6 @@ def installed(self, probe, message=None):
108107
message or "Probe %s instrumented correctly" % probe.probe_id,
109108
)
110109

111-
def error(self, probe, message=None, exc_info=None):
112-
# type: (Probe, Optional[str], Optional[ExcInfoType]) -> None
113-
if message is None and exc_info is None:
114-
raise ValueError("Either message or exc_info must be provided")
115-
116-
if exc_info is not None and message is None:
117-
_, exc, _ = exc_info
118-
message = "Probe %s instrumentation failed: %r" % (probe.probe_id, exc)
119-
120-
self._write(probe, "ERROR", message, exc_info) # type: ignore[arg-type]
110+
def error(self, probe, error=None):
111+
# type: (Probe, Optional[ErrorInfo]) -> None
112+
self._write(probe, "ERROR", "Failed to instrument probe %s" % probe.probe_id, error)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
dynamic instrumentation: fix an issue that caused the probe instrumentation
5+
error details from being reported for visualization in the UI.

tests/debugging/probe/test_registry.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,14 +62,18 @@ def test_registry_location_error():
6262
assert status_logger.queue == [
6363
{
6464
"service": "test",
65-
"message": "Unable to resolve location information for probe 42",
65+
"message": "Failed to instrument probe 42",
6666
"ddsource": "dd_debugger",
6767
"debugger": {
6868
"diagnostics": {
6969
"probeId": 42,
7070
"probeVersion": 0,
7171
"runtimeId": runtime.get_runtime_id(),
7272
"status": "ERROR",
73+
"exception": {
74+
"type": "UnresolvedLocation",
75+
"message": "Unable to resolve location information for probe 42",
76+
},
7377
}
7478
},
7579
}

tests/debugging/probe/test_remoteconfig.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,7 @@ def cb(e, ps):
604604
remoteconfig_poller._poll_data()
605605

606606
status_logger.error.assert_called_once()
607-
assert status_logger.error.call_args[1]["message"] == "test exception"
607+
assert status_logger.error.call_args[1]["error"] == ("Exception", "test exception")
608608
assert status_logger.error.call_args[1]["probe"].probe_id == "error"
609609
finally:
610610
di_config.diagnostics_interval = old_interval

tests/debugging/probe/test_status.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,19 +69,18 @@ def test_probe_status_error():
6969
line=36,
7070
condition=None,
7171
)
72-
message = "Probe %s installed" % probe.probe_id
7372

7473
try:
7574
raise RuntimeError("Test error")
7675
except Exception:
77-
status_logger.error(probe, message, exc_info=sys.exc_info())
76+
exc_type, exc, _ = sys.exc_info()
77+
status_logger.error(probe, (exc_type.__name__, str(exc)))
7878

7979
(entry,) = status_logger.queue
80-
assert entry["message"] == message
80+
assert entry["message"] == "Failed to instrument probe probe-instance-method"
8181
assert entry["debugger"]["diagnostics"]["probeId"] == probe.probe_id
8282
assert entry["debugger"]["diagnostics"]["status"] == "ERROR"
8383

8484
exc = entry["debugger"]["diagnostics"]["exception"]
8585
assert exc["type"] == "RuntimeError"
8686
assert exc["message"] == "Test error"
87-
assert exc["stacktrace"][0]["function"] == "test_probe_status_error"

0 commit comments

Comments
 (0)