Skip to content

Commit fed11c0

Browse files
fix(debugging): report probe build error via status messages [backport 1.17] (#6452)
Backport 7356ed5 from #6402 to 1.17. Report probe build errors via status messages to ensure that they are shown in the UI and the probes are marked with the `ERROR` status, instead of `WAITING`. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Gabriele N. Tornetta <[email protected]>
1 parent 93415f8 commit fed11c0

File tree

4 files changed

+92
-19
lines changed

4 files changed

+92
-19
lines changed

ddtrace/debugging/_debugger.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,9 @@ def __init__(self, tracer=None):
245245
},
246246
on_full=self._on_encoder_buffer_full,
247247
)
248-
self._probe_registry = ProbeRegistry(self.__logger__(service_name, self._encoder))
248+
status_logger = self.__logger__(service_name, self._encoder)
249+
250+
self._probe_registry = ProbeRegistry(status_logger=status_logger)
249251
self._uploader = self.__uploader__(self._encoder)
250252
self._collector = self.__collector__(self._encoder)
251253
self._services = [self._uploader]
@@ -277,7 +279,7 @@ def __init__(self, tracer=None):
277279

278280
# Register the debugger with the RCM client.
279281
if not remoteconfig_poller.update_product_callback("LIVE_DEBUGGING", self._on_configuration):
280-
di_callback = self.__rc_adapter__(None, self._on_configuration)
282+
di_callback = self.__rc_adapter__(None, self._on_configuration, status_logger=status_logger)
281283
remoteconfig_poller.register("LIVE_DEBUGGING", di_callback)
282284

283285
log.debug("%s initialized (service name: %s)", self.__class__.__name__, service_name)

ddtrace/debugging/_probe/remoteconfig.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import sys
23
import time
34
from typing import Any
45
from typing import Callable
@@ -7,6 +8,8 @@
78
from typing import Optional
89
from typing import Type
910

11+
import six
12+
1013
from ddtrace import config as tracer_config
1114
from ddtrace.debugging._config import di_config
1215
from ddtrace.debugging._expressions import dd_compile
@@ -31,6 +34,7 @@
3134
from ddtrace.debugging._probe.model import SpanDecorationTag
3235
from ddtrace.debugging._probe.model import SpanFunctionProbe
3336
from ddtrace.debugging._probe.model import StringTemplate
37+
from ddtrace.debugging._probe.status import ProbeStatusLogger
3438
from ddtrace.internal.logger import get_logger
3539
from ddtrace.internal.remoteconfig._connectors import PublisherSubscriberConnector
3640
from ddtrace.internal.remoteconfig._publishers import RemoteConfigPublisher
@@ -240,6 +244,10 @@ def update_args(cls, args, attribs):
240244
)
241245

242246

247+
class InvalidProbeConfiguration(ValueError):
248+
pass
249+
250+
243251
def build_probe(attribs):
244252
# type: (Dict[str, Any]) -> Probe
245253
"""
@@ -249,7 +257,7 @@ def build_probe(attribs):
249257
_type = attribs["type"]
250258
_id = attribs["id"]
251259
except KeyError as e:
252-
raise ValueError("Invalid probe attributes: %s" % e)
260+
raise InvalidProbeConfiguration("Invalid probe attributes: %s" % e)
253261

254262
args = dict(
255263
probe_id=_id,
@@ -266,13 +274,23 @@ def build_probe(attribs):
266274
if _type == ProbeType.SPAN_DECORATION_PROBE:
267275
return SpanDecorationProbeFactory.build(args, attribs)
268276

269-
raise ValueError("Unsupported probe type: %s" % _type)
277+
raise InvalidProbeConfiguration("Unsupported probe type: %s" % _type)
270278

271279

272280
@_filter_by_env_and_version
273-
def get_probes(config_id, config):
274-
# type: (str, dict) -> Iterable[Probe]
275-
return [build_probe(config)]
281+
def get_probes(config, status_logger):
282+
# type: (dict, ProbeStatusLogger) -> Iterable[Probe]
283+
try:
284+
return [build_probe(config)]
285+
except InvalidProbeConfiguration:
286+
six.reraise(*sys.exc_info())
287+
except Exception as e:
288+
status_logger.error(
289+
probe=Probe(probe_id=config["id"], version=config["version"], tags={}),
290+
message=str(e),
291+
exc_info=sys.exc_info(),
292+
)
293+
return []
276294

277295

278296
class ProbePollerEvent(object):
@@ -292,10 +310,11 @@ class DebuggerRemoteConfigSubscriber(RemoteConfigSubscriber):
292310
events that can be handled easily by the debugger.
293311
"""
294312

295-
def __init__(self, data_connector, callback, name):
313+
def __init__(self, data_connector, callback, name, status_logger):
296314
super(DebuggerRemoteConfigSubscriber, self).__init__(data_connector, callback, name)
297315
self._configs = {} # type: Dict[str, Dict[str, Probe]]
298316
self._status_timestamp = time.time()
317+
self._status_logger = status_logger
299318
self._next_status_update_timestamp()
300319

301320
def _exec_callback(self, data, test_tracer=None):
@@ -342,7 +361,9 @@ def _update_probes_for_config(self, config_id, config):
342361
# type: (str, Any) -> None
343362
prev_probes = self._configs.get(config_id, {}) # type: Dict[str, Probe]
344363
next_probes = (
345-
{probe.probe_id: probe for probe in get_probes(config_id, config)} if config not in (None, False) else {}
364+
{probe.probe_id: probe for probe in get_probes(config, self._status_logger)}
365+
if config not in (None, False)
366+
else {}
346367
) # type: Dict[str, Probe]
347368
log.debug("[%s][P: %s] Dynamic Instrumentation, dispatch probe events", os.getpid(), os.getppid())
348369
self._dispatch_probe_events(prev_probes, next_probes)
@@ -358,6 +379,6 @@ class ProbeRCAdapter(PubSub):
358379
__subscriber_class__ = DebuggerRemoteConfigSubscriber
359380
__shared_data__ = PublisherSubscriberConnector()
360381

361-
def __init__(self, _preprocess_results, callback):
382+
def __init__(self, _preprocess_results, callback, status_logger):
362383
self._publisher = self.__publisher_class__(self.__shared_data__, _preprocess_results)
363-
self._subscriber = self.__subscriber_class__(self.__shared_data__, callback, "DEBUGGER")
384+
self._subscriber = self.__subscriber_class__(self.__shared_data__, callback, "DEBUGGER", status_logger)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
fixes:
3+
- |
4+
dynamic instrumentation: ensure that probes that fail to be instrumented
5+
because of invalid conditions/expressions are reported with status ``ERROR``
6+
in the UI.

tests/debugging/probe/test_remoteconfig.py

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from time import sleep
33
from uuid import uuid4
44

5+
import mock
56
import pytest
67

78
from ddtrace.debugging._config import di_config
@@ -10,6 +11,7 @@
1011
from ddtrace.debugging._probe.model import LogProbeMixin
1112
from ddtrace.debugging._probe.model import Probe
1213
from ddtrace.debugging._probe.model import ProbeType
14+
from ddtrace.debugging._probe.remoteconfig import InvalidProbeConfiguration
1315
from ddtrace.debugging._probe.remoteconfig import ProbePollerEvent
1416
from ddtrace.debugging._probe.remoteconfig import ProbeRCAdapter
1517
from ddtrace.debugging._probe.remoteconfig import _filter_by_env_and_version
@@ -25,7 +27,7 @@ def __init__(self, *args, **kwargs):
2527
self.probes = {}
2628

2729
@_filter_by_env_and_version
28-
def get_probes(self, _id, _conf):
30+
def get_probes(self, _conf, status_logger):
2931
return list(self.probes.values())
3032

3133
def add_probes(self, probes):
@@ -58,6 +60,23 @@ def mock_config():
5860
rc.get_probes = original_get_probes
5961

6062

63+
@pytest.fixture
64+
def mock_config_exc():
65+
import ddtrace.debugging._probe.remoteconfig as rc
66+
67+
original_build_probe = rc.build_probe
68+
69+
def build_probe(config):
70+
raise Exception("test exception")
71+
72+
try:
73+
rc.build_probe = build_probe
74+
75+
yield mock_config
76+
finally:
77+
rc.build_probe = original_build_probe
78+
79+
6180
@pytest.mark.parametrize(
6281
"env,version,expected",
6382
[
@@ -106,7 +125,7 @@ def callback(e, ps, *args, **kwargs):
106125
]
107126
)
108127

109-
adapter = ProbeRCAdapter(None, callback)
128+
adapter = ProbeRCAdapter(None, callback, status_logger=None)
110129
remoteconfig_poller.register("TEST", adapter, skip_enabled=True)
111130
adapter.append_and_publish({"test": random.randint(0, 11111111)}, "", config_metadata())
112131
remoteconfig_poller._poll_data()
@@ -127,7 +146,7 @@ def validate_events(expected):
127146
old_interval = di_config.diagnostics_interval
128147
di_config.diagnostics_interval = 0.5
129148
try:
130-
adapter = ProbeRCAdapter(None, cb)
149+
adapter = ProbeRCAdapter(None, cb, status_logger=None)
131150
# Wait to allow the next call to the adapter to generate a status event
132151
remoteconfig_poller.register("TEST", adapter, skip_enabled=True)
133152
adapter.append_and_publish(
@@ -181,7 +200,7 @@ def validate_events(expected):
181200
old_interval = di_config.diagnostics_interval
182201
di_config.diagnostics_interval = 0.5
183202
try:
184-
adapter = ProbeRCAdapter(None, cb)
203+
adapter = ProbeRCAdapter(None, cb, status_logger=None)
185204
# Wait to allow the next call to the adapter to generate a status event
186205
remoteconfig_poller.register("TEST", adapter, skip_enabled=True)
187206
adapter.append(
@@ -287,7 +306,7 @@ def callback(e, ps, *args, **kwargs):
287306
old_interval = di_config.diagnostics_interval
288307
di_config.diagnostics_interval = 0.5
289308
try:
290-
adapter = ProbeRCAdapter(None, callback)
309+
adapter = ProbeRCAdapter(None, callback, status_logger=None)
291310
remoteconfig_poller.register("TEST2", adapter, skip_enabled=True)
292311
adapter.append_and_publish({"test": 2}, "", metadata)
293312
remoteconfig_poller._poll_data()
@@ -341,7 +360,7 @@ def validate_events(expected):
341360
old_interval = di_config.diagnostics_interval
342361
di_config.diagnostics_interval = 0.5
343362
try:
344-
adapter = ProbeRCAdapter(None, cb)
363+
adapter = ProbeRCAdapter(None, cb, status_logger=mock.Mock())
345364
# Wait to allow the next call to the adapter to generate a status event
346365
remoteconfig_poller.register("TEST", adapter, skip_enabled=True)
347366
adapter.append_and_publish(
@@ -412,7 +431,7 @@ def validate_events(expected):
412431
# testing two things:
413432
# 1. after sleep 0.5 probe status should report 2 probes
414433
# 2. bad config raises ValueError
415-
with pytest.raises(ValueError):
434+
with pytest.raises(InvalidProbeConfiguration):
416435
adapter.append_and_publish({}, "", config_metadata("not-supported"))
417436
remoteconfig_poller._poll_data()
418437

@@ -537,7 +556,7 @@ def cb(e, ps):
537556
old_interval = di_config.diagnostics_interval
538557
di_config.diagnostics_interval = 0.5
539558
try:
540-
adapter = ProbeRCAdapter(None, cb)
559+
adapter = ProbeRCAdapter(None, cb, None)
541560
# Wait to allow the next call to the adapter to generate a status event
542561
remoteconfig_poller.register("TEST", adapter, skip_enabled=True)
543562
sleep(0.5)
@@ -570,3 +589,28 @@ def cb(e, ps):
570589
]
571590
finally:
572591
di_config.diagnostics_interval = old_interval
592+
593+
594+
def test_expression_compilation_error(remote_config_worker, mock_config_exc):
595+
events = []
596+
597+
def cb(e, ps):
598+
events.append((e, frozenset([p.probe_id if isinstance(p, Probe) else p for p in ps])))
599+
600+
metadata = config_metadata()
601+
old_interval = di_config.diagnostics_interval
602+
di_config.diagnostics_interval = 0.5
603+
try:
604+
status_logger = mock.Mock()
605+
adapter = ProbeRCAdapter(None, cb, status_logger=status_logger)
606+
# Wait to allow the next call to the adapter to generate a status event
607+
remoteconfig_poller.register("TEST", adapter, skip_enabled=True)
608+
sleep(0.5)
609+
adapter.append_and_publish({"id": "error", "version": 0}, "", metadata)
610+
remoteconfig_poller._poll_data()
611+
612+
status_logger.error.assert_called_once()
613+
assert status_logger.error.call_args[1]["message"] == "test exception"
614+
assert status_logger.error.call_args[1]["probe"].probe_id == "error"
615+
finally:
616+
di_config.diagnostics_interval = old_interval

0 commit comments

Comments
 (0)