Skip to content

Commit 759f3f7

Browse files
authored
fix(debugging): emit probe status updates unconditionally [backport #6714 to 1.16] (#6742)
Backport of #6714 to 1.16 We ensure that probe status update are sent when due, regardless of whether the RCM worker has found new data. This fixes a regression introduced by the IPC refactor for the RCM client. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent c6dbd64 commit 759f3f7

File tree

3 files changed

+78
-120
lines changed

3 files changed

+78
-120
lines changed

ddtrace/debugging/_probe/remoteconfig.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -299,22 +299,26 @@ def __init__(self, data_connector, callback, name):
299299
self._next_status_update_timestamp()
300300

301301
def _exec_callback(self, data, test_tracer=None):
302+
# Check if it is time to re-emit probe status messages.
303+
# DEV: We use the periodic signal from the remote config client worker
304+
# thread to avoid having to spawn a separate thread for this.
305+
if time.time() > self._status_timestamp:
306+
log.debug(
307+
"[%s][P: %s] Dynamic Instrumentation,Emitting probe status log messages",
308+
os.getpid(),
309+
os.getppid(),
310+
)
311+
probes = [probe for config in self._configs.values() for probe in config.values()]
312+
self._callback(ProbePollerEvent.STATUS_UPDATE, probes)
313+
self._next_status_update_timestamp()
314+
302315
if data:
303316
metadatas = data["metadata"]
304317
rc_configs = data["config"]
305318
# DEV: We emit a status update event here to avoid having to spawn a
306319
# separate thread for this.
307320
log.debug("[%s][P: %s] Dynamic Instrumentation Updated", os.getpid(), os.getppid())
308321
for idx in range(len(rc_configs)):
309-
if time.time() > self._status_timestamp:
310-
log.debug(
311-
"[%s][P: %s] Dynamic Instrumentation,Emitting probe status log messages",
312-
os.getpid(),
313-
os.getppid(),
314-
)
315-
probes = [probe for config in self._configs.values() for probe in config.values()]
316-
self._callback(ProbePollerEvent.STATUS_UPDATE, probes)
317-
self._next_status_update_timestamp()
318322
if metadatas[idx] is None:
319323
log.debug("[%s][P: %s] Dynamic Instrumentation, no RCM metadata", os.getpid(), os.getppid())
320324
return
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
dynamic instrumentation: fixed a bug that might have caused probe status
5+
to fail to update correctly.

tests/debugging/test_debugger.py

Lines changed: 60 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import sys
44
from threading import Thread
55
from time import sleep
6-
from time import time
76

87
import mock
98
from mock.mock import call
@@ -398,7 +397,6 @@ def test_debugger_multiple_threads():
398397
good_probe(),
399398
create_snapshot_line_probe(probe_id="thread-test", source_file="tests/submod/stuff.py", line=40),
400399
)
401-
sleep(0.5)
402400

403401
callables = [Stuff().instancestuff, lambda: Stuff().propertystuff]
404402
threads = [Thread(target=callables[_ % len(callables)]) for _ in range(10)]
@@ -409,12 +407,10 @@ def test_debugger_multiple_threads():
409407
for t in threads:
410408
t.join()
411409

412-
sleep(1.5)
413-
414-
if any(len(snapshots) < len(callables) for snapshots in d.uploader.payloads):
415-
sleep(0.5)
410+
d.uploader.wait_for_payloads(
411+
lambda q: q and all(len(snapshots) >= len(callables) for snapshots in d.uploader.payloads)
412+
)
416413

417-
assert d.uploader.queue
418414
for snapshots in d.uploader.payloads[1:]:
419415
assert len(snapshots) >= len(callables)
420416
assert {s["debugger.snapshot"]["probe"]["id"] for s in snapshots} == {
@@ -618,126 +614,79 @@ def test_debugger_line_probe_on_wrapped_function(stuff):
618614
assert snapshot.probe.probe_id == "line-probe-wrapped-method"
619615

620616

621-
@pytest.mark.skipif(sys.version_info < (3, 6, 0), reason="Python 3.6+ only")
622-
def test_probe_status_logging(monkeypatch, remote_config_worker):
623-
monkeypatch.setenv("DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS", "10")
624-
remoteconfig_poller.interval = 10
625-
from ddtrace.internal.remoteconfig.client import RemoteConfigClient
626-
627-
old_request = RemoteConfigClient.request
628-
629-
def request(self, *args, **kwargs):
630-
import uuid
631-
632-
for cb in self.get_pubsubs():
633-
cb._subscriber._status_timestamp = time()
634-
cb._subscriber.interval = 0.1
635-
cb.append_and_publish({"test": str(uuid.uuid4())}, "", None)
636-
return True
637-
638-
RemoteConfigClient.request = request
617+
def test_probe_status_logging(remote_config_worker):
639618
assert remoteconfig_poller.status == ServiceStatus.STOPPED
640-
try:
641-
with rcm_endpoint(), debugger(diagnostics_interval=0.5, enabled=True) as d:
642-
d.add_probes(
643-
create_snapshot_line_probe(
644-
probe_id="line-probe-ok",
645-
source_file="tests/submod/stuff.py",
646-
line=36,
647-
condition=None,
648-
),
649-
create_snapshot_function_probe(
650-
probe_id="line-probe-error",
651-
module="tests.submod.stuff",
652-
func_qname="foo",
653-
condition=None,
654-
),
655-
)
656619

657-
queue = d.probe_status_logger.queue
658-
659-
def count_status(queue):
660-
return Counter(_["debugger"]["diagnostics"]["status"] for _ in queue)
661-
662-
sleep(0.1)
663-
assert count_status(queue) == {"INSTALLED": 1, "RECEIVED": 2, "ERROR": 1}
664-
665-
remoteconfig_poller._client.request()
666-
sleep(0.1)
667-
assert count_status(queue) == {"INSTALLED": 2, "RECEIVED": 2, "ERROR": 2}
620+
with rcm_endpoint(), debugger(diagnostics_interval=0.2, enabled=True) as d:
621+
d.add_probes(
622+
create_snapshot_line_probe(
623+
probe_id="line-probe-ok",
624+
source_file="tests/submod/stuff.py",
625+
line=36,
626+
condition=None,
627+
),
628+
create_snapshot_function_probe(
629+
probe_id="line-probe-error",
630+
module="tests.submod.stuff",
631+
func_qname="foo",
632+
condition=None,
633+
),
634+
)
668635

669-
remoteconfig_poller._client.request()
670-
sleep(0.1)
671-
assert count_status(queue) == {"INSTALLED": 3, "RECEIVED": 2, "ERROR": 3}
672-
finally:
673-
RemoteConfigClient.request = old_request
636+
logger = d.probe_status_logger
674637

638+
def count_status(queue):
639+
return Counter(_["debugger"]["diagnostics"]["status"] for _ in queue)
675640

676-
@pytest.mark.skipif(sys.version_info < (3, 6, 0), reason="Python 3.6+ only")
677-
def test_probe_status_logging_reemit_on_modify(monkeypatch, remote_config_worker):
678-
monkeypatch.setenv("DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS", "10")
679-
remoteconfig_poller.interval = 10
680-
from ddtrace.internal.remoteconfig.client import RemoteConfigClient
641+
logger.wait(lambda q: count_status(q) == {"INSTALLED": 1, "RECEIVED": 2, "ERROR": 1})
681642

682-
old_request = RemoteConfigClient.request
643+
logger.wait(lambda q: count_status(q) == {"INSTALLED": 2, "RECEIVED": 2, "ERROR": 2})
683644

684-
def request(self, *args, **kwargs):
685-
import uuid
645+
logger.wait(lambda q: count_status(q) == {"INSTALLED": 3, "RECEIVED": 2, "ERROR": 3})
686646

687-
for cb in self.get_pubsubs():
688-
cb._subscriber._status_timestamp = time()
689-
cb._subscriber.interval = 0.1
690-
cb.append_and_publish({"test": str(uuid.uuid4())}, "", None)
691-
return True
692647

693-
RemoteConfigClient.request = request
648+
def test_probe_status_logging_reemit_on_modify(remote_config_worker):
694649
assert remoteconfig_poller.status == ServiceStatus.STOPPED
695-
try:
696-
with rcm_endpoint(), debugger(diagnostics_interval=0.0, enabled=True) as d:
697-
d.add_probes(
698-
create_snapshot_line_probe(
699-
version=1,
700-
probe_id="line-probe-ok",
701-
source_file="tests/submod/stuff.py",
702-
line=36,
703-
condition=None,
704-
),
705-
)
706-
d.modify_probes(
707-
create_snapshot_line_probe(
708-
version=2,
709-
probe_id="line-probe-ok",
710-
source_file="tests/submod/stuff.py",
711-
line=36,
712-
condition=None,
713-
),
714-
)
715-
716-
logger = d.probe_status_logger
717-
queue = logger.queue
718650

719-
def count_status(queue):
720-
return Counter(_["debugger"]["diagnostics"]["status"] for _ in queue)
651+
with rcm_endpoint(), debugger(diagnostics_interval=0.2, enabled=True) as d:
652+
d.add_probes(
653+
create_snapshot_line_probe(
654+
version=1,
655+
probe_id="line-probe-ok",
656+
source_file="tests/submod/stuff.py",
657+
line=36,
658+
condition=None,
659+
),
660+
)
661+
d.modify_probes(
662+
create_snapshot_line_probe(
663+
version=2,
664+
probe_id="line-probe-ok",
665+
source_file="tests/submod/stuff.py",
666+
line=36,
667+
condition=None,
668+
),
669+
)
721670

722-
def versions(queue, status):
723-
return [
724-
_["debugger"]["diagnostics"]["probeVersion"]
725-
for _ in queue
726-
if _["debugger"]["diagnostics"]["status"] == status
727-
]
671+
logger = d.probe_status_logger
672+
queue = logger.queue
728673

729-
logger.wait(lambda q: count_status(q) == {"INSTALLED": 2, "RECEIVED": 1})
730-
assert versions(queue, "INSTALLED") == [1, 2]
731-
assert versions(queue, "RECEIVED") == [1]
674+
def count_status(queue):
675+
return Counter(_["debugger"]["diagnostics"]["status"] for _ in queue)
732676

733-
logger.clear()
734-
remoteconfig_poller._client.request()
677+
def versions(queue, status):
678+
return [
679+
_["debugger"]["diagnostics"]["probeVersion"]
680+
for _ in queue
681+
if _["debugger"]["diagnostics"]["status"] == status
682+
]
735683

736-
logger.wait(lambda q: count_status(q) == {"INSTALLED": 1})
737-
assert versions(queue, "INSTALLED") == [2]
684+
logger.wait(lambda q: count_status(q) == {"INSTALLED": 2, "RECEIVED": 1})
685+
assert versions(queue, "INSTALLED") == [1, 2]
686+
assert versions(queue, "RECEIVED") == [1]
738687

739-
finally:
740-
RemoteConfigClient.request = old_request
688+
logger.wait(lambda q: count_status(q) == {"INSTALLED": 3, "RECEIVED": 1})
689+
assert versions(queue, "INSTALLED") == [1, 2, 2]
741690

742691

743692
@pytest.mark.parametrize("duration", [1e5, 1e6, 1e7])

0 commit comments

Comments
 (0)