Skip to content

Commit c4ee0db

Browse files
authored
chore(asyncio): fix flaky test_asyncio test (#5498)
This change ensures that the MainThread Task with name 'None' is run long enough to be targted by an event collector during this test. Prior to this change the test tests/profiling/collector/test_stack_asyncio.py::test_asyncio failed occasionally due to a race condition between event collectors (which scan threads) and collecting data about the MainThread with name=None. If the collectors didn't run in time to catch the MainThread, no CPU time would be recorded. After this change, a sleep() statement guarantees the thread is running long enough to be collected, and pointers have been added to enable future changes in behavior. Sample error: ``` assert wall_time_ns[t1_name] > 0 assert wall_time_ns[t2_name] > 0 if sys.platform != "win32": # Windows seems to get 0 CPU for this > assert cpu_time_found E assert False tests/profiling/collector/test_stack_asyncio.py:90: AssertionError ``` This change also modified a second test, which was failing due to referenced code lines. Prior to this change, the test_asyncio test expected the profiler to identify exactly the first line of a method. After this change, the tests ensures the profiler pointer is anywhere in the function. Testing This change was tested by increasing the collection interval (StackSampleEvent interval) from (2 x sys.setswitchinterval) to (100 x sys.setswitchinterval). These settings guaranteed 0% test success prior to the fix and 100% test success after, over 50 runs. Risks None, this is an update to testing only. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines) are followed. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] PR description includes explicit acknowledgement/acceptance of the performance implications of this PR as reported in the benchmarks PR comment. ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment.
1 parent d9a74bd commit c4ee0db

File tree

1 file changed

+52
-13
lines changed

1 file changed

+52
-13
lines changed

tests/profiling/collector/test_stack_asyncio.py

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,69 @@
11
import asyncio
22
import collections
33
import sys
4+
import types
45

56
import pytest
67

78
from ddtrace.profiling import _asyncio
89
from ddtrace.profiling import profiler
910
from ddtrace.profiling.collector import stack_event
11+
from ddtrace.profiling.collector.stack import StackCollector
1012

1113
from . import _asyncio_compat
1214

1315

16+
def patch_stack_collector(stack_collector):
17+
"""
18+
Patch a stack collect so we can count how many times it has run
19+
"""
20+
21+
def _collect(self):
22+
self.run_count += 1
23+
return self._orig_collect()
24+
25+
stack_collector.run_count = 0
26+
orig = stack_collector.collect
27+
stack_collector._orig_collect = orig
28+
stack_collector.collect = types.MethodType(_collect, stack_collector)
29+
30+
1431
@pytest.mark.skipif(not _asyncio_compat.PY36_AND_LATER, reason="Python > 3.5 needed")
1532
def test_asyncio(tmp_path, monkeypatch) -> None:
1633
sleep_time = 0.2
17-
sleep_times = 5
1834

19-
async def stuff() -> None:
20-
await asyncio.sleep(sleep_time)
35+
async def stuff(collector) -> None:
36+
count = collector.run_count
37+
while collector.run_count == count:
38+
await asyncio.sleep(sleep_time)
2139

22-
async def hello() -> None:
23-
t1 = _asyncio_compat.create_task(stuff(), name="sleep 1")
24-
t2 = _asyncio_compat.create_task(stuff(), name="sleep 2")
25-
for _ in range(sleep_times):
26-
await stuff()
40+
async def hello(collector) -> None:
41+
t1 = _asyncio_compat.create_task(stuff(collector), name="sleep 1")
42+
t2 = _asyncio_compat.create_task(stuff(collector), name="sleep 2")
43+
await stuff(collector)
2744
return (t1, t2)
2845

2946
monkeypatch.setenv("DD_PROFILING_CAPTURE_PCT", "100")
3047
monkeypatch.setenv("DD_PROFILING_OUTPUT_PPROF", str(tmp_path / "pprof"))
3148
# start a complete profiler so asyncio policy is setup
3249
p = profiler.Profiler()
50+
stack_collector = [collector for collector in p._profiler._collectors if type(collector) == StackCollector][0]
51+
patch_stack_collector(stack_collector)
52+
3353
p.start()
3454
loop = asyncio.new_event_loop()
3555
asyncio.set_event_loop(loop)
3656
if _asyncio_compat.PY38_AND_LATER:
37-
maintask = loop.create_task(hello(), name="main")
57+
maintask = loop.create_task(hello(stack_collector), name="main")
3858
else:
39-
maintask = loop.create_task(hello())
59+
maintask = loop.create_task(hello(stack_collector))
60+
61+
# Wait for the collector to run at least once on this thread, while it is doing something
62+
# 2.5+ seconds at times
63+
count = stack_collector.run_count
64+
while count == stack_collector.run_count:
65+
pass
66+
4067
t1, t2 = loop.run_until_complete(maintask)
4168
events = p._profiler._recorder.reset()
4269
p.stop()
@@ -55,17 +82,29 @@ async def hello() -> None:
5582

5683
# This assertion does not work reliably on Python < 3.7
5784
if _asyncio_compat.PY37_AND_LATER:
85+
first_line_this_test_class = test_asyncio.__code__.co_firstlineno
86+
co_filename, lineno, co_name, class_name = event.frames[0]
5887
if event.task_name == "main":
5988
assert event.thread_name == "MainThread"
60-
assert event.frames == [(__file__, test_asyncio.__code__.co_firstlineno + 12, "hello", "")]
89+
assert len(event.frames) == 1
90+
assert co_filename == __file__
91+
assert first_line_this_test_class + 9 <= lineno <= first_line_this_test_class + 13
92+
assert co_name == "hello"
93+
assert class_name == ""
6194
assert event.nframes == 1
6295
elif event.task_name == t1_name:
6396
assert event.thread_name == "MainThread"
64-
assert event.frames == [(__file__, test_asyncio.__code__.co_firstlineno + 6, "stuff", "")]
97+
assert co_filename == __file__
98+
assert first_line_this_test_class + 4 <= lineno <= first_line_this_test_class + 7
99+
assert co_name == "stuff"
100+
assert class_name == ""
65101
assert event.nframes == 1
66102
elif event.task_name == t2_name:
67103
assert event.thread_name == "MainThread"
68-
assert event.frames == [(__file__, test_asyncio.__code__.co_firstlineno + 6, "stuff", "")]
104+
assert co_filename == __file__
105+
assert first_line_this_test_class + 4 <= lineno <= first_line_this_test_class + 7
106+
assert co_name == "stuff"
107+
assert class_name == ""
69108
assert event.nframes == 1
70109

71110
if event.thread_name == "MainThread" and event.task_name is None:

0 commit comments

Comments
 (0)