Skip to content

Commit 9ed842c

Browse files
P403n1x87majorgreysemmettbutlerZStriker19
authored
fix(internal): do not replace sys.modules [backport #6925 to 1.20] (#7069)
Backport of #6925 to 1.20 We refactor the ModuleWatchdog class to avoid replacing the sys.modules dictionary with a proxy. Instead, we use a WeakValueDictionary to prevent adding strong references to modules that might be unloaded, and also preventing the origin mapping from growing unchecked over time. Fixes #6923. ## Testing Strategy The existing test suites should be good enough to catch any regressions from this substantial change. As for the mentioned issue, the change was tested manually on the reproducer provided in the description provided by the reporter. ## Risk This is a substantial change in behaviour for `ModuleWatchdog`. However, we expect things to run smoother now that `sys.modules` is not _hacked_, at the cost of a potentially slightly delayed memory clean-up. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that. Co-authored-by: Tahir H. Butt <[email protected]> Co-authored-by: Emmett Butler <[email protected]> Co-authored-by: Zachary Groves <[email protected]>
1 parent 1f42f64 commit 9ed842c

File tree

5 files changed

+84
-120
lines changed

5 files changed

+84
-120
lines changed

ddtrace/internal/module.py

Lines changed: 31 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from typing import Callable
1616
from typing import DefaultDict
1717
from typing import Dict
18-
from typing import Iterator
1918
from typing import List
2019
from typing import Optional
2120
from typing import Set
@@ -32,6 +31,11 @@
3231
from ddtrace.internal.utils import get_argument_value
3332

3433

34+
if PY2:
35+
wvdict = dict
36+
else:
37+
from weakref import WeakValueDictionary as wvdict
38+
3539
log = get_logger(__name__)
3640

3741

@@ -223,12 +227,11 @@ def _exec_module(self, module):
223227
callback(module)
224228

225229

226-
class ModuleWatchdog(dict):
230+
class ModuleWatchdog(object):
227231
"""Module watchdog.
228232
229-
Replace the standard ``sys.modules`` dictionary to detect when modules are
230-
loaded/unloaded. This is also responsible for triggering any registered
231-
import hooks.
233+
Hooks into the import machinery to detect when modules are loaded/unloaded.
234+
This is also responsible for triggering any registered import hooks.
232235
233236
Subclasses might customize the default behavior by overriding the
234237
``after_import`` method, which is triggered on every module import, once
@@ -240,31 +243,22 @@ class ModuleWatchdog(dict):
240243
def __init__(self):
241244
# type: () -> None
242245
self._hook_map = defaultdict(list) # type: DefaultDict[str, List[ModuleHookType]]
243-
self._om = None # type: Optional[Dict[str, ModuleType]]
244-
self._modules = sys.modules # type: Union[dict, ModuleWatchdog]
246+
self._om = None # type: Optional[wvdict[str, ModuleType]]
245247
self._finding = set() # type: Set[str]
246248
self._pre_exec_module_hooks = [] # type: List[Tuple[PreExecHookCond, PreExecHookType]]
247249

248-
def __getitem__(self, item):
249-
# type: (str) -> ModuleType
250-
return self._modules.__getitem__(item)
251-
252-
def __setitem__(self, name, module):
253-
# type: (str, ModuleType) -> None
254-
self._modules.__setitem__(name, module)
255-
256250
@property
257251
def _origin_map(self):
258-
# type: () -> Dict[str, ModuleType]
252+
# type: () -> wvdict[str, ModuleType]
259253
if self._om is None:
260254
try:
261-
self._om = {origin(module): module for module in sys.modules.values()}
255+
self._om = wvdict({origin(module): module for module in sys.modules.values()})
262256
except RuntimeError:
263257
# The state of sys.modules might have been mutated by another
264258
# thread. We try to build the full mapping at the next occasion.
265259
# For now we take the more expensive route of building a list of
266260
# the current values, which might be incomplete.
267-
return {origin(module): module for module in list(sys.modules.values())}
261+
return wvdict({origin(module): module for module in list(sys.modules.values())})
268262

269263
return self._om
270264

@@ -284,8 +278,11 @@ def _find_in_meta_path(cls):
284278
def _remove_from_meta_path(cls):
285279
# type: () -> None
286280
i = cls._find_in_meta_path()
287-
if i is not None:
288-
sys.meta_path.pop(i)
281+
282+
if i is None:
283+
raise RuntimeError("%s is not installed" % cls.__name__)
284+
285+
sys.meta_path.pop(i)
289286

290287
def after_import(self, module):
291288
# type: (ModuleType) -> None
@@ -328,48 +325,6 @@ def get_by_origin(cls, _origin):
328325

329326
return None
330327

331-
def __delitem__(self, name):
332-
# type: (str) -> None
333-
try:
334-
path = origin(sys.modules[name])
335-
# Drop the module reference to reclaim memory
336-
del self._origin_map[path]
337-
except KeyError:
338-
pass
339-
340-
self._modules.__delitem__(name)
341-
342-
def __getattribute__(self, name):
343-
# type: (str) -> Any
344-
if LEGACY_DICT_COPY and name == "keys":
345-
# This is a potential attempt to make a copy of sys.modules using
346-
# dict(sys.modules) on a Python version that uses the C API to
347-
# perform the operation. Since we are an instance of a dict, this
348-
# means that we will end up looking like the empty dict, so we take
349-
# this chance to actually look like sys.modules.
350-
# NOTE: This is a potential source of memory leaks. However, we
351-
# expect this to occur only on defunct Python versions, and only
352-
# during special code executions, like test runs.
353-
super(ModuleWatchdog, self).clear()
354-
super(ModuleWatchdog, self).update(self._modules)
355-
356-
try:
357-
return super(ModuleWatchdog, self).__getattribute__("_modules").__getattribute__(name)
358-
except AttributeError:
359-
return super(ModuleWatchdog, self).__getattribute__(name)
360-
361-
def __contains__(self, name):
362-
# type: (object) -> bool
363-
return self._modules.__contains__(name)
364-
365-
def __len__(self):
366-
# type: () -> int
367-
return self._modules.__len__()
368-
369-
def __iter__(self):
370-
# type: () -> Iterator
371-
return self._modules.__iter__()
372-
373328
def find_module(self, fullname, path=None):
374329
# type: (str, Optional[str]) -> Union[Loader, None]
375330
if fullname in self._finding:
@@ -452,6 +407,13 @@ def register_origin_hook(cls, origin, hook):
452407
instance._hook_map[path].append(hook)
453408
try:
454409
module = instance._origin_map[path]
410+
# Sanity check: the module might have been removed from sys.modules
411+
# but not yet garbage collected.
412+
try:
413+
sys.modules[module.__name__]
414+
except KeyError:
415+
del instance._origin_map[path]
416+
raise
455417
except KeyError:
456418
# The module is not loaded yet. Nothing more we can do.
457419
return
@@ -499,7 +461,7 @@ def register_module_hook(cls, module, hook):
499461
instance = cast(ModuleWatchdog, cls._instance)
500462
instance._hook_map[module].append(hook)
501463
try:
502-
module_object = instance[module]
464+
module_object = sys.modules[module]
503465
except KeyError:
504466
# The module is not loaded yet. Nothing more we can do.
505467
return
@@ -567,8 +529,8 @@ def install(cls):
567529
if cls.is_installed():
568530
raise RuntimeError("%s is already installed" % cls.__name__)
569531

570-
cls._instance = sys.modules = cls()
571-
sys.modules._add_to_meta_path()
532+
cls._instance = cls()
533+
cls._instance._add_to_meta_path()
572534
log.debug("%s installed", cls)
573535

574536
@classmethod
@@ -585,17 +547,8 @@ def uninstall(cls):
585547
class.
586548
"""
587549
cls._check_installed()
550+
cls._remove_from_meta_path()
588551

589-
parent, current = None, sys.modules
590-
while isinstance(current, ModuleWatchdog):
591-
if type(current) is cls:
592-
cls._remove_from_meta_path()
593-
if parent is not None:
594-
parent._modules = current._modules
595-
else:
596-
sys.modules = current._modules
597-
cls._instance = None
598-
log.debug("%s uninstalled", cls)
599-
return
600-
parent = current
601-
current = current._modules
552+
cls._instance = None
553+
554+
log.debug("%s uninstalled", cls)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
Resolves issues with the import machinery that might have caused the
5+
garbage collector to clean up weak references in an unexpected order.

tests/contrib/patch.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,8 @@ def test_ddtrace_run_patch_on_import(self):
695695
"""
696696
import sys
697697
698+
from ddtrace.internal.module import ModuleWatchdog
699+
698700
from ddtrace.vendor.wrapt import wrap_function_wrapper as wrap
699701
700702
patched = False
@@ -710,7 +712,7 @@ def patch_wrapper(wrapped, _, args, kwrags):
710712
711713
wrap(module.__name__, module.patch.__name__, patch_wrapper)
712714
713-
sys.modules.register_module_hook("ddtrace.contrib.%s.patch", patch_hook)
715+
ModuleWatchdog.register_module_hook("ddtrace.contrib.%s.patch", patch_hook)
714716
715717
sys.stdout.write("O")
716718

tests/debugging/test_debugger.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def test_debugger_probe_new_delete(probe, trigger):
128128
d.add_probes(probe)
129129

130130
assert probe in d._probe_registry
131-
assert _get_probe_location(probe) in sys.modules._locations
131+
assert _get_probe_location(probe) in d.__watchdog__._instance._locations
132132

133133
trigger()
134134

@@ -137,7 +137,7 @@ def test_debugger_probe_new_delete(probe, trigger):
137137
# Test that the probe was ejected
138138
assert probe not in d._probe_registry
139139

140-
assert _get_probe_location(probe) not in sys.modules._locations
140+
assert _get_probe_location(probe) not in d.__watchdog__._instance._locations
141141

142142
trigger()
143143

tests/internal/test_module.py

Lines changed: 43 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import json
12
import os
23
import sys
4+
from warnings import warn
35

46
import mock
57
import pytest
@@ -35,7 +37,13 @@ def ensure_no_module_watchdog():
3537
yield
3638
finally:
3739
if was_installed:
38-
ModuleWatchdog.install()
40+
if ModuleWatchdog.is_installed():
41+
warn(
42+
"ModuleWatchdog still installed after test run. This might also be caused by a test that failed "
43+
"while a ModuleWatchdog was installed."
44+
)
45+
else:
46+
ModuleWatchdog.install()
3947

4048

4149
@pytest.fixture
@@ -50,11 +58,18 @@ def module_watchdog():
5058

5159

5260
def test_watchdog_install_uninstall():
53-
assert not isinstance(sys.modules, ModuleWatchdog)
61+
assert not ModuleWatchdog.is_installed()
62+
assert not any(isinstance(m, ModuleWatchdog) for m in sys.meta_path)
63+
5464
ModuleWatchdog.install()
55-
assert isinstance(sys.modules, ModuleWatchdog)
65+
66+
assert ModuleWatchdog.is_installed()
67+
assert isinstance(sys.meta_path[0], ModuleWatchdog)
68+
5669
ModuleWatchdog.uninstall()
57-
assert not isinstance(sys.modules, ModuleWatchdog)
70+
71+
assert not ModuleWatchdog.is_installed()
72+
assert not any(isinstance(m, ModuleWatchdog) for m in sys.meta_path)
5873

5974

6075
def test_import_origin_hook_for_imported_module(module_watchdog):
@@ -156,45 +171,54 @@ def test_import_module_hook_for_module_not_yet_imported():
156171
ModuleWatchdog.uninstall()
157172

158173

159-
@pytest.mark.subprocess(env=dict(MODULE_ORIGIN=origin(tests.test_module)))
174+
@pytest.mark.subprocess(env=dict(MODULE_ORIGIN=origin(json)))
160175
def test_module_deleted():
176+
import gc
161177
import os
162178
import sys
163179

164-
from mock import mock
165-
166180
from ddtrace.internal.module import ModuleWatchdog
167181

168-
name = "tests.test_module"
182+
if "json" in sys.modules:
183+
del sys.modules["json"]
184+
gc.collect()
185+
186+
name = "json"
169187
path = os.getenv("MODULE_ORIGIN")
170-
hook = mock.Mock()
188+
189+
class Counter(object):
190+
count = 0
191+
192+
def __call__(self, _):
193+
self.count += 1
194+
195+
hook = Counter()
171196

172197
ModuleWatchdog.register_origin_hook(path, hook)
173198
ModuleWatchdog.register_module_hook(name, hook)
174199

175200
__import__(name)
176201

177-
calls = [mock.call(sys.modules[name])] * 2
178-
hook.assert_has_calls(calls)
202+
assert hook.count == 2, hook.count
179203

180204
assert path in ModuleWatchdog._instance._origin_map
181205

182-
del sys.modules[name]
206+
if sys.version_info >= (3,):
207+
del sys.modules[name]
208+
gc.collect()
183209

184-
assert path not in ModuleWatchdog._instance._origin_map
210+
assert path not in ModuleWatchdog._instance._origin_map
185211

186-
# We are not deleting the registered hooks, so if we re-import the module
187-
# new hook calls are triggered
188-
__import__(name)
212+
# We are not deleting the registered hooks, so if we re-import the module
213+
# new hook calls are triggered
214+
__import__(name)
189215

190-
calls.extend([mock.call(sys.modules[name])] * 2)
191-
hook.assert_has_calls(calls)
216+
assert hook.count == 4
192217

193218
ModuleWatchdog.uninstall()
194219

195220

196221
def test_module_unregister_origin_hook(module_watchdog):
197-
198222
hook = mock.Mock()
199223
path = origin(sys.modules[__name__])
200224

@@ -216,7 +240,6 @@ def test_module_unregister_origin_hook(module_watchdog):
216240

217241

218242
def test_module_unregister_module_hook(module_watchdog):
219-
220243
hook = mock.Mock()
221244
module = __name__
222245
module_watchdog.register_module_hook(module, hook)
@@ -343,25 +366,6 @@ class Bob(BaseCollector):
343366
Alice.uninstall()
344367

345368

346-
def test_module_watchdog_dict_shallow_copy():
347-
# Save original reference to sys.modules
348-
original_sys_modules = sys.modules
349-
350-
ModuleWatchdog.install()
351-
352-
# Ensure that we have replaced sys.modules
353-
assert original_sys_modules is not sys.modules
354-
355-
# Make a shallow copy of both using the dict constructor
356-
original_modules = set(dict(original_sys_modules).keys())
357-
new_modules = set(dict(sys.modules).keys())
358-
359-
# Ensure that they match
360-
assert original_modules == new_modules
361-
362-
ModuleWatchdog.uninstall()
363-
364-
365369
@pytest.mark.skipif(sys.version_info < (3, 5), reason="LazyLoader was introduced in Python 3.5")
366370
@pytest.mark.subprocess(out="ddtrace imported\naccessing lazy module\nlazy loaded\n")
367371
def test_module_watchdog_no_lazy_force_load():

0 commit comments

Comments
 (0)