Skip to content

Commit 8860fd6

Browse files
fix(internal): do not replace sys.modules [backport 2.0] (#7068)
Backport 2bd2c2a from #6925 to 2.0. We refactor the ModuleWatchdog class to avoid replacing the sys.modules dictionary with a proxy. Instead, we use a WeakValueDictionary to prevent adding strong references to modules that might be unloaded, and also preventing the origin mapping from growing unchecked over time. Fixes #6923. ## Testing Strategy The existing test suites should be good enough to catch any regressions from this substantial change. As for the mentioned issue, the change was tested manually on the reproducer provided in the description provided by the reporter. ## Risk This is a substantial change in behaviour for `ModuleWatchdog`. However, we expect things to run smoother now that `sys.modules` is not _hacked_, at the cost of a potentially slightly delayed memory clean-up. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that. Co-authored-by: Gabriele N. Tornetta <[email protected]>
1 parent e47e748 commit 8860fd6

File tree

5 files changed

+74
-113
lines changed

5 files changed

+74
-113
lines changed

ddtrace/internal/module.py

Lines changed: 27 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import sys
88
import typing
99
from typing import cast
10+
from weakref import WeakValueDictionary as wvdict
1011

1112

1213
if typing.TYPE_CHECKING:
@@ -15,7 +16,6 @@
1516
from typing import Callable
1617
from typing import DefaultDict
1718
from typing import Dict
18-
from typing import Iterator
1919
from typing import List
2020
from typing import Optional
2121
from typing import Set
@@ -223,12 +223,11 @@ def _exec_module(self, module):
223223
callback(module)
224224

225225

226-
class ModuleWatchdog(dict):
226+
class ModuleWatchdog(object):
227227
"""Module watchdog.
228228
229-
Replace the standard ``sys.modules`` dictionary to detect when modules are
230-
loaded/unloaded. This is also responsible for triggering any registered
231-
import hooks.
229+
Hooks into the import machinery to detect when modules are loaded/unloaded.
230+
This is also responsible for triggering any registered import hooks.
232231
233232
Subclasses might customize the default behavior by overriding the
234233
``after_import`` method, which is triggered on every module import, once
@@ -240,31 +239,22 @@ class ModuleWatchdog(dict):
240239
def __init__(self):
241240
# type: () -> None
242241
self._hook_map = defaultdict(list) # type: DefaultDict[str, List[ModuleHookType]]
243-
self._om = None # type: Optional[Dict[str, ModuleType]]
244-
self._modules = sys.modules # type: Union[dict, ModuleWatchdog]
242+
self._om = None # type: Optional[wvdict[str, ModuleType]]
245243
self._finding = set() # type: Set[str]
246244
self._pre_exec_module_hooks = [] # type: List[Tuple[PreExecHookCond, PreExecHookType]]
247245

248-
def __getitem__(self, item):
249-
# type: (str) -> ModuleType
250-
return self._modules.__getitem__(item)
251-
252-
def __setitem__(self, name, module):
253-
# type: (str, ModuleType) -> None
254-
self._modules.__setitem__(name, module)
255-
256246
@property
257247
def _origin_map(self):
258-
# type: () -> Dict[str, ModuleType]
248+
# type: () -> wvdict[str, ModuleType]
259249
if self._om is None:
260250
try:
261-
self._om = {origin(module): module for module in sys.modules.values()}
251+
self._om = wvdict({origin(module): module for module in sys.modules.values()})
262252
except RuntimeError:
263253
# The state of sys.modules might have been mutated by another
264254
# thread. We try to build the full mapping at the next occasion.
265255
# For now we take the more expensive route of building a list of
266256
# the current values, which might be incomplete.
267-
return {origin(module): module for module in list(sys.modules.values())}
257+
return wvdict({origin(module): module for module in list(sys.modules.values())})
268258

269259
return self._om
270260

@@ -284,8 +274,11 @@ def _find_in_meta_path(cls):
284274
def _remove_from_meta_path(cls):
285275
# type: () -> None
286276
i = cls._find_in_meta_path()
287-
if i is not None:
288-
sys.meta_path.pop(i)
277+
278+
if i is None:
279+
raise RuntimeError("%s is not installed" % cls.__name__)
280+
281+
sys.meta_path.pop(i)
289282

290283
def after_import(self, module):
291284
# type: (ModuleType) -> None
@@ -328,48 +321,6 @@ def get_by_origin(cls, _origin):
328321

329322
return None
330323

331-
def __delitem__(self, name):
332-
# type: (str) -> None
333-
try:
334-
path = origin(sys.modules[name])
335-
# Drop the module reference to reclaim memory
336-
del self._origin_map[path]
337-
except KeyError:
338-
pass
339-
340-
self._modules.__delitem__(name)
341-
342-
def __getattribute__(self, name):
343-
# type: (str) -> Any
344-
if LEGACY_DICT_COPY and name == "keys":
345-
# This is a potential attempt to make a copy of sys.modules using
346-
# dict(sys.modules) on a Python version that uses the C API to
347-
# perform the operation. Since we are an instance of a dict, this
348-
# means that we will end up looking like the empty dict, so we take
349-
# this chance to actually look like sys.modules.
350-
# NOTE: This is a potential source of memory leaks. However, we
351-
# expect this to occur only on defunct Python versions, and only
352-
# during special code executions, like test runs.
353-
super(ModuleWatchdog, self).clear()
354-
super(ModuleWatchdog, self).update(self._modules)
355-
356-
try:
357-
return super(ModuleWatchdog, self).__getattribute__("_modules").__getattribute__(name)
358-
except AttributeError:
359-
return super(ModuleWatchdog, self).__getattribute__(name)
360-
361-
def __contains__(self, name):
362-
# type: (object) -> bool
363-
return self._modules.__contains__(name)
364-
365-
def __len__(self):
366-
# type: () -> int
367-
return self._modules.__len__()
368-
369-
def __iter__(self):
370-
# type: () -> Iterator
371-
return self._modules.__iter__()
372-
373324
def find_module(self, fullname, path=None):
374325
# type: (str, Optional[str]) -> Union[Loader, None]
375326
if fullname in self._finding:
@@ -452,6 +403,13 @@ def register_origin_hook(cls, origin, hook):
452403
instance._hook_map[path].append(hook)
453404
try:
454405
module = instance._origin_map[path]
406+
# Sanity check: the module might have been removed from sys.modules
407+
# but not yet garbage collected.
408+
try:
409+
sys.modules[module.__name__]
410+
except KeyError:
411+
del instance._origin_map[path]
412+
raise
455413
except KeyError:
456414
# The module is not loaded yet. Nothing more we can do.
457415
return
@@ -499,7 +457,7 @@ def register_module_hook(cls, module, hook):
499457
instance = cast(ModuleWatchdog, cls._instance)
500458
instance._hook_map[module].append(hook)
501459
try:
502-
module_object = instance[module]
460+
module_object = sys.modules[module]
503461
except KeyError:
504462
# The module is not loaded yet. Nothing more we can do.
505463
return
@@ -567,8 +525,8 @@ def install(cls):
567525
if cls.is_installed():
568526
raise RuntimeError("%s is already installed" % cls.__name__)
569527

570-
cls._instance = sys.modules = cls()
571-
sys.modules._add_to_meta_path()
528+
cls._instance = cls()
529+
cls._instance._add_to_meta_path()
572530
log.debug("%s installed", cls)
573531

574532
@classmethod
@@ -585,17 +543,8 @@ def uninstall(cls):
585543
class.
586544
"""
587545
cls._check_installed()
546+
cls._remove_from_meta_path()
588547

589-
parent, current = None, sys.modules
590-
while isinstance(current, ModuleWatchdog):
591-
if type(current) is cls:
592-
cls._remove_from_meta_path()
593-
if parent is not None:
594-
parent._modules = current._modules
595-
else:
596-
sys.modules = current._modules
597-
cls._instance = None
598-
log.debug("%s uninstalled", cls)
599-
return
600-
parent = current
601-
current = current._modules
548+
cls._instance = None
549+
550+
log.debug("%s uninstalled", cls)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
Resolves issues with the import machinery that might have caused the
5+
garbage collector to clean up weak references in an unexpected order.

tests/contrib/patch.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,8 @@ def test_ddtrace_run_patch_on_import(self):
695695
"""
696696
import sys
697697
698+
from ddtrace.internal.module import ModuleWatchdog
699+
698700
from ddtrace.vendor.wrapt import wrap_function_wrapper as wrap
699701
700702
patched = False
@@ -710,7 +712,7 @@ def patch_wrapper(wrapped, _, args, kwrags):
710712
711713
wrap(module.__name__, module.patch.__name__, patch_wrapper)
712714
713-
sys.modules.register_module_hook("ddtrace.contrib.%s.patch", patch_hook)
715+
ModuleWatchdog.register_module_hook("ddtrace.contrib.%s.patch", patch_hook)
714716
715717
sys.stdout.write("O")
716718

tests/debugging/test_debugger.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def test_debugger_probe_new_delete(probe, trigger):
128128
d.add_probes(probe)
129129

130130
assert probe in d._probe_registry
131-
assert _get_probe_location(probe) in sys.modules._locations
131+
assert _get_probe_location(probe) in d.__watchdog__._instance._locations
132132

133133
trigger()
134134

@@ -137,7 +137,7 @@ def test_debugger_probe_new_delete(probe, trigger):
137137
# Test that the probe was ejected
138138
assert probe not in d._probe_registry
139139

140-
assert _get_probe_location(probe) not in sys.modules._locations
140+
assert _get_probe_location(probe) not in d.__watchdog__._instance._locations
141141

142142
trigger()
143143

tests/internal/test_module.py

Lines changed: 37 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import json
12
import os
23
import sys
4+
from warnings import warn
35

46
import mock
57
import pytest
@@ -35,7 +37,13 @@ def ensure_no_module_watchdog():
3537
yield
3638
finally:
3739
if was_installed:
38-
ModuleWatchdog.install()
40+
if ModuleWatchdog.is_installed():
41+
warn(
42+
"ModuleWatchdog still installed after test run. This might also be caused by a test that failed "
43+
"while a ModuleWatchdog was installed."
44+
)
45+
else:
46+
ModuleWatchdog.install()
3947

4048

4149
@pytest.fixture
@@ -50,11 +58,18 @@ def module_watchdog():
5058

5159

5260
def test_watchdog_install_uninstall():
53-
assert not isinstance(sys.modules, ModuleWatchdog)
61+
assert not ModuleWatchdog.is_installed()
62+
assert not any(isinstance(m, ModuleWatchdog) for m in sys.meta_path)
63+
5464
ModuleWatchdog.install()
55-
assert isinstance(sys.modules, ModuleWatchdog)
65+
66+
assert ModuleWatchdog.is_installed()
67+
assert isinstance(sys.meta_path[0], ModuleWatchdog)
68+
5669
ModuleWatchdog.uninstall()
57-
assert not isinstance(sys.modules, ModuleWatchdog)
70+
71+
assert not ModuleWatchdog.is_installed()
72+
assert not any(isinstance(m, ModuleWatchdog) for m in sys.meta_path)
5873

5974

6075
def test_import_origin_hook_for_imported_module(module_watchdog):
@@ -156,39 +171,48 @@ def test_import_module_hook_for_module_not_yet_imported():
156171
ModuleWatchdog.uninstall()
157172

158173

159-
@pytest.mark.subprocess(env=dict(MODULE_ORIGIN=origin(tests.test_module)))
174+
@pytest.mark.subprocess(env=dict(MODULE_ORIGIN=origin(json)))
160175
def test_module_deleted():
176+
import gc
161177
import os
162178
import sys
163179

164-
from mock import mock
165-
166180
from ddtrace.internal.module import ModuleWatchdog
167181

168-
name = "tests.test_module"
182+
if "json" in sys.modules:
183+
del sys.modules["json"]
184+
gc.collect()
185+
186+
name = "json"
169187
path = os.getenv("MODULE_ORIGIN")
170-
hook = mock.Mock()
188+
189+
class Counter(object):
190+
count = 0
191+
192+
def __call__(self, _):
193+
self.count += 1
194+
195+
hook = Counter()
171196

172197
ModuleWatchdog.register_origin_hook(path, hook)
173198
ModuleWatchdog.register_module_hook(name, hook)
174199

175200
__import__(name)
176201

177-
calls = [mock.call(sys.modules[name])] * 2
178-
hook.assert_has_calls(calls)
202+
assert hook.count == 2, hook.count
179203

180204
assert path in ModuleWatchdog._instance._origin_map
181205

182206
del sys.modules[name]
207+
gc.collect()
183208

184209
assert path not in ModuleWatchdog._instance._origin_map
185210

186211
# We are not deleting the registered hooks, so if we re-import the module
187212
# new hook calls are triggered
188213
__import__(name)
189214

190-
calls.extend([mock.call(sys.modules[name])] * 2)
191-
hook.assert_has_calls(calls)
215+
assert hook.count == 4
192216

193217
ModuleWatchdog.uninstall()
194218

@@ -343,25 +367,6 @@ class Bob(BaseCollector):
343367
Alice.uninstall()
344368

345369

346-
def test_module_watchdog_dict_shallow_copy():
347-
# Save original reference to sys.modules
348-
original_sys_modules = sys.modules
349-
350-
ModuleWatchdog.install()
351-
352-
# Ensure that we have replaced sys.modules
353-
assert original_sys_modules is not sys.modules
354-
355-
# Make a shallow copy of both using the dict constructor
356-
original_modules = set(dict(original_sys_modules).keys())
357-
new_modules = set(dict(sys.modules).keys())
358-
359-
# Ensure that they match
360-
assert original_modules == new_modules
361-
362-
ModuleWatchdog.uninstall()
363-
364-
365370
@pytest.mark.skipif(sys.version_info < (3, 5), reason="LazyLoader was introduced in Python 3.5")
366371
@pytest.mark.subprocess(out="ddtrace imported\naccessing lazy module\nlazy loaded\n")
367372
def test_module_watchdog_no_lazy_force_load():

0 commit comments

Comments
 (0)