Skip to content

Commit 5871f0d

Browse files
authored
fix(internal): do not replace sys.modules [backport #6925 to 1.19] (#7070)
Backport of #6925 to 1.19 We refactor the ModuleWatchdog class to avoid replacing the sys.modules dictionary with a proxy. Instead, we use a WeakValueDictionary to prevent adding strong references to modules that might be unloaded, and also preventing the origin mapping from growing unchecked over time. Fixes #6923. ## Testing Strategy The existing test suites should be good enough to catch any regressions from this substantial change. As for the mentioned issue, the change was tested manually on the reproducer provided in the description provided by the reporter. ## Risk This is a substantial change in behaviour for `ModuleWatchdog`. However, we expect things to run smoother now that `sys.modules` is not _hacked_, at the cost of a potentially slightly delayed memory clean-up. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that.
1 parent 29b1eda commit 5871f0d

File tree

5 files changed

+85
-118
lines changed

5 files changed

+85
-118
lines changed

ddtrace/internal/module.py

Lines changed: 32 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from typing import Callable
1616
from typing import DefaultDict
1717
from typing import Dict
18-
from typing import Iterator
1918
from typing import List
2019
from typing import Optional
2120
from typing import Set
@@ -32,6 +31,12 @@
3231
from ddtrace.internal.utils import get_argument_value
3332

3433

34+
if PY2:
35+
wvdict = dict
36+
else:
37+
from weakref import WeakValueDictionary as wvdict
38+
39+
3540
log = get_logger(__name__)
3641

3742

@@ -223,12 +228,11 @@ def _exec_module(self, module):
223228
callback(module)
224229

225230

226-
class ModuleWatchdog(dict):
231+
class ModuleWatchdog(object):
227232
"""Module watchdog.
228233
229-
Replace the standard ``sys.modules`` dictionary to detect when modules are
230-
loaded/unloaded. This is also responsible for triggering any registered
231-
import hooks.
234+
Hooks into the import machinery to detect when modules are loaded/unloaded.
235+
This is also responsible for triggering any registered import hooks.
232236
233237
Subclasses might customize the default behavior by overriding the
234238
``after_import`` method, which is triggered on every module import, once
@@ -240,31 +244,22 @@ class ModuleWatchdog(dict):
240244
def __init__(self):
241245
# type: () -> None
242246
self._hook_map = defaultdict(list) # type: DefaultDict[str, List[ModuleHookType]]
243-
self._om = None # type: Optional[Dict[str, ModuleType]]
244-
self._modules = sys.modules # type: Union[dict, ModuleWatchdog]
247+
self._om = None # type: Optional[wvdict[str, ModuleType]]
245248
self._finding = set() # type: Set[str]
246249
self._pre_exec_module_hooks = [] # type: List[Tuple[PreExecHookCond, PreExecHookType]]
247250

248-
def __getitem__(self, item):
249-
# type: (str) -> ModuleType
250-
return self._modules.__getitem__(item)
251-
252-
def __setitem__(self, name, module):
253-
# type: (str, ModuleType) -> None
254-
self._modules.__setitem__(name, module)
255-
256251
@property
257252
def _origin_map(self):
258-
# type: () -> Dict[str, ModuleType]
253+
# type: () -> wvdict[str, ModuleType]
259254
if self._om is None:
260255
try:
261-
self._om = {origin(module): module for module in sys.modules.values()}
256+
self._om = wvdict({origin(module): module for module in sys.modules.values()})
262257
except RuntimeError:
263258
# The state of sys.modules might have been mutated by another
264259
# thread. We try to build the full mapping at the next occasion.
265260
# For now we take the more expensive route of building a list of
266261
# the current values, which might be incomplete.
267-
return {origin(module): module for module in list(sys.modules.values())}
262+
return wvdict({origin(module): module for module in list(sys.modules.values())})
268263

269264
return self._om
270265

@@ -284,8 +279,11 @@ def _find_in_meta_path(cls):
284279
def _remove_from_meta_path(cls):
285280
# type: () -> None
286281
i = cls._find_in_meta_path()
287-
if i is not None:
288-
sys.meta_path.pop(i)
282+
283+
if i is None:
284+
raise RuntimeError("%s is not installed" % cls.__name__)
285+
286+
sys.meta_path.pop(i)
289287

290288
def after_import(self, module):
291289
# type: (ModuleType) -> None
@@ -328,48 +326,6 @@ def get_by_origin(cls, _origin):
328326

329327
return None
330328

331-
def __delitem__(self, name):
332-
# type: (str) -> None
333-
try:
334-
path = origin(sys.modules[name])
335-
# Drop the module reference to reclaim memory
336-
del self._origin_map[path]
337-
except KeyError:
338-
pass
339-
340-
self._modules.__delitem__(name)
341-
342-
def __getattribute__(self, name):
343-
# type: (str) -> Any
344-
if LEGACY_DICT_COPY and name == "keys":
345-
# This is a potential attempt to make a copy of sys.modules using
346-
# dict(sys.modules) on a Python version that uses the C API to
347-
# perform the operation. Since we are an instance of a dict, this
348-
# means that we will end up looking like the empty dict, so we take
349-
# this chance to actually look like sys.modules.
350-
# NOTE: This is a potential source of memory leaks. However, we
351-
# expect this to occur only on defunct Python versions, and only
352-
# during special code executions, like test runs.
353-
super(ModuleWatchdog, self).clear()
354-
super(ModuleWatchdog, self).update(self._modules)
355-
356-
try:
357-
return super(ModuleWatchdog, self).__getattribute__("_modules").__getattribute__(name)
358-
except AttributeError:
359-
return super(ModuleWatchdog, self).__getattribute__(name)
360-
361-
def __contains__(self, name):
362-
# type: (object) -> bool
363-
return self._modules.__contains__(name)
364-
365-
def __len__(self):
366-
# type: () -> int
367-
return self._modules.__len__()
368-
369-
def __iter__(self):
370-
# type: () -> Iterator
371-
return self._modules.__iter__()
372-
373329
def find_module(self, fullname, path=None):
374330
# type: (str, Optional[str]) -> Union[Loader, None]
375331
if fullname in self._finding:
@@ -452,6 +408,13 @@ def register_origin_hook(cls, origin, hook):
452408
instance._hook_map[path].append(hook)
453409
try:
454410
module = instance._origin_map[path]
411+
# Sanity check: the module might have been removed from sys.modules
412+
# but not yet garbage collected.
413+
try:
414+
sys.modules[module.__name__]
415+
except KeyError:
416+
del instance._origin_map[path]
417+
raise
455418
except KeyError:
456419
# The module is not loaded yet. Nothing more we can do.
457420
return
@@ -499,7 +462,7 @@ def register_module_hook(cls, module, hook):
499462
instance = cast(ModuleWatchdog, cls._instance)
500463
instance._hook_map[module].append(hook)
501464
try:
502-
module_object = instance[module]
465+
module_object = sys.modules[module]
503466
except KeyError:
504467
# The module is not loaded yet. Nothing more we can do.
505468
return
@@ -567,8 +530,8 @@ def install(cls):
567530
if cls.is_installed():
568531
raise RuntimeError("%s is already installed" % cls.__name__)
569532

570-
cls._instance = sys.modules = cls()
571-
sys.modules._add_to_meta_path()
533+
cls._instance = cls()
534+
cls._instance._add_to_meta_path()
572535
log.debug("%s installed", cls)
573536

574537
@classmethod
@@ -585,17 +548,8 @@ def uninstall(cls):
585548
class.
586549
"""
587550
cls._check_installed()
551+
cls._remove_from_meta_path()
588552

589-
parent, current = None, sys.modules
590-
while isinstance(current, ModuleWatchdog):
591-
if type(current) is cls:
592-
cls._remove_from_meta_path()
593-
if parent is not None:
594-
parent._modules = current._modules
595-
else:
596-
sys.modules = current._modules
597-
cls._instance = None
598-
log.debug("%s uninstalled", cls)
599-
return
600-
parent = current
601-
current = current._modules
553+
cls._instance = None
554+
555+
log.debug("%s uninstalled", cls)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
Resolves issues with the import machinery that might have caused the
5+
garbage collector to clean up weak references in an unexpected order.

tests/contrib/patch.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,8 @@ def test_ddtrace_run_patch_on_import(self):
695695
"""
696696
import sys
697697
698+
from ddtrace.internal.module import ModuleWatchdog
699+
698700
from ddtrace.vendor.wrapt import wrap_function_wrapper as wrap
699701
700702
patched = False
@@ -710,7 +712,7 @@ def patch_wrapper(wrapped, _, args, kwrags):
710712
711713
wrap(module.__name__, module.patch.__name__, patch_wrapper)
712714
713-
sys.modules.register_module_hook("ddtrace.contrib.%s.patch", patch_hook)
715+
ModuleWatchdog.register_module_hook("ddtrace.contrib.%s.patch", patch_hook)
714716
715717
sys.stdout.write("O")
716718

tests/debugging/test_debugger.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def test_debugger_probe_new_delete(probe, trigger):
128128
d.add_probes(probe)
129129

130130
assert probe in d._probe_registry
131-
assert _get_probe_location(probe) in sys.modules._locations
131+
assert _get_probe_location(probe) in d.__watchdog__._instance._locations
132132

133133
trigger()
134134

@@ -137,7 +137,7 @@ def test_debugger_probe_new_delete(probe, trigger):
137137
# Test that the probe was ejected
138138
assert probe not in d._probe_registry
139139

140-
assert _get_probe_location(probe) not in sys.modules._locations
140+
assert _get_probe_location(probe) not in d.__watchdog__._instance._locations
141141

142142
trigger()
143143

tests/internal/test_module.py

Lines changed: 43 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import json
12
import os
23
import sys
4+
from warnings import warn
35

46
import mock
57
import pytest
@@ -35,7 +37,13 @@ def ensure_no_module_watchdog():
3537
yield
3638
finally:
3739
if was_installed:
38-
ModuleWatchdog.install()
40+
if ModuleWatchdog.is_installed():
41+
warn(
42+
"ModuleWatchdog still installed after test run. This might also be caused by a test that failed "
43+
"while a ModuleWatchdog was installed."
44+
)
45+
else:
46+
ModuleWatchdog.install()
3947

4048

4149
@pytest.fixture
@@ -50,11 +58,18 @@ def module_watchdog():
5058

5159

5260
def test_watchdog_install_uninstall():
53-
assert not isinstance(sys.modules, ModuleWatchdog)
61+
assert not ModuleWatchdog.is_installed()
62+
assert not any(isinstance(m, ModuleWatchdog) for m in sys.meta_path)
63+
5464
ModuleWatchdog.install()
55-
assert isinstance(sys.modules, ModuleWatchdog)
65+
66+
assert ModuleWatchdog.is_installed()
67+
assert isinstance(sys.meta_path[0], ModuleWatchdog)
68+
5669
ModuleWatchdog.uninstall()
57-
assert not isinstance(sys.modules, ModuleWatchdog)
70+
71+
assert not ModuleWatchdog.is_installed()
72+
assert not any(isinstance(m, ModuleWatchdog) for m in sys.meta_path)
5873

5974

6075
def test_import_origin_hook_for_imported_module(module_watchdog):
@@ -156,39 +171,49 @@ def test_import_module_hook_for_module_not_yet_imported():
156171
ModuleWatchdog.uninstall()
157172

158173

159-
@pytest.mark.subprocess(env=dict(MODULE_ORIGIN=origin(tests.test_module)))
174+
@pytest.mark.subprocess(env=dict(MODULE_ORIGIN=origin(json)))
160175
def test_module_deleted():
176+
import gc
161177
import os
162178
import sys
163179

164-
from mock import mock
165-
166180
from ddtrace.internal.module import ModuleWatchdog
167181

168-
name = "tests.test_module"
182+
if "json" in sys.modules:
183+
del sys.modules["json"]
184+
gc.collect()
185+
186+
name = "json"
169187
path = os.getenv("MODULE_ORIGIN")
170-
hook = mock.Mock()
188+
189+
class Counter(object):
190+
count = 0
191+
192+
def __call__(self, _):
193+
self.count += 1
194+
195+
hook = Counter()
171196

172197
ModuleWatchdog.register_origin_hook(path, hook)
173198
ModuleWatchdog.register_module_hook(name, hook)
174199

175200
__import__(name)
176201

177-
calls = [mock.call(sys.modules[name])] * 2
178-
hook.assert_has_calls(calls)
202+
assert hook.count == 2, hook.count
179203

180204
assert path in ModuleWatchdog._instance._origin_map
181205

182-
del sys.modules[name]
206+
if sys.version_info >= (3,):
207+
del sys.modules[name]
208+
gc.collect()
183209

184-
assert path not in ModuleWatchdog._instance._origin_map
210+
assert path not in ModuleWatchdog._instance._origin_map
185211

186-
# We are not deleting the registered hooks, so if we re-import the module
187-
# new hook calls are triggered
188-
__import__(name)
212+
# We are not deleting the registered hooks, so if we re-import the module
213+
# new hook calls are triggered
214+
__import__(name)
189215

190-
calls.extend([mock.call(sys.modules[name])] * 2)
191-
hook.assert_has_calls(calls)
216+
assert hook.count == 4
192217

193218
ModuleWatchdog.uninstall()
194219

@@ -343,25 +368,6 @@ class Bob(BaseCollector):
343368
Alice.uninstall()
344369

345370

346-
def test_module_watchdog_dict_shallow_copy():
347-
# Save original reference to sys.modules
348-
original_sys_modules = sys.modules
349-
350-
ModuleWatchdog.install()
351-
352-
# Ensure that we have replaced sys.modules
353-
assert original_sys_modules is not sys.modules
354-
355-
# Make a shallow copy of both using the dict constructor
356-
original_modules = set(dict(original_sys_modules).keys())
357-
new_modules = set(dict(sys.modules).keys())
358-
359-
# Ensure that they match
360-
assert original_modules == new_modules
361-
362-
ModuleWatchdog.uninstall()
363-
364-
365371
@pytest.mark.skipif(sys.version_info < (3, 5), reason="LazyLoader was introduced in Python 3.5")
366372
@pytest.mark.subprocess(out="ddtrace imported\naccessing lazy module\nlazy loaded\n")
367373
def test_module_watchdog_no_lazy_force_load():

0 commit comments

Comments
 (0)