fix(ci_visibility): sys.monitoring deinstrumentation [backport 3.16] (#15023)

dd-octo-sts[bot] · gnufede · web-flow · commit 2aa6d3487182 · 2025-10-27T13:00:53.000Z
Backport 93aff64 from #14859 to 3.16.

## Description

Fixes a coverage tracking performance issue by leveraging de-instrumentation after line events, and re-instrumentation between coverage collection contexts, on Python 3.12+.

**Problem:** The coverage tracking wasn't using the `sys.monitoring` API's `DISABLE` return value for `LINE` events once a line's coverage had been tracked.

**Solution:** Return `sys.monitoring.DISABLE` once a line is tracked, and call `sys.monitoring.restart_events()` when entering new coverage contexts to re-enable monitoring.

## Testing

- New tests covering sequential contexts, nested contexts, dynamic imports, and nested import chains
- Tests verify that coverage is complete and consistent across multiple context switches

## Risks

Low - only affects Python 3.12+ coverage, uses `sys.monitoring.DISABLE` and `sys.monitoring.restart_events()` [API](https://docs.python.org/3/library/sys.monitoring.html#disabling-events), extensively tested. However, if another tool were using this API at the same time, our call to `sys.monitoring.restart_events()` would re-enable that tool's disabled events as well.

## Additional Notes

### Performance Gain Example

The best performance gains for this PR happen when recursive code or loops are used heavily in the tested code, for example a recursive implementation of a Fibonacci sequence calculator:

```
# fibonacci.py
def fibonacci(n):
    if n <= 1:
        return n
    return fibonacci(n - 1) + fibonacci(n - 2)
```

Then running just this test:

```
# test_fibonacci.py
from fibonacci import fibonacci

def test_fibonacci():
    assert fibonacci(35) == 9227465
```

Yields the following results:

No coverage:

```
1 passed in 0.98s
```

Current coverage (main):

```
1 passed in 24.11s
```

New coverage (this branch):

```
1 passed in 1.01s
```

Co-authored-by: Federico Mon <federico.mon@datadoghq.com>
diff --git a/benchmarks/coverage_fibonacci/config.yaml b/benchmarks/coverage_fibonacci/config.yaml
@@ -0,0 +1,13 @@
+# Coverage benchmark configurations for fibonacci code
+# Tests sys.monitoring.DISABLE optimization performance
+
+small: &base
+  fib_n_recursive: 10
+
+medium:
+  <<: *base
+  fib_n_recursive: 15
+
+large:
+  <<: *base
+  fib_n_recursive: 20
diff --git a/benchmarks/coverage_fibonacci/scenario.py b/benchmarks/coverage_fibonacci/scenario.py
@@ -0,0 +1,51 @@
+"""
+Benchmark for coverage collection on recursive code.
+
+This benchmark ensures that the sys.monitoring.DISABLE optimization
+doesn't regress. The DISABLE return value prevents the handler from being
+called repeatedly for the same line in recursive functions and loops.
+
+Without DISABLE: Handler called on every line execution
+With DISABLE: Handler called once per unique line
+"""
+
+from typing import Callable
+from typing import Generator
+
+import bm
+
+
+class CoverageFibonacci(bm.Scenario):
+    """
+    Benchmark coverage collection performance on recursive code.
+
+    Tests the DISABLE optimization: returning sys.monitoring.DISABLE prevents
+    the handler from being called repeatedly for the same line.
+    """
+
+    fib_n_recursive: int
+
+    def run(self) -> Generator[Callable[[int], None], None, None]:
+        import os
+        from pathlib import Path
+
+        from ddtrace.internal.coverage.code import ModuleCodeCollector
+        from ddtrace.internal.coverage.installer import install
+
+        # Install coverage
+        install(include_paths=[Path(os.getcwd())])
+
+        # Import after installation
+        from utils import fibonacci_recursive
+
+        def _(loops: int) -> None:
+            for _ in range(loops):
+                # Use coverage context to simulate real pytest per-test coverage
+                with ModuleCodeCollector.CollectInContext():
+                    # Recursive: Many function calls, same lines executed repeatedly
+                    result = fibonacci_recursive(self.fib_n_recursive)
+
+                    # Verify correctness (don't optimize away)
+                    assert result > 0
+
+        yield _
diff --git a/benchmarks/coverage_fibonacci/utils.py b/benchmarks/coverage_fibonacci/utils.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+
+def fibonacci_recursive(n):
+    if n <= 1:
+        return n
+    return fibonacci_recursive(n - 1) + fibonacci_recursive(n - 2)
diff --git a/ddtrace/internal/coverage/code.py b/ddtrace/internal/coverage/code.py
@@ -3,6 +3,7 @@
 from copy import deepcopy
 from inspect import getmodule
 import os
+import sys
 from types import CodeType
 from types import ModuleType
 import typing as t
@@ -231,6 +232,11 @@ def __enter__(self):
             if self.is_import_coverage:
                 ctx_is_import_coverage.set(self.is_import_coverage)
 
+            # For Python 3.12+, re-enable monitoring that was disabled by previous contexts
+            # This ensures each test/suite gets accurate coverage data
+            if sys.version_info >= (3, 12):
+                sys.monitoring.restart_events()
+
             return self
 
         def __exit__(self, *args, **kwargs):
diff --git a/ddtrace/internal/coverage/instrumentation_py3_12.py b/ddtrace/internal/coverage/instrumentation_py3_12.py
@@ -21,10 +21,26 @@
 RETURN_CONST = dis.opmap["RETURN_CONST"]
 EMPTY_MODULE_BYTES = bytes([RESUME, 0, RETURN_CONST, 0])
 
+# Store: (hook, path, import_names_by_line)
 _CODE_HOOKS: t.Dict[CodeType, t.Tuple[HookType, str, t.Dict[int, t.Tuple[str, t.Optional[t.Tuple[str]]]]]] = {}
 
 
 def instrument_all_lines(code: CodeType, hook: HookType, path: str, package: str) -> t.Tuple[CodeType, CoverageLines]:
+    """
+    Instrument code for coverage tracking using Python 3.12's monitoring API.
+
+    Args:
+        code: The code object to instrument
+        hook: The hook function to call
+        path: The file path
+        package: The package name
+
+    Note: Python 3.12+ uses an optimized approach where each line callback returns DISABLE
+    after recording. This means:
+    - Each line is only reported once per coverage context (test/suite)
+    - No overhead for repeated line executions (e.g., in loops)
+    - Full line-by-line coverage data is captured
+    """
     coverage_tool = sys.monitoring.get_tool(sys.monitoring.COVERAGE_ID)
     if coverage_tool is not None and coverage_tool != "datadog":
         log.debug("Coverage tool '%s' already registered, not gathering coverage", coverage_tool)
@@ -37,10 +53,21 @@ def instrument_all_lines(code: CodeType, hook: HookType, path: str, package: str
     return _instrument_all_lines_with_monitoring(code, hook, path, package)
 
 
-def _line_event_handler(code: CodeType, line: int) -> t.Any:
-    hook, path, import_names = _CODE_HOOKS[code]
+def _line_event_handler(code: CodeType, line: int) -> t.Literal[sys.monitoring.DISABLE]:
+    hook_data = _CODE_HOOKS.get(code)
+    if hook_data is None:
+        return sys.monitoring.DISABLE
+
+    hook, path, import_names = hook_data
+
+    # Report the line and then disable monitoring for this specific line
+    # This ensures each line is only reported once per context, even if executed multiple times (e.g., in loops)
     import_name = import_names.get(line, None)
-    return hook((line, path, import_name))
+    hook((line, path, import_name))
+
+    # Return DISABLE to prevent future callbacks for this specific line
+    # This provides full line coverage with minimal overhead
+    return sys.monitoring.DISABLE
 
 
 def _register_monitoring():
diff --git a/releasenotes/notes/fix-civisibility-coverage-3-12-e9b6408d8a5dc886.yaml b/releasenotes/notes/fix-civisibility-coverage-3-12-e9b6408d8a5dc886.yaml
@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    CI Visibility: This fix resolves a performance issue affecting coverage collection for Python 3.12+.
diff --git a/tests/coverage/included_path/constants_dynamic.py b/tests/coverage/included_path/constants_dynamic.py
@@ -0,0 +1,5 @@
+"""Constants module - imported dynamically"""
+
+# Module-level constants
+OFFSET = 10
+MULTIPLIER = 2
diff --git a/tests/coverage/included_path/constants_toplevel.py b/tests/coverage/included_path/constants_toplevel.py
@@ -0,0 +1,6 @@
+"""Constants module - imported at top level"""
+
+# Module-level constants
+MAX_VALUE = 100
+MIN_VALUE = 0
+DEFAULT_MULTIPLIER = 3
diff --git a/tests/coverage/included_path/layer2_dynamic.py b/tests/coverage/included_path/layer2_dynamic.py
@@ -0,0 +1,16 @@
+"""Layer 2 - Imported dynamically, has its own imports"""
+
+# Top-level import even though this module itself is imported dynamically
+from tests.coverage.included_path.layer3_toplevel import layer3_toplevel_function
+
+
+def layer2_dynamic_function(b):
+    # Use top-level import
+    step1 = layer3_toplevel_function(b)
+
+    # Dynamic imports - both function and constants
+    from tests.coverage.included_path.constants_dynamic import OFFSET
+    from tests.coverage.included_path.layer3_dynamic import layer3_dynamic_function
+
+    step2 = layer3_dynamic_function(step1)
+    return step2 + OFFSET - 5
diff --git a/tests/coverage/included_path/layer2_toplevel.py b/tests/coverage/included_path/layer2_toplevel.py
@@ -0,0 +1,16 @@
+"""Layer 2 - Has top-level import and dynamic import"""
+
+# Top-level imports - both function and constants
+from tests.coverage.included_path.constants_toplevel import DEFAULT_MULTIPLIER
+from tests.coverage.included_path.layer3_toplevel import layer3_toplevel_function
+
+
+def layer2_toplevel_function(a):
+    # Use the top-level imported function and constant
+    intermediate = layer3_toplevel_function(a) * DEFAULT_MULTIPLIER
+
+    # Dynamic import inside function
+    from tests.coverage.included_path.layer3_dynamic import layer3_dynamic_function
+
+    final = layer3_dynamic_function(intermediate)
+    return final
diff --git a/tests/coverage/included_path/layer3_dynamic.py b/tests/coverage/included_path/layer3_dynamic.py
@@ -0,0 +1,6 @@
+"""Layer 3 - Deepest level, imported dynamically"""
+
+
+def layer3_dynamic_function(y):
+    computed = y + 10
+    return computed * 2
diff --git a/tests/coverage/included_path/layer3_toplevel.py b/tests/coverage/included_path/layer3_toplevel.py
@@ -0,0 +1,6 @@
+"""Layer 3 - Deepest level with only top-level code"""
+
+
+def layer3_toplevel_function(x):
+    result = x * 3
+    return result
diff --git a/tests/coverage/included_path/nested_fixture.py b/tests/coverage/included_path/nested_fixture.py
@@ -0,0 +1,33 @@
+"""
+Fixture code with complex nested imports.
+
+This fixture has:
+- Top-level imports
+- Dynamic (function-level) imports
+And the imported modules themselves have more imports (both top-level and dynamic)
+"""
+
+# Top-level imports
+from tests.coverage.included_path.layer2_toplevel import layer2_toplevel_function
+
+
+def fixture_toplevel_path(value):
+    """Uses top-level imported function"""
+    result = layer2_toplevel_function(value)
+    return result
+
+
+def fixture_dynamic_path(value):
+    """Uses dynamically imported function"""
+    # Dynamic import at function level
+    from tests.coverage.included_path.layer2_dynamic import layer2_dynamic_function
+
+    result = layer2_dynamic_function(value)
+    return result
+
+
+def fixture_mixed_path(value):
+    """Uses both paths"""
+    result1 = fixture_toplevel_path(value)
+    result2 = fixture_dynamic_path(value)
+    return result1 + result2
diff --git a/tests/coverage/included_path/reinstrumentation_test_module.py b/tests/coverage/included_path/reinstrumentation_test_module.py
@@ -0,0 +1,39 @@
+"""
+Simple test module for testing coverage re-instrumentation across contexts.
+
+This module provides simple, predictable functions with known line numbers
+to help test that coverage collection works correctly across multiple contexts.
+"""
+
+
+def simple_function(x, y):
+    """A simple function with a few lines."""
+    result = x + y
+    return result
+
+
+def function_with_loop(n):
+    """A function with a loop to test repeated line execution."""
+    total = 0
+    for i in range(n):
+        total += i
+    return total
+
+
+def function_with_branches(condition):
+    """A function with branches to test different code paths."""
+    if condition:
+        result = "true_branch"
+    else:
+        result = "false_branch"
+    return result
+
+
+def multi_line_function(a, b, c):
+    """A function with multiple lines to test comprehensive coverage."""
+    step1 = a + b
+    step2 = step1 * c
+    step3 = step2 - a
+    step4 = step3 / (b if b != 0 else 1)
+    result = step4**2
+    return result
diff --git a/tests/coverage/test_constants_import_tracking.py b/tests/coverage/test_constants_import_tracking.py
diff --git a/tests/coverage/test_coverage_context_reinstrumentation.py b/tests/coverage/test_coverage_context_reinstrumentation.py
diff --git a/tests/coverage/test_instrumentation_py312_disable.py b/tests/coverage/test_instrumentation_py312_disable.py
diff --git a/tests/coverage/test_nested_dynamic_imports.py b/tests/coverage/test_nested_dynamic_imports.py

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +---
 +fixes:
 +  - |
 +    CI Visibility: This fix resolves a performance issue affecting coverage collection for Python 3.12+.