Skip to content

Commit 5f8d50f

Browse files
chore(ci_visibility): file-level coverage for py3.12+ (#15081)
## Description CI Visibility: Add a faster code coverage mode, intended for TIA, that tracks only the file paths of the executed Python code and not per-line coverage inside them. It works only on Python 3.12+, where we use the `sys.monitoring` API. Instead of using `LINE` events, which are triggered for each line of code executed, when the env var `_DD_COVERAGE_FILE_LEVEL` is set we use `PY_START` events, which are triggered when a code object starts executing (but not for each of its lines). Disabled by default, but can be enabled with the env var `_DD_COVERAGE_FILE_LEVEL=true`. ## Testing Tests are adapted so that they run in both modes; in file-level mode we don't assert the actual lines inside the tests. ## Risks None --------- Co-authored-by: Vítor De Araújo <[email protected]>
1 parent 9ac83e6 commit 5f8d50f

File tree

9 files changed

+850
-265
lines changed

9 files changed

+850
-265
lines changed

benchmarks/coverage_fibonacci/config.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
# Coverage benchmark configurations for fibonacci code
22
# Tests sys.monitoring.DISABLE optimization performance
3+
# Configurations for both line-level and file-level coverage modes
34

5+
# Line-level coverage configurations (original mode)
46
small: &base
57
fib_n_recursive: 10
8+
env_dd_coverage_file_level: "false"
69

710
medium:
811
<<: *base
@@ -11,3 +14,16 @@ medium:
1114
large:
1215
<<: *base
1316
fib_n_recursive: 20
17+
18+
# File-level coverage configurations (lightweight mode)
19+
small_file: &base_file
20+
fib_n_recursive: 10
21+
env_dd_coverage_file_level: "true"
22+
23+
medium_file:
24+
<<: *base_file
25+
fib_n_recursive: 15
26+
27+
large_file:
28+
<<: *base_file
29+
fib_n_recursive: 20

benchmarks/coverage_fibonacci/scenario.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,12 @@ class CoverageFibonacci(bm.Scenario):
2121
2222
Tests the DISABLE optimization: returning sys.monitoring.DISABLE prevents
2323
the handler from being called repeatedly for the same line.
24+
25+
Can run in either line-level or file-level coverage mode.
2426
"""
2527

2628
fib_n_recursive: int
29+
env_dd_coverage_file_level: str
2730

2831
def run(self) -> Generator[Callable[[int], None], None, None]:
2932
import os
@@ -32,6 +35,9 @@ def run(self) -> Generator[Callable[[int], None], None, None]:
3235
from ddtrace.internal.coverage.code import ModuleCodeCollector
3336
from ddtrace.internal.coverage.installer import install
3437

38+
# Set coverage mode directly from parameter
39+
os.environ["_DD_COVERAGE_FILE_LEVEL"] = self.env_dd_coverage_file_level
40+
3541
# Install coverage
3642
install(include_paths=[Path(os.getcwd())])
3743

ddtrace/internal/coverage/instrumentation_py3_12.py

Lines changed: 111 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,23 @@
1+
"""
2+
Coverage instrumentation for Python 3.12+ using sys.monitoring API.
3+
4+
This module supports two modes:
5+
1. Line-level coverage: Tracks which specific lines are executed (LINE events)
6+
2. File-level coverage: Tracks which files are executed (PY_START events)
7+
8+
The mode is controlled by the _DD_COVERAGE_FILE_LEVEL environment variable.
9+
"""
10+
111
import dis
12+
import os
213
import sys
314
from types import CodeType
415
import typing as t
516

617
from ddtrace.internal.bytecode_injection import HookType
718
from ddtrace.internal.logger import get_logger
819
from ddtrace.internal.test_visibility.coverage_lines import CoverageLines
20+
from ddtrace.internal.utils.formats import asbool
921

1022

1123
log = get_logger(__name__)
@@ -21,6 +33,11 @@
2133
RETURN_CONST = dis.opmap["RETURN_CONST"]
2234
EMPTY_MODULE_BYTES = bytes([RESUME, 0, RETURN_CONST, 0])
2335

36+
# Check if file-level coverage is requested
37+
_USE_FILE_LEVEL_COVERAGE = asbool(os.getenv("_DD_COVERAGE_FILE_LEVEL", "false"))
38+
39+
EVENT = sys.monitoring.events.PY_START if _USE_FILE_LEVEL_COVERAGE else sys.monitoring.events.LINE
40+
2441
# Store: (hook, path, import_names_by_line)
2542
_CODE_HOOKS: t.Dict[CodeType, t.Tuple[HookType, str, t.Dict[int, t.Tuple[str, t.Optional[t.Tuple[str]]]]]] = {}
2643

@@ -29,70 +46,133 @@ def instrument_all_lines(code: CodeType, hook: HookType, path: str, package: str
2946
"""
3047
Instrument code for coverage tracking using Python 3.12's monitoring API.
3148
49+
This function supports two modes based on _DD_COVERAGE_FILE_LEVEL:
50+
- Line-level (default): Uses LINE events for detailed line-by-line coverage
51+
- File-level: Uses PY_START events for faster file-level coverage
52+
3253
Args:
3354
code: The code object to instrument
3455
hook: The hook function to call
3556
path: The file path
3657
package: The package name
3758
38-
Note: Python 3.12+ uses an optimized approach where each line callback returns DISABLE
39-
after recording. This means:
40-
- Each line is only reported once per coverage context (test/suite)
41-
- No overhead for repeated line executions (e.g., in loops)
42-
- Full line-by-line coverage data is captured
59+
Returns:
60+
Tuple of (code object, CoverageLines with instrumentable lines)
61+
62+
Note: Both modes use an optimized approach where callbacks return DISABLE
63+
after recording, meaning each line/function is only reported once per coverage context.
4364
"""
4465
coverage_tool = sys.monitoring.get_tool(sys.monitoring.COVERAGE_ID)
4566
if coverage_tool is not None and coverage_tool != "datadog":
4667
log.debug("Coverage tool '%s' already registered, not gathering coverage", coverage_tool)
4768
return code, CoverageLines()
4869

4970
if coverage_tool is None:
50-
log.debug("Registering code coverage tool")
71+
mode = "file-level" if _USE_FILE_LEVEL_COVERAGE else "line-level"
72+
log.debug("Registering %s coverage tool", mode)
5173
_register_monitoring()
5274

53-
return _instrument_all_lines_with_monitoring(code, hook, path, package)
75+
return _instrument_with_monitoring(code, hook, path, package)
5476

5577

56-
def _line_event_handler(code: CodeType, line: int) -> t.Literal[sys.monitoring.DISABLE]:
78+
def _event_handler(code: CodeType, line: int) -> t.Literal[sys.monitoring.DISABLE]:
79+
"""
80+
Callback for LINE/PY_START events.
81+
Returns sys.monitoring.DISABLE to improve performance.
82+
"""
5783
hook_data = _CODE_HOOKS.get(code)
5884
if hook_data is None:
5985
return sys.monitoring.DISABLE
6086

6187
hook, path, import_names = hook_data
6288

63-
# Report the line and then disable monitoring for this specific line
64-
# This ensures each line is only reported once per context, even if executed multiple times (e.g., in loops)
65-
import_name = import_names.get(line, None)
66-
hook((line, path, import_name))
67-
68-
# Return DISABLE to prevent future callbacks for this specific line
69-
# This provides full line coverage with minimal overhead
89+
if _USE_FILE_LEVEL_COVERAGE:
90+
# Report file-level coverage using line 0 as a sentinel value
91+
# Line 0 indicates "file was executed" without specific line information
92+
hook((0, path, None))
93+
94+
# Report any import dependencies (extracted at instrumentation time from bytecode)
95+
# This ensures import tracking works even though we don't fire on individual lines
96+
for line_num, import_name in import_names.items():
97+
hook((line_num, path, import_name))
98+
else:
99+
# Report the line and then disable monitoring for this specific line
100+
# This ensures each line is only reported once per context, even if executed multiple times (e.g., in loops)
101+
import_name = import_names.get(line, None)
102+
hook((line, path, import_name))
103+
104+
# Return DISABLE to prevent future callbacks for this specific line/code
70105
return sys.monitoring.DISABLE
71106

72107

73108
def _register_monitoring():
74109
"""
75-
Register the coverage tool with the low-impact monitoring system.
110+
Register the coverage tool with the monitoring system.
111+
112+
This sets up the appropriate callback based on the coverage mode.
76113
"""
77114
sys.monitoring.use_tool_id(sys.monitoring.COVERAGE_ID, "datadog")
78-
79-
# Register the line callback
80-
sys.monitoring.register_callback(
81-
sys.monitoring.COVERAGE_ID, sys.monitoring.events.LINE, _line_event_handler
82-
) # noqa
115+
sys.monitoring.register_callback(sys.monitoring.COVERAGE_ID, EVENT, _event_handler)
83116

84117

85-
def _instrument_all_lines_with_monitoring(
118+
def _instrument_with_monitoring(
86119
code: CodeType, hook: HookType, path: str, package: str
87120
) -> t.Tuple[CodeType, CoverageLines]:
88-
# Enable local line events for the code object
89-
sys.monitoring.set_local_events(sys.monitoring.COVERAGE_ID, code, sys.monitoring.events.LINE) # noqa
121+
"""
122+
Instrument code using either LINE events for detailed line-by-line coverage or PY_START for file-level.
123+
"""
124+
# Enable local line/py_start events for the code object
125+
sys.monitoring.set_local_events(sys.monitoring.COVERAGE_ID, code, EVENT) # noqa
90126

91-
# Collect all the line numbers in the code object
92-
linestarts = dict(dis.findlinestarts(code))
127+
track_lines = not _USE_FILE_LEVEL_COVERAGE
128+
# Extract import names and collect line numbers
129+
lines, import_names = _extract_lines_and_imports(code, package, track_lines=track_lines)
130+
131+
# Recursively instrument nested code objects
132+
for nested_code in (_ for _ in code.co_consts if isinstance(_, CodeType)):
133+
_, nested_lines = instrument_all_lines(nested_code, hook, path, package)
134+
lines.update(nested_lines)
135+
136+
# Register the hook and argument for the code object
137+
_CODE_HOOKS[code] = (hook, path, import_names)
93138

139+
if _USE_FILE_LEVEL_COVERAGE:
140+
# Return CoverageLines with line 0 as sentinel to indicate file-level coverage
141+
# Line 0 means "file was instrumented/executed" without specific line details
142+
lines = CoverageLines()
143+
lines.add(0)
144+
return code, lines
145+
else:
146+
# Special case for empty modules (eg: __init__.py ):
147+
# Make sure line 0 is marked as executable, and add package dependency
148+
if not lines and code.co_name == "<module>" and code.co_code == EMPTY_MODULE_BYTES:
149+
lines.add(0)
150+
if package is not None:
151+
import_names[0] = (package, ("",))
152+
153+
return code, lines
154+
155+
156+
def _extract_lines_and_imports(
157+
code: CodeType, package: str, track_lines: bool = True
158+
) -> t.Tuple[CoverageLines, t.Dict[int, t.Tuple[str, t.Tuple[str, ...]]]]:
159+
"""
160+
Extract line numbers and import information from bytecode.
161+
162+
This parses the bytecode to:
163+
1. Collect all executable line numbers (if track_lines=True)
164+
2. Track IMPORT_NAME and IMPORT_FROM opcodes for dependency tracking
165+
166+
Args:
167+
code: The code object to analyze
168+
package: The package name for resolving relative imports
169+
track_lines: Whether to collect line numbers (True for LINE mode, False for PY_START mode)
170+
171+
Returns:
172+
Tuple of (CoverageLines with executable lines, dict mapping lines to imports)
173+
"""
94174
lines = CoverageLines()
95-
import_names: t.Dict[int, t.Tuple[str, t.Optional[t.Tuple[str, ...]]]] = {}
175+
import_names: t.Dict[int, t.Tuple[str, t.Tuple[str, ...]]] = {}
96176

97177
# The previous two arguments are kept in order to track the depth of the IMPORT_NAME
98178
# For example, from ...package import module
@@ -102,6 +182,8 @@ def _instrument_all_lines_with_monitoring(
102182
current_import_name: t.Optional[str] = None
103183
current_import_package: t.Optional[str] = None
104184

185+
# Track line numbers
186+
linestarts = dict(dis.findlinestarts(code))
105187
line: t.Optional[int] = None
106188

107189
ext: list[bytes] = []
@@ -117,7 +199,7 @@ def _instrument_all_lines_with_monitoring(
117199
if offset in linestarts:
118200
line = linestarts[offset]
119201
# Skip if line is None (bytecode that doesn't map to a specific source line)
120-
if line is not None:
202+
if line is not None and track_lines:
121203
lines.add(line)
122204

123205
# Make sure that the current module is marked as depending on its own package by instrumenting the
@@ -166,19 +248,4 @@ def _instrument_all_lines_with_monitoring(
166248
except StopIteration:
167249
pass
168250

169-
# Recursively instrument nested code objects
170-
for nested_code in (_ for _ in code.co_consts if isinstance(_, CodeType)):
171-
_, nested_lines = instrument_all_lines(nested_code, hook, path, package)
172-
lines.update(nested_lines)
173-
174-
# Register the hook and argument for the code object
175-
_CODE_HOOKS[code] = (hook, path, import_names)
176-
177-
# Special case for empty modules (eg: __init__.py ):
178-
# Make sure line 0 is marked as executable, and add package dependency
179-
if not lines and code.co_name == "<module>" and code.co_code == EMPTY_MODULE_BYTES:
180-
lines.add(0)
181-
if package is not None:
182-
import_names[0] = (package, ("",))
183-
184-
return code, lines
251+
return lines, import_names

tests/coverage/test_coverage.py

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pytest
1111

1212

13-
@pytest.mark.subprocess
13+
@pytest.mark.subprocess(parametrize={"_DD_COVERAGE_FILE_LEVEL": ["true", "false"]})
1414
def test_coverage_import_time_lib():
1515
import os
1616
from pathlib import Path
@@ -52,16 +52,31 @@ def test_coverage_import_time_lib():
5252
"tests/coverage/included_path/nested_import_time_lib.py": {1, 4},
5353
}
5454

55-
assert (
56-
executable == expected_executable
57-
), f"Executable lines mismatch: expected={expected_executable} vs actual={executable}"
58-
assert covered == expected_covered, f"Covered lines mismatch: expected={expected_covered} vs actual={covered}"
59-
assert (
60-
covered_with_imports == expected_covered_with_imports
61-
), f"Covered lines with imports mismatch: expected={expected_covered_with_imports} vs actual={covered_with_imports}"
62-
63-
64-
@pytest.mark.subprocess
55+
if os.getenv("_DD_COVERAGE_FILE_LEVEL") == "true":
56+
# In file-level mode, we only track files, not specific line numbers
57+
assert (
58+
executable.keys() == expected_executable.keys()
59+
), f"Executable files mismatch: expected={expected_executable.keys()} vs actual={executable.keys()}"
60+
assert (
61+
covered.keys() == expected_covered.keys()
62+
), f"Covered files mismatch: expected={expected_covered.keys()} vs actual={covered.keys()}"
63+
assert covered_with_imports.keys() == expected_covered_with_imports.keys(), (
64+
f"Covered files with imports mismatch: expected={expected_covered_with_imports.keys()}"
65+
f" vs actual={covered_with_imports.keys()}"
66+
)
67+
else:
68+
# In full coverage mode, we track exact line numbers
69+
assert (
70+
executable == expected_executable
71+
), f"Executable lines mismatch: expected={expected_executable} vs actual={executable}"
72+
assert covered == expected_covered, f"Covered lines mismatch: expected={expected_covered} vs actual={covered}"
73+
assert covered_with_imports == expected_covered_with_imports, (
74+
f"Covered lines with imports mismatch: expected={expected_covered_with_imports} "
75+
f"vs actual={covered_with_imports}"
76+
)
77+
78+
79+
@pytest.mark.subprocess(parametrize={"_DD_COVERAGE_FILE_LEVEL": ["true", "false"]})
6580
def test_coverage_import_time_function():
6681
import os
6782
from pathlib import Path
@@ -102,8 +117,23 @@ def test_coverage_import_time_function():
102117
"tests/coverage/included_path/imported_in_function_lib.py": {1, 2, 3, 4, 7},
103118
}
104119

105-
assert lines == expected_lines, f"Executable lines mismatch: expected={expected_lines} vs actual={lines}"
106-
assert covered == expected_covered, f"Covered lines mismatch: expected={expected_covered} vs actual={covered}"
107-
assert (
108-
covered_with_imports == expected_covered_with_imports
109-
), f"Covered lines with imports mismatch: expected={expected_covered_with_imports} vs actual={covered_with_imports}"
120+
if os.getenv("_DD_COVERAGE_FILE_LEVEL") == "true":
121+
# In file-level mode, we only track files, not specific line numbers
122+
assert (
123+
lines.keys() == expected_lines.keys()
124+
), f"Executable files mismatch: expected={expected_lines.keys()} vs actual={lines.keys()}"
125+
assert (
126+
covered.keys() == expected_covered.keys()
127+
), f"Covered files mismatch: expected={expected_covered.keys()} vs actual={covered.keys()}"
128+
assert covered_with_imports.keys() == expected_covered_with_imports.keys(), (
129+
f"Covered files with imports mismatch: expected={expected_covered_with_imports.keys()} "
130+
f"vs actual={covered_with_imports.keys()}"
131+
)
132+
else:
133+
# In full coverage mode, we track exact line numbers
134+
assert lines == expected_lines, f"Executable lines mismatch: expected={expected_lines} vs actual={lines}"
135+
assert covered == expected_covered, f"Covered lines mismatch: expected={expected_covered} vs actual={covered}"
136+
assert covered_with_imports == expected_covered_with_imports, (
137+
f"Covered lines with imports mismatch: expected={expected_covered_with_imports} "
138+
f"vs actual={covered_with_imports}"
139+
)

0 commit comments

Comments
 (0)