Skip to content

Commit 67636f7

Browse files
authored
pythongh-138709: Implement CPU time profiling in profiling.sample (python#138710)
1 parent d06113c commit 67636f7

13 files changed

+971
-38
lines changed

Include/internal/pycore_global_objects_fini_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_global_strings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,7 @@ struct _Py_global_strings {
758758
STRUCT_FOR_ID(size)
759759
STRUCT_FOR_ID(sizehint)
760760
STRUCT_FOR_ID(skip_file_prefixes)
761+
STRUCT_FOR_ID(skip_non_matching_threads)
761762
STRUCT_FOR_ID(sleep)
762763
STRUCT_FOR_ID(sock)
763764
STRUCT_FOR_ID(sort)

Include/internal/pycore_runtime_init_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_unicodeobject_generated.h

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/profiling/sampling/collector.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
from abc import ABC, abstractmethod
22

3+
# Thread states are plain module-level integers rather than an Enum,
# because enums are slow.
(
    THREAD_STATE_RUNNING,
    THREAD_STATE_IDLE,
    THREAD_STATE_GIL_WAIT,
    THREAD_STATE_UNKNOWN,
) = range(4)

# Human-readable label for each thread state constant.
STATUS = dict(
    zip(
        (
            THREAD_STATE_RUNNING,
            THREAD_STATE_IDLE,
            THREAD_STATE_GIL_WAIT,
            THREAD_STATE_UNKNOWN,
        ),
        ("running", "idle", "gil_wait", "unknown"),
    )
)
315

416
class Collector(ABC):
517
@abstractmethod
@@ -10,10 +22,12 @@ def collect(self, stack_frames):
1022
def export(self, filename):
1123
"""Export collected data to a file."""
1224

13-
def _iter_all_frames(self, stack_frames):
25+
def _iter_all_frames(self, stack_frames, skip_idle=False):
1426
"""Iterate over all frame stacks from all interpreters and threads."""
1527
for interpreter_info in stack_frames:
1628
for thread_info in interpreter_info.threads:
29+
if skip_idle and thread_info.status != THREAD_STATE_RUNNING:
30+
continue
1731
frames = thread_info.frame_info
1832
if frames:
1933
yield frames

Lib/profiling/sampling/pstats_collector.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
class PstatsCollector(Collector):
8-
def __init__(self, sample_interval_usec):
8+
def __init__(self, sample_interval_usec, *, skip_idle=False):
99
self.result = collections.defaultdict(
1010
lambda: dict(total_rec_calls=0, direct_calls=0, cumulative_calls=0)
1111
)
@@ -14,6 +14,7 @@ def __init__(self, sample_interval_usec):
1414
self.callers = collections.defaultdict(
1515
lambda: collections.defaultdict(int)
1616
)
17+
self.skip_idle = skip_idle
1718

1819
def _process_frames(self, frames):
1920
"""Process a single thread's frame stack."""
@@ -40,7 +41,7 @@ def _process_frames(self, frames):
4041
self.callers[callee][caller] += 1
4142

4243
def collect(self, stack_frames):
43-
for frames in self._iter_all_frames(stack_frames):
44+
for frames in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle):
4445
self._process_frames(frames)
4546

4647
def export(self, filename):

Lib/profiling/sampling/sample.py

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,21 @@
1515
from .stack_collector import CollapsedStackCollector, FlamegraphCollector
1616

1717
_FREE_THREADED_BUILD = sysconfig.get_config_var("Py_GIL_DISABLED") is not None
18+
19+
# Profiling mode constants
20+
PROFILING_MODE_WALL = 0
21+
PROFILING_MODE_CPU = 1
22+
PROFILING_MODE_GIL = 2
23+
24+
25+
def _parse_mode(mode_string):
26+
"""Convert mode string to mode constant."""
27+
mode_map = {
28+
"wall": PROFILING_MODE_WALL,
29+
"cpu": PROFILING_MODE_CPU,
30+
"gil": PROFILING_MODE_GIL,
31+
}
32+
return mode_map[mode_string]
1833
_HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data.
1934
Supports the following target modes:
2035
- -p PID: Profile an existing process by PID
@@ -120,18 +135,18 @@ def _run_with_sync(original_cmd):
120135

121136

122137
class SampleProfiler:
123-
def __init__(self, pid, sample_interval_usec, all_threads):
138+
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL):
124139
self.pid = pid
125140
self.sample_interval_usec = sample_interval_usec
126141
self.all_threads = all_threads
127142
if _FREE_THREADED_BUILD:
128143
self.unwinder = _remote_debugging.RemoteUnwinder(
129-
self.pid, all_threads=self.all_threads
144+
self.pid, all_threads=self.all_threads, mode=mode
130145
)
131146
else:
132147
only_active_threads = bool(self.all_threads)
133148
self.unwinder = _remote_debugging.RemoteUnwinder(
134-
self.pid, only_active_thread=only_active_threads
149+
self.pid, only_active_thread=only_active_threads, mode=mode
135150
)
136151
# Track sample intervals and total sample count
137152
self.sample_intervals = deque(maxlen=100)
@@ -596,21 +611,25 @@ def sample(
596611
show_summary=True,
597612
output_format="pstats",
598613
realtime_stats=False,
614+
mode=PROFILING_MODE_WALL,
599615
):
600616
profiler = SampleProfiler(
601-
pid, sample_interval_usec, all_threads=all_threads
617+
pid, sample_interval_usec, all_threads=all_threads, mode=mode
602618
)
603619
profiler.realtime_stats = realtime_stats
604620

621+
# Determine skip_idle for collector compatibility
622+
skip_idle = mode != PROFILING_MODE_WALL
623+
605624
collector = None
606625
match output_format:
607626
case "pstats":
608-
collector = PstatsCollector(sample_interval_usec)
627+
collector = PstatsCollector(sample_interval_usec, skip_idle=skip_idle)
609628
case "collapsed":
610-
collector = CollapsedStackCollector()
629+
collector = CollapsedStackCollector(skip_idle=skip_idle)
611630
filename = filename or f"collapsed.{pid}.txt"
612631
case "flamegraph":
613-
collector = FlamegraphCollector()
632+
collector = FlamegraphCollector(skip_idle=skip_idle)
614633
filename = filename or f"flamegraph.{pid}.html"
615634
case _:
616635
raise ValueError(f"Invalid output format: {output_format}")
@@ -661,6 +680,8 @@ def wait_for_process_and_sample(pid, sort_value, args):
661680
if not filename and args.format == "collapsed":
662681
filename = f"collapsed.{pid}.txt"
663682

683+
mode = _parse_mode(args.mode)
684+
664685
sample(
665686
pid,
666687
sort=sort_value,
@@ -672,6 +693,7 @@ def wait_for_process_and_sample(pid, sort_value, args):
672693
show_summary=not args.no_summary,
673694
output_format=args.format,
674695
realtime_stats=args.realtime_stats,
696+
mode=mode,
675697
)
676698

677699

@@ -726,6 +748,15 @@ def main():
726748
help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling",
727749
)
728750

751+
# Mode options
752+
mode_group = parser.add_argument_group("Mode options")
753+
mode_group.add_argument(
754+
"--mode",
755+
choices=["wall", "cpu", "gil"],
756+
default="wall",
757+
help="Sampling mode: wall (all threads), cpu (only CPU-running threads), gil (only GIL-holding threads)",
758+
)
759+
729760
# Output format selection
730761
output_group = parser.add_argument_group("Output options")
731762
output_format = output_group.add_mutually_exclusive_group()
@@ -850,6 +881,8 @@ def main():
850881
elif target_count > 1:
851882
parser.error("only one target type can be specified: -p/--pid, -m/--module, or script")
852883

884+
mode = _parse_mode(args.mode)
885+
853886
if args.pid:
854887
sample(
855888
args.pid,
@@ -862,6 +895,7 @@ def main():
862895
show_summary=not args.no_summary,
863896
output_format=args.format,
864897
realtime_stats=args.realtime_stats,
898+
mode=mode,
865899
)
866900
elif args.module or args.args:
867901
if args.module:

Lib/profiling/sampling/stack_collector.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@
1111

1212

1313
class StackTraceCollector(Collector):
14-
def collect(self, stack_frames):
15-
for frames in self._iter_all_frames(stack_frames):
14+
def __init__(self, *, skip_idle=False):
15+
self.skip_idle = skip_idle
16+
17+
def collect(self, stack_frames, skip_idle=False):
    """Pass every non-empty frame stack in *stack_frames* to process_frames().

    Idle threads are filtered out when either the collector was created
    with ``skip_idle=True`` or the caller passes ``skip_idle=True`` here.
    Previously only the per-call argument was consulted, so the setting
    stored by ``__init__`` was ignored when ``collect()`` was called with
    just the samples, and idle threads were still collected.
    """
    # Honour the instance-level setting; the keyword stays for callers
    # that pass it explicitly.
    effective_skip_idle = skip_idle or self.skip_idle
    for frames in self._iter_all_frames(
        stack_frames, skip_idle=effective_skip_idle
    ):
        if not frames:
            continue
        self.process_frames(frames)
@@ -22,7 +25,8 @@ def process_frames(self, frames):
2225

2326

2427
class CollapsedStackCollector(StackTraceCollector):
25-
def __init__(self):
28+
def __init__(self, *args, **kwargs):
29+
super().__init__(*args, **kwargs)
2630
self.stack_counter = collections.Counter()
2731

2832
def process_frames(self, frames):
@@ -46,7 +50,8 @@ def export(self, filename):
4650

4751

4852
class FlamegraphCollector(StackTraceCollector):
49-
def __init__(self):
53+
def __init__(self, *args, **kwargs):
54+
super().__init__(*args, **kwargs)
5055
self.stats = {}
5156
self._root = {"samples": 0, "children": {}}
5257
self._total_samples = 0

0 commit comments

Comments
 (0)