Skip to content

Commit c3cf7c7

Browse files
committed
PROFILING DIFF
1 parent 2750f3c commit c3cf7c7

File tree

15 files changed

+779
-377
lines changed

15 files changed

+779
-377
lines changed

Include/cpython/pystate.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,30 @@ struct _ts {
246246

247247
/* The bottom-most frame on the stack. */
248248
_PyCFrame root_cframe;
249+
250+
/* -------- Fields below this line are new for profiler support --------
251+
These fields MUST remain at the end of the struct to preserve
252+
ABI compatibility with external profilers (py-spy, Austin) that
253+
read the struct with hardcoded offsets. */
254+
255+
/* Pointer to currently executing frame (direct access for profilers).
256+
This mirrors cframe->current_frame but provides direct access for
257+
external profilers that read process memory. */
258+
struct _PyInterpreterFrame *current_frame;
259+
260+
/* Pointer to the base frame (bottommost sentinel frame).
261+
Used by profilers to validate complete stack unwinding.
262+
Points to the embedded base_frame in _PyThreadStateImpl.
263+
The frame is embedded there rather than here because _PyInterpreterFrame
264+
is defined in internal headers that cannot be exposed in the public API. */
265+
struct _PyInterpreterFrame *base_frame;
266+
267+
/* Last frame sampled by a profiler. Used for frame caching optimization. */
268+
struct _PyInterpreterFrame *last_profiled_frame;
269+
270+
/* GIL state for profilers. */
271+
int holds_gil;
272+
int gil_requested;
249273
};
250274

251275
/* WASI has limited call stack. Python's recursion limit depends on code
Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
#ifndef Py_INTERNAL_DEBUG_OFFSETS_H
2+
#define Py_INTERNAL_DEBUG_OFFSETS_H
3+
#ifdef __cplusplus
4+
extern "C" {
5+
#endif
6+
7+
#ifndef Py_BUILD_CORE
8+
# error "this header requires Py_BUILD_CORE define"
9+
#endif
10+
11+
#include <stdint.h>
12+
13+
// Magic string placed in _Py_DebugOffsets.cookie.  Its eight characters plus
// the terminating NUL occupy the 9-byte cookie array exactly.
#define _Py_Debug_Cookie "xdebugpy"

// Table of structure sizes and member offsets, populated by
// _Py_DebugOffsets_INIT().  Inside each group, `size` holds a sizeof() value
// and the remaining members hold byte offsets (unicode_object.asciiobject_size
// is a second sizeof()).
// NOTE(review): this table is meant for out-of-process readers, so member
// order is presumably part of the contract -- confirm before reordering.
typedef struct _Py_DebugOffsets {
    char cookie[9];             // holds _Py_Debug_Cookie
    uint64_t version;           // initialized from PY_VERSION_HEX
    uint64_t free_threaded;     // Always 0 for 3.12 (no free-threading support)

    // _PyRuntimeState
    struct {
        uint64_t size;
        uint64_t finalizing;
        uint64_t interpreters_head;
    } runtime_state;

    // PyInterpreterState
    struct {
        uint64_t size;
        uint64_t id;
        uint64_t next;
        uint64_t threads_head;
        uint64_t gc;
        uint64_t sysdict;
        uint64_t builtins;
        uint64_t ceval_gil;
        uint64_t gil_runtime_state;
        uint64_t gil_runtime_state_locked;
        uint64_t gil_runtime_state_holder;
    } interpreter_state;

    // PyThreadState
    struct {
        uint64_t size;
        uint64_t prev;
        uint64_t next;
        uint64_t interp;
        uint64_t current_frame;
        uint64_t base_frame;
        uint64_t last_profiled_frame;
        uint64_t thread_id;
        uint64_t native_thread_id;
        uint64_t datastack_chunk;
        uint64_t status;
        uint64_t current_exception;
        uint64_t exc_state;
        uint64_t holds_gil;
        uint64_t gil_requested;
    } thread_state;

    // _PyErr_StackItem
    struct {
        uint64_t exc_value;
    } err_stackitem;

    // _PyInterpreterFrame
    struct {
        uint64_t size;
        uint64_t previous;
        uint64_t executable;    // f_code in 3.12
        uint64_t instr_ptr;     // prev_instr in 3.12
        uint64_t localsplus;
        uint64_t owner;
    } interpreter_frame;

    // PyCodeObject
    struct {
        uint64_t size;
        uint64_t filename;
        uint64_t name;
        uint64_t qualname;
        uint64_t linetable;
        uint64_t firstlineno;
        uint64_t argcount;
        uint64_t localsplusnames;
        uint64_t localspluskinds;
        uint64_t co_code_adaptive;
    } code_object;

    // PyObject
    struct {
        uint64_t size;
        uint64_t ob_type;
    } pyobject;

    // PyTypeObject
    struct {
        uint64_t size;
        uint64_t tp_name;
        uint64_t tp_repr;
        uint64_t tp_flags;
    } type_object;

    // PyTupleObject (ob_size is taken from PyVarObject)
    struct {
        uint64_t size;
        uint64_t ob_item;
        uint64_t ob_size;
    } tuple_object;

    // PyListObject (ob_size is taken from PyVarObject)
    struct {
        uint64_t size;
        uint64_t ob_item;
        uint64_t ob_size;
    } list_object;

    // PyDictObject
    struct {
        uint64_t size;
        uint64_t ma_keys;
        uint64_t ma_values;
    } dict_object;

    // PyFloatObject
    struct {
        uint64_t size;
        uint64_t ob_fval;
    } float_object;

    // PyLongObject
    struct {
        uint64_t size;
        uint64_t lv_tag;
        uint64_t ob_digit;
    } long_object;

    // PyBytesObject (ob_size is taken from PyVarObject)
    struct {
        uint64_t size;
        uint64_t ob_size;
        uint64_t ob_sval;
    } bytes_object;

    // PyUnicodeObject; asciiobject_size is sizeof(PyASCIIObject)
    struct {
        uint64_t size;
        uint64_t state;
        uint64_t length;
        uint64_t asciiobject_size;
    } unicode_object;

    // struct _gc_runtime_state
    struct {
        uint64_t size;
        uint64_t collecting;
        uint64_t frame;
    } gc;

    // PyGenObject
    struct {
        uint64_t size;
        uint64_t gi_name;
        uint64_t gi_iframe;
        uint64_t gi_frame_state;
    } gen_object;

} _Py_DebugOffsets;
169+
170+
171+
// Static initializer for a _Py_DebugOffsets value, written against the
// CPython 3.12 structure and member names.  Takes no arguments and expands to
// a brace-enclosed designated-initializer list: each `.size` entry is a
// sizeof(), the remaining entries are byte offsets computed with offsetof().
#define _Py_DebugOffsets_INIT() { \
    .cookie = _Py_Debug_Cookie, \
    .version = PY_VERSION_HEX, \
    .free_threaded = 0, \
    /* _PyRuntimeState */ \
    .runtime_state = { \
        .size = sizeof(_PyRuntimeState), \
        .finalizing = offsetof(_PyRuntimeState, _finalizing), \
        .interpreters_head = offsetof(_PyRuntimeState, interpreters.head), \
    }, \
    /* PyInterpreterState; the GIL entries add a nested-struct offset */ \
    .interpreter_state = { \
        .size = sizeof(PyInterpreterState), \
        .id = offsetof(PyInterpreterState, id), \
        .next = offsetof(PyInterpreterState, next), \
        .threads_head = offsetof(PyInterpreterState, threads.head), \
        .gc = offsetof(PyInterpreterState, gc), \
        .sysdict = offsetof(PyInterpreterState, sysdict), \
        .builtins = offsetof(PyInterpreterState, builtins), \
        .ceval_gil = offsetof(PyInterpreterState, ceval) + offsetof(struct _ceval_state, gil), \
        .gil_runtime_state = offsetof(PyInterpreterState, _gil), \
        .gil_runtime_state_locked = offsetof(PyInterpreterState, _gil) + offsetof(struct _gil_runtime_state, locked), \
        .gil_runtime_state_holder = offsetof(PyInterpreterState, _gil) + offsetof(struct _gil_runtime_state, last_holder), \
    }, \
    /* PyThreadState */ \
    .thread_state = { \
        .size = sizeof(PyThreadState), \
        .prev = offsetof(PyThreadState, prev), \
        .next = offsetof(PyThreadState, next), \
        .interp = offsetof(PyThreadState, interp), \
        .current_frame = offsetof(PyThreadState, current_frame), \
        .base_frame = offsetof(PyThreadState, base_frame), \
        .last_profiled_frame = offsetof(PyThreadState, last_profiled_frame), \
        .thread_id = offsetof(PyThreadState, thread_id), \
        .native_thread_id = offsetof(PyThreadState, native_thread_id), \
        .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
        .status = offsetof(PyThreadState, _status), \
        .current_exception = offsetof(PyThreadState, current_exception), \
        .exc_state = offsetof(PyThreadState, exc_state), \
        .holds_gil = offsetof(PyThreadState, holds_gil), \
        .gil_requested = offsetof(PyThreadState, gil_requested), \
    }, \
    /* _PyErr_StackItem */ \
    .err_stackitem = { \
        .exc_value = offsetof(_PyErr_StackItem, exc_value), \
    }, \
    /* _PyInterpreterFrame: executable/instr_ptr map to f_code/prev_instr */ \
    .interpreter_frame = { \
        .size = sizeof(_PyInterpreterFrame), \
        .previous = offsetof(_PyInterpreterFrame, previous), \
        .executable = offsetof(_PyInterpreterFrame, f_code), \
        .instr_ptr = offsetof(_PyInterpreterFrame, prev_instr), \
        .localsplus = offsetof(_PyInterpreterFrame, localsplus), \
        .owner = offsetof(_PyInterpreterFrame, owner), \
    }, \
    /* PyCodeObject */ \
    .code_object = { \
        .size = sizeof(PyCodeObject), \
        .filename = offsetof(PyCodeObject, co_filename), \
        .name = offsetof(PyCodeObject, co_name), \
        .qualname = offsetof(PyCodeObject, co_qualname), \
        .linetable = offsetof(PyCodeObject, co_linetable), \
        .firstlineno = offsetof(PyCodeObject, co_firstlineno), \
        .argcount = offsetof(PyCodeObject, co_argcount), \
        .localsplusnames = offsetof(PyCodeObject, co_localsplusnames), \
        .localspluskinds = offsetof(PyCodeObject, co_localspluskinds), \
        .co_code_adaptive = offsetof(PyCodeObject, co_code_adaptive), \
    }, \
    /* PyObject */ \
    .pyobject = { \
        .size = sizeof(PyObject), \
        .ob_type = offsetof(PyObject, ob_type), \
    }, \
    /* PyTypeObject */ \
    .type_object = { \
        .size = sizeof(PyTypeObject), \
        .tp_name = offsetof(PyTypeObject, tp_name), \
        .tp_repr = offsetof(PyTypeObject, tp_repr), \
        .tp_flags = offsetof(PyTypeObject, tp_flags), \
    }, \
    /* PyTupleObject; ob_size comes from the PyVarObject header */ \
    .tuple_object = { \
        .size = sizeof(PyTupleObject), \
        .ob_item = offsetof(PyTupleObject, ob_item), \
        .ob_size = offsetof(PyVarObject, ob_size), \
    }, \
    /* PyListObject; ob_size comes from the PyVarObject header */ \
    .list_object = { \
        .size = sizeof(PyListObject), \
        .ob_item = offsetof(PyListObject, ob_item), \
        .ob_size = offsetof(PyVarObject, ob_size), \
    }, \
    /* PyDictObject */ \
    .dict_object = { \
        .size = sizeof(PyDictObject), \
        .ma_keys = offsetof(PyDictObject, ma_keys), \
        .ma_values = offsetof(PyDictObject, ma_values), \
    }, \
    /* PyFloatObject */ \
    .float_object = { \
        .size = sizeof(PyFloatObject), \
        .ob_fval = offsetof(PyFloatObject, ob_fval), \
    }, \
    /* PyLongObject */ \
    .long_object = { \
        .size = sizeof(PyLongObject), \
        .lv_tag = offsetof(PyLongObject, long_value.lv_tag), \
        .ob_digit = offsetof(PyLongObject, long_value.ob_digit), \
    }, \
    /* PyBytesObject; ob_size comes from the PyVarObject header */ \
    .bytes_object = { \
        .size = sizeof(PyBytesObject), \
        .ob_size = offsetof(PyVarObject, ob_size), \
        .ob_sval = offsetof(PyBytesObject, ob_sval), \
    }, \
    /* PyUnicodeObject; state/length live in the innermost _base */ \
    .unicode_object = { \
        .size = sizeof(PyUnicodeObject), \
        .state = offsetof(PyUnicodeObject, _base._base.state), \
        .length = offsetof(PyUnicodeObject, _base._base.length), \
        .asciiobject_size = sizeof(PyASCIIObject), \
    }, \
    /* struct _gc_runtime_state */ \
    .gc = { \
        .size = sizeof(struct _gc_runtime_state), \
        .collecting = offsetof(struct _gc_runtime_state, collecting), \
        .frame = offsetof(struct _gc_runtime_state, frame), \
    }, \
    /* PyGenObject */ \
    .gen_object = { \
        .size = sizeof(PyGenObject), \
        .gi_name = offsetof(PyGenObject, gi_name), \
        .gi_iframe = offsetof(PyGenObject, gi_iframe), \
        .gi_frame_state = offsetof(PyGenObject, gi_frame_state), \
    }, \
}
291+
292+
293+
#ifdef __cplusplus
294+
}
295+
#endif
296+
#endif /* !Py_INTERNAL_DEBUG_OFFSETS_H */

Include/internal/pycore_frame.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,11 @@ _PyFrame_SetStackPointer(_PyInterpreterFrame *frame, PyObject **stack_pointer)
174174
static inline bool
175175
_PyFrame_IsIncomplete(_PyInterpreterFrame *frame)
176176
{
177+
// Base frame sentinel has FRAME_OWNED_BY_CSTACK and f_code == NULL
178+
// Don't try to access f_code fields for it
179+
if (frame->owner == FRAME_OWNED_BY_CSTACK && frame->f_code == NULL) {
180+
return 0;
181+
}
177182
return frame->owner != FRAME_OWNED_BY_GENERATOR &&
178183
frame->prev_instr < _PyCode_CODE(frame->f_code) + frame->f_code->_co_firsttraceable;
179184
}
@@ -184,6 +189,10 @@ _PyFrame_GetFirstComplete(_PyInterpreterFrame *frame)
184189
while (frame && _PyFrame_IsIncomplete(frame)) {
185190
frame = frame->previous;
186191
}
192+
// Skip base frame sentinel (FRAME_OWNED_BY_CSTACK with no f_code)
193+
if (frame && frame->owner == FRAME_OWNED_BY_CSTACK && frame->f_code == NULL) {
194+
return NULL;
195+
}
187196
return frame;
188197
}
189198

Include/internal/pycore_gc.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ struct _gc_runtime_state {
187187
collections, and are awaiting to undergo a full collection for
188188
the first time. */
189189
Py_ssize_t long_lived_pending;
190+
191+
/* Frame that started the current collection (may be NULL).
192+
Used by profilers to track GC activity. */
193+
struct _PyInterpreterFrame *frame;
190194
};
191195

192196

Include/internal/pycore_runtime.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ extern "C" {
1010

1111
#include "pycore_atexit.h" // struct atexit_runtime_state
1212
#include "pycore_atomic.h" /* _Py_atomic_address */
13+
#include "pycore_debug_offsets.h" // _Py_DebugOffsets
1314
#include "pycore_ceval_state.h" // struct _ceval_runtime_state
1415
#include "pycore_floatobject.h" // struct _Py_float_runtime_state
1516
#include "pycore_faulthandler.h" // struct _faulthandler_runtime_state
@@ -105,6 +106,10 @@ typedef struct pyruntimestate {
105106

106107
unsigned long main_thread;
107108

109+
// Debug offsets for external profilers and debuggers.
110+
// Must remain at a stable offset for out-of-process tools.
111+
_Py_DebugOffsets debug_offsets;
112+
108113
/* ---------- IMPORTANT ---------------------------
109114
The fields above this line are declared as early as
110115
possible to facilitate out-of-process observability

Include/internal/pycore_runtime_init.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ extern "C" {
1313
#include "pycore_parser.h"
1414
#include "pycore_pymem_init.h"
1515
#include "pycore_obmalloc_init.h"
16+
#include "pycore_debug_offsets.h"
1617

1718

1819
extern PyTypeObject _PyExc_MemoryError;
@@ -37,6 +38,7 @@ extern PyTypeObject _PyExc_MemoryError;
3738
until _PyInterpreterState_Enable() is called. */ \
3839
.next_id = -1, \
3940
}, \
41+
.debug_offsets = _Py_DebugOffsets_INIT(), \
4042
/* A TSS key must be initialized with Py_tss_NEEDS_INIT \
4143
in accordance with the specification. */ \
4244
.autoTSSkey = Py_tss_NEEDS_INIT, \

0 commit comments

Comments
 (0)