Skip to content

Commit 46bf832

Browse files
committed
Combine python and native unwinder into single loop
Python programs, especially PyTorch programs, can exhaust the tail call limit by switching from the Python unwinder to the native unwinder more than 29 times. This happens because of eval/delegation patterns in which a single Python frame is wrapped by a couple of native frames. To unwind these stacks successfully, fold the native unwinder into the Python unwinder so that each loop iteration can unwind either a Python frame or a native frame. Replace the separate walk_python_stack inner loop and the outer transition loop with a single switch-in-loop structure using the step_python and step_native helper functions. This reduces tail call usage from one per batch to one per exhausted loop budget (python_native_loop_iters iterations: 6 by default, 12 when debug output is disabled). Move the native unwinder map externs (exe_id_to_*_stack_deltas, stack_delta_page_to_info, unwind_info_array) out of the TESTING_COREDUMP guard in extmaps.h so that python_tracer.ebpf.c can include native_stack_trace.h. The Python loop iteration count is now a read-only (rodata) variable, so it can be set low by default and raised when debug prints are disabled, which allows much deeper stacks to be unwound.
1 parent d8b9470 commit 46bf832

File tree

6 files changed

+101
-51
lines changed

6 files changed

+101
-51
lines changed

support/ebpf/extmaps.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,7 @@ extern struct interpreter_offsets_t interpreter_offsets;
1616
extern struct trace_events_t trace_events;
1717
extern struct go_labels_procs_t go_labels_procs;
1818

19-
#if defined(TESTING_COREDUMP)
20-
21-
// References to maps in alphabetical order that
22-
// are needed only for testing.
23-
24-
extern struct apm_int_procs_t apm_int_procs;
25-
extern struct beam_procs_t beam_procs;
19+
// These are needed by both the native unwinder and the hybrid Python unwinder.
2620
extern struct exe_id_to_8_stack_deltas_t exe_id_to_8_stack_deltas;
2721
extern struct exe_id_to_9_stack_deltas_t exe_id_to_9_stack_deltas;
2822
extern struct exe_id_to_10_stack_deltas_t exe_id_to_10_stack_deltas;
@@ -39,14 +33,21 @@ extern struct exe_id_to_20_stack_deltas_t exe_id_to_20_stack_deltas;
3933
extern struct exe_id_to_21_stack_deltas_t exe_id_to_21_stack_deltas;
4034
extern struct exe_id_to_22_stack_deltas_t exe_id_to_22_stack_deltas;
4135
extern struct exe_id_to_23_stack_deltas_t exe_id_to_23_stack_deltas;
36+
extern struct stack_delta_page_to_info_t stack_delta_page_to_info;
37+
extern struct unwind_info_array_t unwind_info_array;
38+
39+
#if defined(TESTING_COREDUMP)
40+
41+
// References to maps in alphabetical order that
42+
// are needed only for testing.
43+
extern struct apm_int_procs_t apm_int_procs;
44+
extern struct beam_procs_t beam_procs;
4245
extern struct hotspot_procs_t hotspot_procs;
4346
extern struct dotnet_procs_t dotnet_procs;
4447
extern struct perl_procs_t perl_procs;
4548
extern struct php_procs_t php_procs;
4649
extern struct py_procs_t py_procs;
4750
extern struct ruby_procs_t ruby_procs;
48-
extern struct stack_delta_page_to_info_t stack_delta_page_to_info;
49-
extern struct unwind_info_array_t unwind_info_array;
5051
extern struct v8_procs_t v8_procs;
5152
#endif // TESTING_COREDUMP
5253

support/ebpf/native_stack_trace.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#ifndef OPTI_NATIVE_STACK_TRACE_H
22
#define OPTI_NATIVE_STACK_TRACE_H
33

4+
#include "bpfdefs.h"
5+
#include "extmaps.h"
6+
#include "tracemgmt.h"
7+
48
// Unwind info value for invalid stack delta
59
#define STACK_DELTA_INVALID (STACK_DELTA_COMMAND_FLAG | UNWIND_COMMAND_INVALID)
610
#define STACK_DELTA_STOP (STACK_DELTA_COMMAND_FLAG | UNWIND_COMMAND_STOP)

support/ebpf/python_tracer.ebpf.c

Lines changed: 80 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,11 @@
22

33
#include "bpfdefs.h"
44
#include "errors.h"
5+
#include "native_stack_trace.h"
56
#include "tracemgmt.h"
67
#include "tsd.h"
78
#include "types.h"
89

9-
// The number of Python frames to unwind per frame-unwinding eBPF program. If
10-
// we start running out of instructions in the walk_python_stack program, one
11-
// option is to adjust this number downwards.
12-
#define FRAMES_PER_WALK_PYTHON_STACK 12
13-
1410
// Forward declaration to avoid warnings like
1511
// "declaration of 'struct pt_regs' will not be visible outside of this function [-Wvisibility]".
1612
struct pt_regs;
@@ -141,8 +137,10 @@ static EBPF_INLINE ErrorCode process_python_frame(
141137
}
142138

143139
// Read PyCodeObject
144-
if (bpf_probe_read_user(pss->code, sizeof(pss->code), py_codeobject)) {
145-
DEBUG_PRINT("Failed to read PyCodeObject at 0x%lx", (unsigned long)(py_codeobject));
140+
long pycode_err = bpf_probe_read_user(pss->code, sizeof(pss->code), py_codeobject);
141+
if (pycode_err) {
142+
DEBUG_PRINT(
143+
"Failed to read PyCodeObject at 0x%lx err=%ld", (unsigned long)(py_codeobject), pycode_err);
146144
increment_metric(metricID_UnwindPythonErrBadCodeObjectArgCountAddr);
147145
return ERR_PYTHON_BAD_CODE_OBJECT_ADDR;
148146
}
@@ -169,39 +167,6 @@ static EBPF_INLINE ErrorCode process_python_frame(
169167
return ERR_OK;
170168
}
171169

172-
static EBPF_INLINE ErrorCode
173-
walk_python_stack(PerCPURecord *record, const PyProcInfo *pyinfo, int *unwinder)
174-
{
175-
void *py_frame = record->pythonUnwindState.py_frame;
176-
ErrorCode error = ERR_OK;
177-
*unwinder = PROG_UNWIND_STOP;
178-
179-
for (u32 i = 0; i < FRAMES_PER_WALK_PYTHON_STACK; ++i) {
180-
bool continue_with_next;
181-
error = process_python_frame(record, pyinfo, &py_frame, &continue_with_next);
182-
if (error) {
183-
goto stop;
184-
}
185-
if (continue_with_next) {
186-
*unwinder = get_next_unwinder_after_interpreter();
187-
goto stop;
188-
}
189-
if (!py_frame) {
190-
goto stop;
191-
}
192-
}
193-
194-
*unwinder = PROG_UNWIND_PYTHON;
195-
196-
stop:
197-
// Set up the state for the next invocation of this unwinding program.
198-
if (error || !py_frame) {
199-
unwinder_mark_done(record, PROG_UNWIND_PYTHON);
200-
}
201-
record->pythonUnwindState.py_frame = py_frame;
202-
return error;
203-
}
204-
205170
// get_PyThreadState retrieves the PyThreadState* for the current thread.
206171
//
207172
// Python 3.12 and earlier set the thread_state using pthread_setspecific with the key
@@ -288,6 +253,60 @@ static EBPF_INLINE ErrorCode get_PyFrame(const PyProcInfo *pyinfo, void **frame)
288253
return ERR_OK;
289254
}
290255

256+
// Number of loop iterations in unwind_python. Each iteration handles either
257+
// one Python frame or one native frame depending on the current unwinder state.
258+
// This is a RODATA variable so the host agent can tune it based on whether
259+
// debug output is enabled (which affects the verifier instruction budget).
260+
BPF_RODATA_VAR(u32, python_native_loop_iters, 6)
261+
262+
// step_python processes one Python frame and updates *unwinder to indicate
263+
// what should happen next
264+
static EBPF_INLINE ErrorCode
265+
step_python(PerCPURecord *record, const PyProcInfo *pyinfo, void **py_frame, int *unwinder)
266+
{
267+
bool continue_with_next;
268+
ErrorCode error = process_python_frame(record, pyinfo, py_frame, &continue_with_next);
269+
if (error) {
270+
*unwinder = PROG_UNWIND_STOP;
271+
return error;
272+
}
273+
if (continue_with_next) {
274+
*unwinder = get_next_unwinder_after_interpreter();
275+
} else if (!*py_frame) {
276+
*unwinder = PROG_UNWIND_STOP;
277+
} else {
278+
*unwinder = PROG_UNWIND_PYTHON;
279+
}
280+
return ERR_OK;
281+
}
282+
283+
// step_native processes one native frame at an interpreter boundary and
284+
// updates *unwinder
285+
static EBPF_INLINE ErrorCode step_native(PerCPURecord *record, int *unwinder)
286+
{
287+
Trace *trace = &record->trace;
288+
*unwinder = PROG_UNWIND_STOP;
289+
290+
increment_metric(metricID_UnwindNativeAttempts);
291+
ErrorCode error = push_native(
292+
&record->state,
293+
trace,
294+
record->state.text_section_id,
295+
record->state.text_section_offset,
296+
record->state.return_address);
297+
if (error) {
298+
return error;
299+
}
300+
301+
bool stop;
302+
error = unwind_one_frame(record, &stop);
303+
if (error || stop) {
304+
return error;
305+
}
306+
307+
return get_next_unwinder_after_native_frame(record, unwinder);
308+
}
309+
291310
// unwind_python is the entry point for tracing when invoked from the native tracer
292311
// or interpreter dispatcher. It does not reset the trace object and will append the
293312
// Python stack frames to the trace object for the current CPU.
@@ -298,7 +317,7 @@ static EBPF_INLINE int unwind_python(struct pt_regs *ctx)
298317
return -1;
299318

300319
ErrorCode error = ERR_OK;
301-
int unwinder = get_next_unwinder_after_interpreter();
320+
int unwinder = PROG_UNWIND_PYTHON;
302321
Trace *trace = &record->trace;
303322
u32 pid = trace->pid;
304323

@@ -327,7 +346,26 @@ static EBPF_INLINE int unwind_python(struct pt_regs *ctx)
327346
goto exit;
328347
}
329348

330-
error = walk_python_stack(record, pyinfo, &unwinder);
349+
{
350+
void *py_frame = record->pythonUnwindState.py_frame;
351+
352+
for (u32 t = 0; t < python_native_loop_iters; t++) {
353+
switch (unwinder) {
354+
case PROG_UNWIND_PYTHON: error = step_python(record, pyinfo, &py_frame, &unwinder); break;
355+
case PROG_UNWIND_NATIVE: error = step_native(record, &unwinder); break;
356+
default: goto done;
357+
}
358+
if (error) {
359+
goto done;
360+
}
361+
}
362+
363+
done:
364+
if (error || !py_frame) {
365+
unwinder_mark_done(record, PROG_UNWIND_PYTHON);
366+
}
367+
record->pythonUnwindState.py_frame = py_frame;
368+
}
331369

332370
exit:
333371
record->state.unwind_error = error;

support/ebpf/tracer.ebpf.amd64

59.5 KB
Binary file not shown.

support/ebpf/tracer.ebpf.arm64

61.8 KB
Binary file not shown.

tracer/systemconfig.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,13 @@ func loadRodataVars(coll *cebpf.CollectionSpec, kmod *kallsyms.Module, cfg *Conf
302302
if err := coll.Variables["with_debug_output"].Set(uint32(1)); err != nil {
303303
return fmt.Errorf("failed to set debug output: %v", err)
304304
}
305+
} else {
306+
// Without debug output the verifier skips DEBUG_PRINT branches,
307+
// leaving enough instruction budget to increase the Python
308+
// unwinder loop iterations (default 6 -> 12).
309+
if err := coll.Variables["python_native_loop_iters"].Set(uint32(12)); err != nil {
310+
return fmt.Errorf("failed to set python_native_loop_iters: %v", err)
311+
}
305312
}
306313

307314
if err := coll.Variables["off_cpu_threshold"].Set(cfg.OffCPUThreshold); err != nil {

0 commit comments

Comments
 (0)