Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 25 additions & 36 deletions support/ebpf/native_stack_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,32 +76,24 @@ static EBPF_INLINE ErrorCode get_stack_delta(UnwindState *state, int *addrDiff,
key.fileID = state->text_section_id;
key.page = state->text_section_offset & ~STACK_DELTA_PAGE_MASK;
DEBUG_PRINT(
"Look up stack delta for %lx:%lx",
(unsigned long)state->text_section_id,
(unsigned long)state->text_section_offset);
"sd %lx:%lx", (unsigned long)state->text_section_id, (unsigned long)state->text_section_offset);
StackDeltaPageInfo *info = bpf_map_lookup_elem(&stack_delta_page_to_info, &key);
if (!info) {
DEBUG_PRINT(
"Failure to look up stack delta page fileID %lx, page %lx",
(unsigned long)key.fileID,
(unsigned long)key.page);
DEBUG_PRINT("ERR sdp %lx p=%lx", (unsigned long)key.fileID, (unsigned long)key.page);
state->error_metric = metricID_UnwindNativeErrLookupTextSection;
return ERR_NATIVE_LOOKUP_TEXT_SECTION;
}

void *outer_map = get_stack_delta_map(info->mapID);
if (!outer_map) {
DEBUG_PRINT(
"Failure to look up outer map for text section %lx in mapID %d",
(unsigned long)exe_id,
(int)info->mapID);
DEBUG_PRINT("ERR omap %lx m=%d", (unsigned long)exe_id, (int)info->mapID);
state->error_metric = metricID_UnwindNativeErrLookupStackDeltaOuterMap;
return ERR_NATIVE_LOOKUP_STACK_DELTA_OUTER_MAP;
}

void *inner_map = bpf_map_lookup_elem(outer_map, &exe_id);
if (!inner_map) {
DEBUG_PRINT("Failure to look up inner map for text section %lx", (unsigned long)exe_id);
DEBUG_PRINT("ERR imap %lx", (unsigned long)exe_id);
state->error_metric = metricID_UnwindNativeErrLookupStackDeltaInnerMap;
return ERR_NATIVE_LOOKUP_STACK_DELTA_INNER_MAP;
}
Expand All @@ -114,11 +106,7 @@ static EBPF_INLINE ErrorCode get_stack_delta(UnwindState *state, int *addrDiff,
u32 lo = info->firstDelta;
u32 hi = lo + info->numDeltas;

DEBUG_PRINT(
"Intervals should be from %lu to %lu (mapID %d)",
(unsigned long)lo,
(unsigned long)hi,
(int)info->mapID);
DEBUG_PRINT("bs %lu-%lu m=%d", (unsigned long)lo, (unsigned long)hi, (int)info->mapID);

// Do the binary search, up to 16 iterations. Deltas are paged to 64kB pages.
// A page can contain at most 64k (2^16) deltas even if everything is single-byte opcodes.
Expand All @@ -129,7 +117,7 @@ static EBPF_INLINE ErrorCode get_stack_delta(UnwindState *state, int *addrDiff,
}
}
if (i >= 16 || hi == 0) {
DEBUG_PRINT("Failed bsearch in 16 steps. Corrupt data?");
DEBUG_PRINT("ERR bs16");
state->error_metric = metricID_UnwindNativeErrLookupIterations;
return ERR_NATIVE_EXCEEDED_DELTA_LOOKUP_ITERATIONS;
}
Expand All @@ -149,8 +137,7 @@ static EBPF_INLINE ErrorCode get_stack_delta(UnwindState *state, int *addrDiff,
return ERR_NATIVE_LOOKUP_RANGE;
}

DEBUG_PRINT(
"delta index %d, addrLow 0x%x, unwindInfo %d", idx, delta->addrLow, delta->unwindInfo);
DEBUG_PRINT("d %d a=%x u=%d", idx, delta->addrLow, delta->unwindInfo);

// Calculate PC delta from stack delta for merged delta comparison
int deltaOffset = (int)page_offset - (int)delta->addrLow;
Expand All @@ -168,10 +155,6 @@ static EBPF_INLINE ErrorCode get_stack_delta(UnwindState *state, int *addrDiff,
state->error_metric = metricID_UnwindNativeErrStackDeltaInvalid;
return ERR_NATIVE_STACK_DELTA_INVALID;
}
if (delta->unwindInfo == STACK_DELTA_STOP) {
increment_metric(metricID_UnwindNativeStackDeltaStop);
}

return ERR_OK;
}

Expand Down Expand Up @@ -209,7 +192,7 @@ unwind_calc_register_with_deref(UnwindState *state, u8 baseReg, s32 param, bool
// Dereference, and add the postDereference adder.
unsigned long val;
if (bpf_probe_read_user(&val, sizeof(val), (void *)addr)) {
DEBUG_PRINT("unwind failed to dereference address 0x%lx", (unsigned long)addr);
DEBUG_PRINT("ERR deref %lx", (unsigned long)addr);
return 0;
}
// Return: "*(BASE + preDeref) + postDeref"
Expand Down Expand Up @@ -257,7 +240,7 @@ static EBPF_INLINE ErrorCode unwind_one_frame(PerCPURecord *record, bool *stop)
// This is the hard coded implementation of this expression. For further details,
// see https://hal.inria.fr/hal-02297690/document, page 4. (DOI: 10.1145/3360572)
cfa = state->sp + 8 + ((((state->pc & 15) >= 11) ? 1 : 0) << 3);
// DEBUG_PRINT("PLT, cfa=0x%lx", (unsigned long)cfa);
DEBUG_PRINT("PLT %lx", (unsigned long)cfa);
break;
case UNWIND_COMMAND_SIGNAL: {
// Use the PerCPURecord scratch union instead of a stack-local buffer to avoid
Expand All @@ -280,10 +263,13 @@ static EBPF_INLINE ErrorCode unwind_one_frame(PerCPURecord *record, bool *stop)
state->pc = rt_regs[16];

state->return_address = false;
// DEBUG_PRINT("signal frame");
DEBUG_PRINT("sigf");
goto frame_ok;
}
case UNWIND_COMMAND_STOP: *stop = true; return ERR_OK;
case UNWIND_COMMAND_STOP:
increment_metric(metricID_UnwindNativeStackDeltaStop);
*stop = true;
return ERR_OK;
case UNWIND_COMMAND_FRAME_POINTER:
if (!unwinder_unwind_frame_pointer(state)) {
goto err_native_pc_read;
Expand All @@ -305,10 +291,10 @@ static EBPF_INLINE ErrorCode unwind_one_frame(PerCPURecord *record, bool *stop)

s32 param = info->param;
if (info->mergeOpcode) {
// DEBUG_PRINT("AddrDiff %d, merged delta %#02x", addrDiff, info->mergeOpcode);
DEBUG_PRINT("ad=%d md=%x", addrDiff, info->mergeOpcode);
if (addrDiff >= (info->mergeOpcode & ~MERGEOPCODE_NEGATIVE)) {
param += (info->mergeOpcode & MERGEOPCODE_NEGATIVE) ? -8 : 8;
// DEBUG_PRINT("Merged delta match: cfaDelta=%d", unwindInfo);
DEBUG_PRINT("mdm cfa=%d", unwindInfo);
}
}

Expand Down Expand Up @@ -380,10 +366,13 @@ static EBPF_INLINE ErrorCode unwind_one_frame(struct PerCPURecord *record, bool

state->return_address = false;
state->lr_invalid = false;
DEBUG_PRINT("signal frame");
DEBUG_PRINT("sigf");
goto frame_ok;
}
case UNWIND_COMMAND_STOP: *stop = true; return ERR_OK;
case UNWIND_COMMAND_STOP:
increment_metric(metricID_UnwindNativeStackDeltaStop);
*stop = true;
return ERR_OK;
case UNWIND_COMMAND_FRAME_POINTER:
if (!unwinder_unwind_frame_pointer(state)) {
goto err_native_pc_read;
Expand All @@ -401,16 +390,16 @@ static EBPF_INLINE ErrorCode unwind_one_frame(struct PerCPURecord *record, bool
UnwindInfo *info = bpf_map_lookup_elem(&unwind_info_array, &unwindInfo);
if (!info) {
increment_metric(metricID_UnwindNativeErrBadUnwindInfoIndex);
DEBUG_PRINT("Giving up due to invalid unwind info array index");
DEBUG_PRINT("ERR uwi idx");
return ERR_NATIVE_BAD_UNWIND_INFO_INDEX;
}

s32 param = info->param;
if (info->mergeOpcode) {
DEBUG_PRINT("AddrDiff %d, merged delta %#02x", addrDiff, info->mergeOpcode);
DEBUG_PRINT("ad=%d md=%x", addrDiff, info->mergeOpcode);
if (addrDiff >= (info->mergeOpcode & ~MERGEOPCODE_NEGATIVE)) {
param += (info->mergeOpcode & MERGEOPCODE_NEGATIVE) ? -8 : 8;
DEBUG_PRINT("Merged delta match: cfaDelta=%d", unwindInfo);
DEBUG_PRINT("mdm cfa=%d", unwindInfo);
}
}

Expand All @@ -428,7 +417,7 @@ static EBPF_INLINE ErrorCode unwind_one_frame(struct PerCPURecord *record, bool
increment_metric(metricID_UnwindNativeErrPCRead);
}
// report failure to resolve RA and stop unwinding
DEBUG_PRINT("Giving up due to failure to resolve RA");
DEBUG_PRINT("ERR RA");
return ERR_NATIVE_PC_READ;
}

Expand Down
49 changes: 25 additions & 24 deletions support/ebpf/python_tracer.ebpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ static EBPF_INLINE ErrorCode process_python_frame(

// Read PyFrameObject
if (bpf_probe_read_user(pss->frame, sizeof(pss->frame), py_frameobject)) {
// DEBUG_PRINT("Failed to read PyFrameObject 0x%lx", (unsigned long)py_frameobject);
DEBUG_PRINT(
"E%d %lx", metricID_UnwindPythonErrBadFrameCodeObjectAddr, (unsigned long)py_frameobject);
increment_metric(metricID_UnwindPythonErrBadFrameCodeObjectAddr);
return ERR_PYTHON_BAD_FRAME_OBJECT_ADDR;
}
Expand Down Expand Up @@ -119,10 +120,10 @@ static EBPF_INLINE ErrorCode process_python_frame(
}

if (!py_codeobject) {
// DEBUG_PRINT(
// "Null codeobject for PyFrameObject 0x%lx 0x%lx",
// (unsigned long)py_frameobject,
// (unsigned long)(py_frameobject + pyinfo->PyFrameObject_f_code));
DEBUG_PRINT(
"E%d %lx",
metricID_UnwindPythonZeroFrameCodeObject,
(unsigned long)(py_frameobject + pyinfo->PyFrameObject_f_code));
increment_metric(metricID_UnwindPythonZeroFrameCodeObject);
goto push_frame;
}
Expand All @@ -139,9 +140,8 @@ static EBPF_INLINE ErrorCode process_python_frame(
// Read PyCodeObject
long pycode_err = bpf_probe_read_user(pss->code, sizeof(pss->code), py_codeobject);
if (pycode_err) {
// DEBUG_PRINT(
// "Failed to read PyCodeObject at 0x%lx err=%ld", (unsigned long)(py_codeobject),
// pycode_err);
DEBUG_PRINT(
"E%d %lx", metricID_UnwindPythonErrBadCodeObjectArgCountAddr, (unsigned long)(py_codeobject));
increment_metric(metricID_UnwindPythonErrBadCodeObjectArgCountAddr);
// Push the frame with the code object address so the agent can try to
// read it via /proc/pid/mem (which supports page faults unlike BPF).
Expand All @@ -163,10 +163,10 @@ static EBPF_INLINE ErrorCode process_python_frame(
lineno = py_encode_lineno(codeobject_id, (u32)py_f_lasti);

push_frame:
DEBUG_PRINT("Pushing Python %lx %lu", (unsigned long)file_id, (unsigned long)lineno);
DEBUG_PRINT("py+ %lx %lu", (unsigned long)file_id, (unsigned long)lineno);
ErrorCode error = push_python(&record->state, trace, file_id, lineno);
if (error) {
DEBUG_PRINT("failed to push python frame");
DEBUG_PRINT("ERR py+");
return error;
}
increment_metric(metricID_UnwindPythonFrames);
Expand All @@ -184,7 +184,8 @@ static EBPF_INLINE ErrorCode get_PyThreadState(
if (pyinfo->tls_offset != 0) {
if (bpf_probe_read_user(thread_state, sizeof(void *), tsd_base + pyinfo->tls_offset)) {
DEBUG_PRINT(
"Failed to read direct TLS at base 0x%lx offset %d",
"E%d %lx %d",
metricID_UnwindPythonErrReadThreadStateAddr,
(unsigned long)tsd_base,
pyinfo->tls_offset);
increment_metric(metricID_UnwindPythonErrReadThreadStateAddr);
Expand All @@ -196,7 +197,8 @@ static EBPF_INLINE ErrorCode get_PyThreadState(
// Python 3.12 and earlier: use pthread TLS
int key;
if (bpf_probe_read_user(&key, sizeof(key), autoTLSkeyAddr)) {
DEBUG_PRINT("Failed to read autoTLSkey from 0x%lx", (unsigned long)autoTLSkeyAddr);
DEBUG_PRINT(
"E%d %lx", metricID_UnwindPythonErrBadAutoTlsKeyAddr, (unsigned long)autoTLSkeyAddr);
increment_metric(metricID_UnwindPythonErrBadAutoTlsKeyAddr);
return ERR_PYTHON_BAD_AUTO_TLS_KEY_ADDR;
}
Expand All @@ -213,14 +215,11 @@ static EBPF_INLINE ErrorCode get_PyFrame(const PyProcInfo *pyinfo, void **frame)
{
void *tsd_base;
if (tsd_get_base(&tsd_base)) {
DEBUG_PRINT("Failed to get TSD base address");
DEBUG_PRINT("E%d", metricID_UnwindPythonErrReadTsdBase);
increment_metric(metricID_UnwindPythonErrReadTsdBase);
return ERR_PYTHON_READ_TSD_BASE;
}
DEBUG_PRINT(
"TSD Base 0x%lx, autoTLSKeyAddr 0x%lx",
(unsigned long)tsd_base,
(unsigned long)pyinfo->autoTLSKeyAddr);
DEBUG_PRINT("TSD %lx akey %lx", (unsigned long)tsd_base, (unsigned long)pyinfo->autoTLSKeyAddr);

// Get the PyThreadState from TSD
void *py_tsd_thread_state;
Expand All @@ -231,7 +230,7 @@ static EBPF_INLINE ErrorCode get_PyFrame(const PyProcInfo *pyinfo, void **frame)
}

if (!py_tsd_thread_state) {
DEBUG_PRINT("PyThreadState is 0x0");
DEBUG_PRINT("E%d", metricID_UnwindPythonErrZeroThreadState);
increment_metric(metricID_UnwindPythonErrZeroThreadState);
return ERR_PYTHON_ZERO_THREAD_STATE;
}
Expand All @@ -240,7 +239,8 @@ static EBPF_INLINE ErrorCode get_PyFrame(const PyProcInfo *pyinfo, void **frame)
if (bpf_probe_read_user(
frame, sizeof(void *), py_tsd_thread_state + pyinfo->PyThreadState_frame)) {
DEBUG_PRINT(
"Failed to read PyThreadState.frame at 0x%lx",
"E%d %lx",
metricID_UnwindPythonErrBadThreadStateFrameAddr,
(unsigned long)(py_tsd_thread_state + pyinfo->PyThreadState_frame));
increment_metric(metricID_UnwindPythonErrBadThreadStateFrameAddr);
return ERR_PYTHON_BAD_THREAD_STATE_FRAME_ADDR;
Expand All @@ -249,7 +249,8 @@ static EBPF_INLINE ErrorCode get_PyFrame(const PyProcInfo *pyinfo, void **frame)
if (pyinfo->frame_is_cframe) {
if (bpf_probe_read_user(frame, sizeof(void *), *frame + pyinfo->PyCFrame_current_frame)) {
DEBUG_PRINT(
"Failed to read _PyCFrame.current_frame at 0x%lx",
"E%d %lx",
metricID_UnwindPythonErrBadCFrameFrameAddr,
(unsigned long)(*frame + pyinfo->PyCFrame_current_frame));
increment_metric(metricID_UnwindPythonErrBadCFrameFrameAddr);
return ERR_PYTHON_BAD_CFRAME_CURRENT_FRAME_ADDR;
Expand Down Expand Up @@ -328,18 +329,18 @@ static EBPF_INLINE int unwind_python(struct pt_regs *ctx)
Trace *trace = &record->trace;
u32 pid = trace->pid;

DEBUG_PRINT("unwind_python()");
DEBUG_PRINT("upy");

const PyProcInfo *pyinfo = bpf_map_lookup_elem(&py_procs, &pid);
if (!pyinfo) {
// Not a Python process that we have info on
DEBUG_PRINT("Can't build Python stack, no address info");
DEBUG_PRINT("E%d", metricID_UnwindPythonErrNoProcInfo);
increment_metric(metricID_UnwindPythonErrNoProcInfo);
error = ERR_PYTHON_NO_PROC_INFO;
goto exit;
}

DEBUG_PRINT("Building Python stack for 0x%x", pyinfo->version);
DEBUG_PRINT("py v=%x", pyinfo->version);
if (!record->pythonUnwindState.py_frame) {
increment_metric(metricID_UnwindPythonAttempts);
error = get_PyFrame(pyinfo, &record->pythonUnwindState.py_frame);
Expand All @@ -348,7 +349,7 @@ static EBPF_INLINE int unwind_python(struct pt_regs *ctx)
}
}
if (!record->pythonUnwindState.py_frame) {
DEBUG_PRINT(" -> Python frames are handled");
DEBUG_PRINT("py done");
unwinder_mark_done(record, PROG_UNWIND_PYTHON);
goto exit;
}
Expand Down
Loading
Loading