Skip to content

Commit 5af1c6c

Browse files
committed
Walk YJIT JIT frames via frame pointers for full stack unwinding
Replace the jit_detected flag approach with V8-style frame pointer unwinding through Ruby JIT frames. When YJIT emits frame pointers (always on arm64, with --yjit-perf on x86_64), the Ruby eBPF unwinder walks the native FP chain through JIT frames, pushes each as a RUBY_FRAME_TYPE_JIT frame, then resolves the post-JIT mapping so native unwinding can continue below the Ruby VM stack. When frame pointers are not available, the original behavior is preserved: a single dummy JIT frame is pushed, cfuncs are pushed inline, and native unwinding is stopped at the end of the Ruby stack. Also fixes parseMappings discarding prctl-labeled [anon:...] mappings, which prevented the YJIT JIT region from being visible to interpreter handlers.
1 parent c9c7b8e commit 5af1c6c

File tree

4 files changed

+100
-61
lines changed

4 files changed

+100
-61
lines changed

interpreter/ruby/ruby.go

Lines changed: 28 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
package ruby // import "go.opentelemetry.io/ebpf-profiler/interpreter/ruby"
55

66
import (
7+
"debug/elf"
78
"encoding/binary"
89
"errors"
910
"fmt"
1011
"math/bits"
12+
"os"
1113
"regexp"
1214
"runtime"
1315
"strconv"
@@ -1279,54 +1281,26 @@ func profileFrameFullLabel(classPath, label, baseLabel, methodName libpf.String,
12791281
return libpf.Intern(profileLabel)
12801282
}
12811283

1282-
// findJITRegion detects the YJIT JIT code region from process memory mappings.
1283-
// YJIT reserves a large contiguous address range (typically 48-128 MiB) via mmap
1284-
// with PROT_NONE and then mprotects individual 16k codepages to r-x as needed.
1285-
// On systems with CONFIG_ANON_VMA_NAME, Ruby labels the region via prctl(PR_SET_VMA)
1286-
// giving it a path like "[anon:Ruby:rb_yjit_reserve_addr_space]".
1287-
// On systems without that config, we fall back to a heuristic: the first anonymous
1288-
// executable mapping (by address) is assumed to be the JIT region since YJIT
1289-
// initializes before any gems could create anonymous executable mappings.
1290-
// Returns (start, end, found).
1291-
func findJITRegion(mappings []process.RawMapping) (uint64, uint64, bool) {
1292-
var jitStart, jitEnd uint64
1293-
labelFound := false
1294-
var heuristicStart, heuristicEnd uint64
1295-
heuristicFound := false
1296-
1297-
for idx := range mappings {
1298-
m := &mappings[idx]
1299-
1300-
// Check for prctl-labeled JIT region. These mappings may be ---p (PROT_NONE)
1301-
// or r-xp depending on whether YJIT has activated codepages in this region.
1302-
if strings.Contains(m.Path, "jit_reserve_addr_space") {
1303-
if !labelFound || m.Vaddr < jitStart {
1304-
jitStart = m.Vaddr
1305-
}
1306-
if !labelFound || m.Vaddr+m.Length > jitEnd {
1307-
jitEnd = m.Vaddr + m.Length
1308-
}
1309-
labelFound = true
1310-
continue
1311-
}
1312-
1313-
// Heuristic fallback: first anonymous executable mapping by address.
1314-
// Mappings from /proc/pid/maps are sorted by address, so the first
1315-
// match is the lowest address.
1316-
if !heuristicFound && m.IsExecutable() && m.IsAnonymous() {
1317-
heuristicStart = m.Vaddr
1318-
heuristicEnd = m.Vaddr + m.Length
1319-
heuristicFound = true
1320-
}
1284+
// hasJitFramePointers detects whether YJIT is emitting frame pointers for this process.
1285+
// On arm64, YJIT always emits frame pointers unconditionally.
1286+
// On x86_64, frame pointers are only emitted when --yjit-perf or --yjit-perf=fp is used.
1287+
// When --yjit-perf is active, YJIT also creates /tmp/perf-PID.map, which we use as the
1288+
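// detection signal on x86_64. NOTE(review): this is a heuristic -- --yjit-perf=fp alone may enable frame pointers without writing a perf map, and --yjit-perf=iseq/codegen may write a perf map without frame pointers; confirm against YJIT's option parsing.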
1289+
func hasJitFramePointers(pr process.Process) bool {
1290+
machine := pr.GetMachineData().Machine
1291+
if machine == elf.EM_AARCH64 {
1292+
// YJIT on arm64 always emits frame pointers (unconditionally in the backend).
1293+
return true
13211294
}
13221295

1323-
if labelFound {
1324-
return jitStart, jitEnd, true
1325-
}
1326-
if heuristicFound {
1327-
return heuristicStart, heuristicEnd, true
1296+
// On x86_64, check for the perf map file which indicates --yjit-perf was used.
1297+
// The --yjit-perf flag enables both frame pointers and the perf map.
1298+
perfMapPath := fmt.Sprintf("/tmp/perf-%d.map", pr.PID())
1299+
if _, err := os.Stat(perfMapPath); err == nil {
1300+
return true
13281301
}
1329-
return 0, 0, false
1302+
1303+
return false
13301304
}
13311305

13321306
func (r *rubyInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler,
@@ -1375,10 +1349,18 @@ func (r *rubyInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler,
13751349
if jitFound && (r.procInfo.Jit_start != jitStart || r.procInfo.Jit_end != jitEnd) {
13761350
r.procInfo.Jit_start = jitStart
13771351
r.procInfo.Jit_end = jitEnd
1352+
1353+
// Detect whether the JIT is emitting frame pointers.
1354+
// On arm64, YJIT always emits frame pointers unconditionally.
1355+
// On x86_64, frame pointers are only emitted with --yjit-perf or --yjit-perf=fp,
1356+
// which also creates a /tmp/perf-PID.map file as a side effect.
1357+
r.procInfo.Frame_pointers_enabled = hasJitFramePointers(pr)
1358+
13781359
if err := ebpf.UpdateProcData(libpf.Ruby, pr.PID(), unsafe.Pointer(r.procInfo)); err != nil {
13791360
return err
13801361
}
1381-
log.Debugf("Updated JIT region %#x-%#x in ruby proc info", jitStart, jitEnd)
1362+
log.Debugf("Updated JIT region %#x-%#x in ruby proc info (frame_pointers=%v)",
1363+
jitStart, jitEnd, r.procInfo.Frame_pointers_enabled)
13821364
}
13831365
// Remove prefixes not seen
13841366
for prefix, generationPtr := range r.prefixes {

support/ebpf/ruby_tracer.ebpf.c

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ struct ruby_procs_t {
1919
// option is to adjust this number downwards.
2020
// NOTE: the maximum size stack is FRAMES_PER_WALK_RUBY_STACK * calls to tail_call().
2121
#define FRAMES_PER_WALK_RUBY_STACK 32
22+
23+
// The maximum number of JIT frames to unwind via frame pointers.
24+
// YJIT creates one native frame per JIT entry (not per Ruby method),
25+
// so in practice there is typically only 1 (occasionally 2 for nested entries).
26+
#define MAX_JIT_FP_FRAMES 4
2227
// When resolving a CME, we need to traverse environment pointers until we
2328
// find IMEMO_MENT. Since we can't do a while loop, we have to bound this
2429
// traversal. The max encountered in experimentation on a production Rails app is 6.
@@ -271,8 +276,9 @@ static EBPF_INLINE ErrorCode read_ruby_frame(
271276
// frames will almost certainly be incorrect for Ruby versions < 2.6.
272277
frame_type = RUBY_FRAME_TYPE_CME_CFUNC;
273278
} else if (record->rubyUnwindState.jit_detected) {
274-
// If we detected a jit frame and are now in a cfunc, push the c frame
275-
// as we can no longer unwind native anymore
279+
// JIT is active but frame pointers are not available, so we cannot unwind
280+
// through JIT frames to get back to native code. Push the cfunc inline
281+
// instead of handing off to the native unwinder.
276282
frame_type = RUBY_FRAME_TYPE_CME_CFUNC;
277283
} else {
278284
// We save this cfp in the "Record" entry, and when we start the unwinder
@@ -450,19 +456,62 @@ static EBPF_INLINE ErrorCode walk_ruby_stack(
450456
record->rubyUnwindState.cfunc_saved_frame = 0;
451457
}
452458

459+
// If the CPU PC is in the JIT region, walk the native frame pointer chain through JIT frames.
460+
// This follows the same pattern as the V8 unwinder (v8_tracer.ebpf.c): push each JIT frame,
461+
// then use unwinder_unwind_frame_pointer() to advance PC/SP/FP to the caller.
462+
// YJIT creates one native FP frame per JIT entry, not per Ruby method, so there are
463+
// typically only 1-2 frames to walk.
464+
//
465+
// If frame_pointers_enabled is false (e.g. x86_64 without --yjit-perf), we push a single
466+
// dummy JIT frame and skip FP walking -- the stack will be truncated at the Ruby VM frames
467+
// but won't produce garbage from following an invalid FP chain.
453468
if (
454469
rubyinfo->jit_start > 0 && record->state.pc >= rubyinfo->jit_start &&
455470
record->state.pc < rubyinfo->jit_end) {
456-
record->rubyUnwindState.jit_detected = true;
457-
458-
// If the first frame is a jit PC, the leaf ruby frame should be the jit "owner"
459-
// the cpu PC is also pushed as the address,
460-
// as in theory this can be used to symbolize the JIT frame later
461-
if (trace->num_frames == 0) {
462-
ErrorCode error =
471+
if (rubyinfo->frame_pointers_enabled) {
472+
// Walk the native FP chain through JIT frames, pushing each as a JIT frame
473+
// so it can potentially be symbolized via perf maps later.
474+
UNROLL for (int j = 0; j < MAX_JIT_FP_FRAMES; j++)
475+
{
476+
ErrorCode jit_error =
477+
push_ruby(&record->state, trace, RUBY_FRAME_TYPE_JIT, (u64)record->state.pc, 0, 0);
478+
if (jit_error) {
479+
return jit_error;
480+
}
481+
482+
if (!unwinder_unwind_frame_pointer(&record->state)) {
483+
// FP chain broken, cannot continue
484+
*next_unwinder = PROG_UNWIND_STOP;
485+
return ERR_OK;
486+
}
487+
488+
// Check if we've left the JIT region
489+
if (record->state.pc < rubyinfo->jit_start || record->state.pc >= rubyinfo->jit_end) {
490+
break;
491+
}
492+
}
493+
// After walking JIT frames, PC should be in rb_vm_exec or other native code.
494+
// We must resolve the mapping for the new PC so that text_section_id/offset/bias
495+
// are up to date. Without this, the native unwinder would try to use stale mapping
496+
// info from the JIT region and fail with ERR_NATIVE_NO_PID_PAGE_MAPPING.
497+
ErrorCode map_err = get_next_unwinder_after_native_frame(record, next_unwinder);
498+
if (map_err) {
499+
return map_err;
500+
}
501+
// The resolved unwinder should be PROG_UNWIND_RUBY (since PC is in rb_vm_exec
502+
// which is in interpreter_offsets) or PROG_UNWIND_NATIVE. Either way, we continue
503+
// with the Ruby VM stack walk below and the mapping state is now correct for when
504+
// we eventually hand off to the native unwinder.
505+
} else {
506+
// No frame pointers available: push a single dummy JIT frame.
507+
// We cannot walk the FP chain so we will not be able to resume native unwinding.
508+
// Mark jit_detected so that cfuncs are pushed inline and end-of-stack uses
509+
// PROG_UNWIND_STOP instead of PROG_UNWIND_NATIVE.
510+
record->rubyUnwindState.jit_detected = true;
511+
ErrorCode jit_error =
463512
push_ruby(&record->state, trace, RUBY_FRAME_TYPE_JIT, (u64)record->state.pc, 0, 0);
464-
if (error) {
465-
return error;
513+
if (jit_error) {
514+
return jit_error;
466515
}
467516
}
468517
}
@@ -474,8 +523,9 @@ static EBPF_INLINE ErrorCode walk_ruby_stack(
474523

475524
if (last_stack_frame <= stack_ptr) {
476525
// We have processed all frames in the Ruby VM and can stop here.
477-
// if this process has been JIT'd, the PC is invalid and we cannot resume native unwinding so
478-
// we are done
526+
// If we walked through JIT frames via FP, the state is clean and native unwinding
527+
// can continue. If JIT was detected without FP, the PC is still in the JIT region
528+
// and native unwinding would fail, so we stop.
479529
*next_unwinder = record->rubyUnwindState.jit_detected ? PROG_UNWIND_STOP : PROG_UNWIND_NATIVE;
480530
goto save_state;
481531
} else {

support/ebpf/types.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,11 @@ typedef struct RubyProcInfo {
491491

492492
// JIT regions, for detecting if a native PC was JIT
493493
u64 jit_start, jit_end;
494+
495+
// Whether the JIT is emitting frame pointers (e.g. --yjit-perf on x86_64, always on arm64).
496+
// When true, we walk the native FP chain through JIT frames instead of stopping.
497+
bool frame_pointers_enabled;
498+
494499
// Offsets and sizes of Ruby internal structs
495500

496501
// rb_execution_context_struct offsets:
@@ -731,7 +736,8 @@ typedef struct RubyUnwindState {
731736
void *last_stack_frame;
732737
// Frame for last cfunc before we switched to native unwinder
733738
u64 cfunc_saved_frame;
734-
// Detect if JIT code ran in the process (at any time)
739+
// Set when JIT code is detected in the current trace and frame pointers are not available.
740+
// Used to suppress native unwinding and push cfuncs inline.
735741
bool jit_detected;
736742
} RubyUnwindState;
737743

support/types.go

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)