Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 164 additions & 9 deletions interpreter/ruby/ruby.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ import (
"go.opentelemetry.io/ebpf-profiler/libpf"
"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
"go.opentelemetry.io/ebpf-profiler/libpf/pfunsafe"
"go.opentelemetry.io/ebpf-profiler/lpm"
"go.opentelemetry.io/ebpf-profiler/metrics"
npsr "go.opentelemetry.io/ebpf-profiler/nopanicslicereader"
"go.opentelemetry.io/ebpf-profiler/process"
"go.opentelemetry.io/ebpf-profiler/remotememory"
"go.opentelemetry.io/ebpf-profiler/reporter"
"go.opentelemetry.io/ebpf-profiler/successfailurecounter"
"go.opentelemetry.io/ebpf-profiler/support"
"go.opentelemetry.io/ebpf-profiler/util"
Expand Down Expand Up @@ -104,14 +107,16 @@ var (
// regex to extract a version from a string
rubyVersionRegex = regexp.MustCompile(`^(\d+)\.(\d+)\.(\d+)$`)

unknownCfunc = libpf.Intern("<unknown cfunc>")
cfuncDummyFile = libpf.Intern("<cfunc>")
rubyGcFrame = libpf.Intern("(garbage collection)")
rubyGcRunning = libpf.Intern("(running)")
rubyGcMarking = libpf.Intern("(marking)")
rubyGcSweeping = libpf.Intern("(sweeping)")
rubyGcCompacting = libpf.Intern("(compacting)")
rubyGcDummyFile = libpf.Intern("<gc>")
unknownCfunc = libpf.Intern("<unknown cfunc>")
cfuncDummyFile = libpf.Intern("<cfunc>")
rubyGcFrame = libpf.Intern("(garbage collection)")
rubyGcRunning = libpf.Intern("(running)")
rubyGcMarking = libpf.Intern("(marking)")
rubyGcSweeping = libpf.Intern("(sweeping)")
rubyGcCompacting = libpf.Intern("(compacting)")
rubyGcDummyFile = libpf.Intern("<gc>")
rubyJitDummyFrame = libpf.Intern("<unknown jit code>")
rubyJitDummyFile = libpf.Intern("<jitted code>")
// compiler check to make sure the needed interfaces are satisfied
_ interpreter.Data = &rubyData{}
_ interpreter.Instance = &rubyInstance{}
Expand Down Expand Up @@ -376,6 +381,8 @@ func (r *rubyData) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, bias libp
procInfo: &cdata,
globalSymbolsAddr: r.globalSymbolsAddr + bias,
addrToString: addrToString,
mappings: make(map[process.RawMapping]uint32),
prefixes: make(map[lpm.Prefix]uint32),
memPool: sync.Pool{
New: func() any {
buf := make([]byte, 512)
Expand Down Expand Up @@ -425,6 +432,7 @@ type rubyInstance struct {

// lastId is a cached copy index of the final entry in the global symbol table
lastId uint32

// globalSymbolsAddr is the offset of the global symbol table, for looking up ruby symbolic ids
globalSymbolsAddr libpf.Address

Expand All @@ -437,10 +445,31 @@ type rubyInstance struct {
// maxSize is the largest number we did see in the last reporting interval for size
// in getRubyLineNo.
maxSize atomic.Uint32

// mappings is indexed by the Mapping to its generation.
// Entries are pruned each SynchronizeMappings call; the map size is bounded
// by the number of executable anonymous mappings for this process (typically
// a handful for JIT code pages plus any native gems with anonymous exec pages).
mappings map[process.RawMapping]uint32
// prefixes is indexed by the prefix added to ebpf maps (to be cleaned up) to its generation
prefixes map[lpm.Prefix]uint32
// mappingGeneration is the current generation (so old entries can be pruned)
mappingGeneration uint32
}

// Detach removes the eBPF state held for this Ruby process: the per-PID
// interpreter proc data and every LPM prefix tracked in r.prefixes.
//
// Cleanup is best-effort: a failure to delete one entry does not stop the
// remaining deletions. All failures are combined with errors.Join, and nil is
// returned only when every deletion succeeded.
func (r *rubyInstance) Detach(ebpf interpreter.EbpfHandler, pid libpf.PID) error {
	err := ebpf.DeleteProcData(libpf.Ruby, pid)

	// Always walk the prefixes, even if the proc data deletion failed, so that
	// no PID interpreter mapping is left behind.
	for prefix := range r.prefixes {
		if err2 := ebpf.DeletePidInterpreterMapping(pid, prefix); err2 != nil {
			// Wrap with %w so callers can still inspect the underlying cause
			// through the joined error.
			err = errors.Join(err,
				fmt.Errorf("failed to remove ruby prefix 0x%x/%d: %w",
					prefix.Key, prefix.Length, err2))
		}
	}

	return err
}

// UpdateLibcInfo is called when libc introspection data becomes available.
Expand Down Expand Up @@ -1115,6 +1144,15 @@ func (r *rubyInstance) Symbolize(ef libpf.EbpfFrame, frames *libpf.Frames, _ lib
SourceLine: 0,
})
return nil
case support.RubyFrameTypeJit:
label := rubyJitDummyFrame
frames.Append(&libpf.Frame{
Type: libpf.RubyFrame,
FunctionName: label,
SourceFile: rubyJitDummyFile,
SourceLine: 0,
})
return nil
default:
return fmt.Errorf("Unable to get CME or ISEQ from frame address (%d)", frameAddrType)
}
Expand Down Expand Up @@ -1244,6 +1282,123 @@ func profileFrameFullLabel(classPath, label, baseLabel, methodName libpf.String,
return libpf.Intern(profileLabel)
}

// findJITRegion detects the YJIT JIT code region from process memory mappings.
// YJIT reserves a large contiguous address range (typically 48-128 MiB) via mmap
// with PROT_NONE and then mprotects individual 16k codepages to r-x as needed.
// On systems with CONFIG_ANON_VMA_NAME, Ruby labels the region via prctl(PR_SET_VMA)
// giving it a path like "[anon:Ruby:rb_yjit_reserve_addr_space]".
// On systems without that config, we fall back to a heuristic: the first anonymous
// executable mapping (by address) is assumed to be the JIT region since YJIT
// initializes before any gems could create anonymous executable mappings.
// Returns (start, end, found).
func findJITRegion(mappings []process.RawMapping) (uint64, uint64, bool) {
var jitStart, jitEnd uint64
labelFound := false
var heuristicStart, heuristicEnd uint64
heuristicFound := false

for idx := range mappings {
m := &mappings[idx]

// Check for prctl-labeled JIT region. These mappings may be ---p (PROT_NONE)
// or r-xp depending on whether YJIT has activated codepages in this region.
if strings.Contains(m.Path, "jit_reserve_addr_space") {
if !labelFound || m.Vaddr < jitStart {
jitStart = m.Vaddr
}
if !labelFound || m.Vaddr+m.Length > jitEnd {
jitEnd = m.Vaddr + m.Length
}
labelFound = true
continue
}

// Heuristic fallback: first anonymous executable mapping by address.
// Mappings from /proc/pid/maps are sorted by address, so the first
// match is the lowest address.
if !heuristicFound && m.IsExecutable() && m.IsAnonymous() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does the label detection extend the JIT area's start/end across all matching mappings, while the heuristic only uses the first mapping found?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's also a bit of a disconnect with the interpreter mappings: we call UpdatePidInterpreterMapping for all mappings, but only set the JIT start/end from the first one. Is this intentional?

heuristicStart = m.Vaddr
heuristicEnd = m.Vaddr + m.Length
heuristicFound = true
}
}

if labelFound {
return jitStart, jitEnd, true
}
if heuristicFound {
return heuristicStart, heuristicEnd, true
}
return 0, 0, false
}

func (r *rubyInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler,
Copy link
Copy Markdown
Contributor Author

@dalehamel dalehamel Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was cargo-culted from the node interpreter

_ reporter.ExecutableReporter, pr process.Process, mappings []process.RawMapping) error {
pid := pr.PID()
r.mappingGeneration++

log.Debugf("Synchronizing ruby mappings")

// Register LPM prefixes for executable anonymous mappings.
for idx := range mappings {
m := &mappings[idx]
if !m.IsExecutable() || !m.IsAnonymous() {
continue
}

isNew := false
if _, exists := r.mappings[*m]; !exists {
isNew = true
log.Debugf("Enabling Ruby interpreter for %#x/%#x", m.Vaddr, m.Length)
}
r.mappings[*m] = r.mappingGeneration

prefixes, err := lpm.CalculatePrefixList(m.Vaddr, m.Vaddr+m.Length)
if err != nil {
return fmt.Errorf("new anonymous mapping lpm failure %#x/%#x: %w", m.Vaddr, m.Length, err)
}

for _, prefix := range prefixes {
if isNew {
if err := ebpf.UpdatePidInterpreterMapping(pid, prefix,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My only small suggestion remains the same: consider taking advantage of the fact that we know the full size of the YJIT area (48/128/x MiB). We can just find the first mapping and assume the whole area belongs to the Ruby interpreter, without figuring out which subset of it has already been occupied or garbage collected by the JIT. This would simplify the Go code, there would be fewer map updates/deletions every time something is recompiled, and the maps would be smaller.

#1102 (comment)

support.ProgUnwindRuby, 0, 0); err != nil {
return err
}
}
r.prefixes[prefix] = r.mappingGeneration
}
}
// Detect JIT region from all mappings and update proc data if changed.
jitStart, jitEnd, jitFound := findJITRegion(mappings)
if jitFound && (r.procInfo.Jit_start != jitStart || r.procInfo.Jit_end != jitEnd) {
r.procInfo.Jit_start = jitStart
r.procInfo.Jit_end = jitEnd
if err := ebpf.UpdateProcData(libpf.Ruby, pr.PID(), unsafe.Pointer(r.procInfo)); err != nil {
return err
}
log.Debugf("Updated JIT region %#x-%#x in ruby proc info", jitStart, jitEnd)
}
// Remove prefixes not seen
for prefix, gen := range r.prefixes {
if gen == r.mappingGeneration {
continue
}
if err := ebpf.DeletePidInterpreterMapping(pid, prefix); err != nil {
log.Debugf("Failed to delete Ruby prefix %#v: %v", prefix, err)
}
delete(r.prefixes, prefix)
}
for m, gen := range r.mappings {
if gen == r.mappingGeneration {
continue
}
log.Debugf("Disabling Ruby for %#x/%#x", m.Vaddr, m.Length)
delete(r.mappings, m)
}

return nil
}

func (r *rubyInstance) GetAndResetMetrics() ([]metrics.Metric, error) {
addrToStringStats := r.addrToString.ResetMetrics()

Expand Down
128 changes: 128 additions & 0 deletions interpreter/ruby/ruby_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
package ruby // import "go.opentelemetry.io/ebpf-profiler/interpreter/ruby"

import (
"debug/elf"
"testing"

"go.opentelemetry.io/ebpf-profiler/libpf"
"go.opentelemetry.io/ebpf-profiler/process"

"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -234,3 +236,129 @@ func TestProfileFrameFullLabel(t *testing.T) {
})
}
}

func TestFindJITRegion(t *testing.T) {
execAnon := func(vaddr, length uint64) process.RawMapping {
return process.RawMapping{
Vaddr: vaddr,
Length: length,
Flags: elf.PF_R | elf.PF_X,
Path: "",
}
}
labeled := func(vaddr, length uint64) process.RawMapping {
return process.RawMapping{
Vaddr: vaddr,
Length: length,
Flags: 0, // ---p (PROT_NONE)
Path: "[anon:Ruby:rb_yjit_reserve_addr_space]",
}
}
fileBacked := func(vaddr, length uint64, path string) process.RawMapping {
return process.RawMapping{
Vaddr: vaddr,
Length: length,
Flags: elf.PF_R | elf.PF_X,
Path: path,
}
}

tests := []struct {
name string
mappings []process.RawMapping
wantStart uint64
wantEnd uint64
wantFound bool
}{
{
name: "no mappings",
mappings: nil,
wantFound: false,
},
{
name: "only file-backed mappings",
mappings: []process.RawMapping{
fileBacked(0x400000, 0x1000, "/usr/bin/ruby"),
fileBacked(0x7f0000, 0x2000, "/lib/libc.so.6"),
},
wantFound: false,
},
{
name: "labeled JIT region (single mapping)",
mappings: []process.RawMapping{
fileBacked(0x400000, 0x1000, "/usr/bin/ruby"),
labeled(0x7f17d99b9000, 0x8000000),
},
wantStart: 0x7f17d99b9000,
wantEnd: 0x7f17d99b9000 + 0x8000000,
wantFound: true,
},
{
name: "labeled JIT region with holes (multiple contiguous mappings)",
mappings: []process.RawMapping{
fileBacked(0x400000, 0x1000, "/usr/bin/ruby"),
{
Vaddr: 0x7f17d99b9000,
Length: 0x15f000,
Flags: elf.PF_R | elf.PF_X,
Path: "[anon:Ruby:rb_yjit_reserve_addr_space]",
},
{
Vaddr: 0x7f17d9b18000,
Length: 0x119000,
Flags: elf.PF_R | elf.PF_X,
Path: "[anon:Ruby:rb_yjit_reserve_addr_space]",
},
{
Vaddr: 0x7f17d9c31000,
Length: 0x7d88000,
Flags: 0, // ---p reserved
Path: "[anon:Ruby:rb_yjit_reserve_addr_space]",
},
},
wantStart: 0x7f17d99b9000,
wantEnd: 0x7f17d9c31000 + 0x7d88000,
wantFound: true,
},
{
name: "heuristic fallback - first anonymous executable mapping",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taking only the first mapping may not be correct.

I did not test it, but it looks like it would not catch at least this case.

7f17d99b9000-7f17d9b18000 r-xp 00000000 00:00 0
7f17d9b18000-7f17d9c31000 r-xp 00000000 00:00 0
7f17d9c31000-7f17e19b9000 ---p 00000000 00:00 0

Both of these mappings belong to the JIT area, and there may be many more cases: more mappings may appear later. Furthermore, once the full JIT area is occupied, some of the pages may be garbage collected, so there may be holes in it.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why we reject the second mapping in the "heuristic fallback - first anonymous executable mapping" test case.

				execAnon(0x7f0000100000, 0x4000),
				execAnon(0x7f0000200000, 0x8000),

mappings: []process.RawMapping{
fileBacked(0x400000, 0x1000, "/usr/bin/ruby"),
execAnon(0x7f0000100000, 0x4000),
execAnon(0x7f0000200000, 0x8000),
},
wantStart: 0x7f0000100000,
wantEnd: 0x7f0000100000 + 0x4000,
wantFound: true,
},
{
name: "labeled takes precedence over heuristic",
mappings: []process.RawMapping{
execAnon(0x1000000, 0x4000),
labeled(0x7f0000000000, 0x3000000),
},
wantStart: 0x7f0000000000,
wantEnd: 0x7f0000000000 + 0x3000000,
wantFound: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
start, end, found := findJITRegion(tt.mappings)
if found != tt.wantFound {
t.Errorf("found = %v, want %v", found, tt.wantFound)
return
}
if !found {
return
}
if start != tt.wantStart {
t.Errorf("start = %#x, want %#x", start, tt.wantStart)
}
if end != tt.wantEnd {
t.Errorf("end = %#x, want %#x", end, tt.wantEnd)
}
})
}
}
10 changes: 7 additions & 3 deletions process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,14 +275,18 @@ func iterateMappings(mapsFile io.Reader, callback func(m RawMapping) bool) (uint

var path string
if inode == 0 {
if fields[5] == "[vdso]" {
switch fieldValue := fields[5]; {
case fieldValue == "[vdso]":
// Map to something filename looking with synthesized inode
path = VdsoPathName
device = 0
inode = vdsoInode
} else if fields[5] == "" {
case fieldValue == "":
// This is an anonymous mapping, keep it
} else {
case strings.HasPrefix(fieldValue, "[anon:"):
// This is an anonymous mapping named with prctl(PR_SET_VMA), keep the name
path = trimMappingPath(fieldValue)
default:
// Ignore other mappings that are invalid, non-existent or are special pseudo-files
continue
}
Expand Down
Loading
Loading