Skip to content

Commit cf5feda

Browse files
ruby interpreter: fix latest offsets and execution context discovery (open-telemetry#710)
Co-authored-by: Dale Hamel <dale.hamel@shopify.com>
1 parent 10c4e55 commit cf5feda

20 files changed

+1365
-73
lines changed

interpreter/apmint/apmint.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"go.opentelemetry.io/ebpf-profiler/host"
2020
"go.opentelemetry.io/ebpf-profiler/interpreter"
2121
"go.opentelemetry.io/ebpf-profiler/libpf"
22+
"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
2223
"go.opentelemetry.io/ebpf-profiler/libpf/pfunsafe"
2324
"go.opentelemetry.io/ebpf-profiler/remotememory"
2425
"go.opentelemetry.io/ebpf-profiler/support"
@@ -76,13 +77,18 @@ func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interprete
7677
return nil, fmt.Errorf("process storage export has wrong size %d", procStorageSym.Size)
7778
}
7879

79-
// Resolve thread info TLS export.
80-
tlsDescs, err := ef.TLSDescriptors()
81-
if err != nil {
82-
return nil, errors.New("failed to extract TLS descriptors")
80+
var tlsDescElfAddr libpf.Address
81+
if err = ef.VisitTLSRelocations(func(r pfelf.ElfReloc, symName string) bool {
82+
if symName == tlsExport {
83+
tlsDescElfAddr = libpf.Address(r.Off)
84+
return false
85+
}
86+
return true
87+
}); err != nil {
88+
return nil, errors.New(fmt.Sprintf("failed to visit TLS descriptor: %v", err))
8389
}
84-
tlsDescElfAddr, ok := tlsDescs[tlsExport]
85-
if !ok {
90+
91+
if tlsDescElfAddr == 0 {
8692
return nil, errors.New("failed to locate TLS descriptor")
8793
}
8894

interpreter/ruby/ruby.go

Lines changed: 142 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
2727
"go.opentelemetry.io/ebpf-profiler/libpf/pfunsafe"
2828
"go.opentelemetry.io/ebpf-profiler/metrics"
29+
npsr "go.opentelemetry.io/ebpf-profiler/nopanicslicereader"
2930
"go.opentelemetry.io/ebpf-profiler/remotememory"
3031
"go.opentelemetry.io/ebpf-profiler/successfailurecounter"
3132
"go.opentelemetry.io/ebpf-profiler/support"
@@ -88,6 +89,9 @@ type rubyData struct {
8889
// eBPF program to build ruby backtraces.
8990
currentCtxPtr libpf.Address
9091

92+
// ELF address of the TLS descriptor for ruby_current_ec; the tpbase-relative offset is read from it at attach time
93+
currentEcTpBaseTlsOffset libpf.Address
94+
9195
// version of the currently used Ruby interpreter.
9296
// major*0x10000 + minor*0x100 + release (e.g. 3.0.1 -> 0x30001)
9397
version uint32
@@ -176,10 +180,18 @@ func rubyVersion(major, minor, release uint32) uint32 {
176180

177181
func (r *rubyData) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, bias libpf.Address,
178182
rm remotememory.RemoteMemory) (interpreter.Instance, error) {
183+
184+
var tlsOffset uint64
185+
if r.currentEcTpBaseTlsOffset != 0 {
186+
// Read TLS offset from the TLS descriptor.
187+
tlsOffset = rm.Uint64(bias + r.currentEcTpBaseTlsOffset + 8)
188+
}
189+
179190
cdata := support.RubyProcInfo{
180191
Version: r.version,
181192

182-
Current_ctx_ptr: uint64(r.currentCtxPtr + bias),
193+
Current_ctx_ptr: uint64(r.currentCtxPtr + bias),
194+
Current_ec_tpbase_tls_offset: tlsOffset,
183195

184196
Vm_stack: r.vmStructs.execution_context_struct.vm_stack,
185197
Vm_stack_size: r.vmStructs.execution_context_struct.vm_stack_size,
@@ -289,21 +301,43 @@ func (r *rubyInstance) readPathObjRealPath(addr libpf.Address) (string, error) {
289301
flags := r.rm.Ptr(addr)
290302
switch flags & rubyTMask {
291303
case rubyTString:
292-
// nothing to do
304+
return r.readRubyString(addr)
293305
case rubyTArray:
294-
var err error
295-
addr, err = r.readRubyArrayDataPtr(addr)
296-
if err != nil {
297-
return "", err
306+
vms := &r.r.vmStructs
307+
arrData, e := r.readRubyArrayDataPtr(addr)
308+
if e != nil {
309+
return "", e
310+
}
311+
312+
// Read contiguous pointer values into a buffer to be more efficient
313+
dataBytes := make([]byte, 2 * vms.size_of_value)
314+
if err := r.rm.Read(arrData, dataBytes); err != nil {
315+
return "", fmt.Errorf("failed to read array data bytes: %v", err)
298316
}
299317

300-
addr += pathObjRealPathIdx * libpf.Address(r.r.vmStructs.size_of_value)
301-
addr = r.rm.Ptr(addr) // deref VALUE -> RString object
318+
var relTag, absTag uint64
319+
relVal := npsr.Ptr(dataBytes, 0)
320+
absVal := npsr.Ptr(dataBytes, uint(vms.size_of_value))
321+
if absVal != 0 {
322+
absTag = uint64(r.rm.Ptr(absVal)) & uint64(rubyTMask)
323+
}
324+
325+
var candidate libpf.Address
326+
if absVal != 0 && absTag == uint64(rubyTString) {
327+
candidate = absVal
328+
} else if relVal != 0 {
329+
relTag = uint64(r.rm.Ptr(relVal)) & uint64(rubyTMask)
330+
if relTag == uint64(rubyTString) {
331+
candidate = relVal
332+
}
333+
} else {
334+
return "", fmt.Errorf("pathobj array has no string entries: relTag=0x%x absTag=0x%x", relTag, absTag)
335+
}
336+
337+
return r.readRubyString(candidate)
302338
default:
303339
return "", fmt.Errorf("unexpected pathobj type tag: 0x%X", flags&rubyTMask)
304340
}
305-
306-
return r.readRubyString(addr)
307341
}
308342

309343
// readRubyString extracts a Ruby string from the given addr.
@@ -718,17 +752,18 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr
718752
// Reason for lowest supported version:
719753
// - Ruby 2.5 is still commonly used at time of writing this code.
720754
// https://www.jetbrains.com/lp/devecosystem-2020/ruby/
721-
// Reason for maximum supported version 3.2.x:
755+
// Reason for maximum supported version 3.5.x:
722756
// - this is currently the newest stable version
723-
724-
minVer, maxVer := rubyVersion(2, 5, 0), rubyVersion(3, 3, 0)
757+
minVer, maxVer := rubyVersion(2, 5, 0), rubyVersion(3, 6, 0)
725758
if version < minVer || version >= maxVer {
726759
return nil, fmt.Errorf("unsupported Ruby %d.%d.%d (need >= %d.%d.%d and <= %d.%d.%d)",
727760
(version>>16)&0xff, (version>>8)&0xff, version&0xff,
728761
(minVer>>16)&0xff, (minVer>>8)&0xff, minVer&0xff,
729762
(maxVer>>16)&0xff, (maxVer>>8)&0xff, maxVer&0xff)
730763
}
731764

765+
log.Debugf("Ruby %d.%d.%d detected", (version>>16)&0xff, (version>>8)&0xff, version&0xff)
766+
732767
// Before Ruby 2.5 the symbol ruby_current_thread was used for the current execution
733768
// context but got replaced in [0] with ruby_current_execution_context_ptr.
734769
// With [1] the Ruby internal execution model changed and the symbol
@@ -740,26 +775,83 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr
740775
if version < rubyVersion(3, 0, 0) {
741776
currentCtxSymbol = "ruby_current_execution_context_ptr"
742777
}
743-
currentCtxPtr, err := ef.LookupSymbolAddress(currentCtxSymbol)
744-
if err != nil {
745-
return nil, fmt.Errorf("%v not found: %v", currentCtxSymbol, err)
746-
}
778+
779+
var currentEcTpBaseTlsOffset libpf.Address
780+
var interpRanges []util.Range
747781

748782
// rb_vm_exec is used to execute the Ruby frames in the Ruby VM and is called within
749783
// ruby_run_node which is the main executor function since Ruby v1.9.0
750784
// https://github.com/ruby/ruby/blob/587e6800086764a1b7c959976acef33e230dccc2/main.c#L47
751-
symbolName := libpf.SymbolName("rb_vm_exec")
785+
interpSymbolName := libpf.SymbolName("rb_vm_exec")
752786
if version < rubyVersion(2, 6, 0) {
753-
symbolName = libpf.SymbolName("ruby_exec_node")
787+
interpSymbolName = libpf.SymbolName("ruby_exec_node")
754788
}
755-
interpRanges, err := info.GetSymbolAsRanges(symbolName)
789+
790+
var rubyCurrentEcTlsSymbol = "ruby_current_ec"
791+
var currentEcSymbolAddress libpf.SymbolValue
792+
793+
currentEcSymbolName := libpf.SymbolName(rubyCurrentEcTlsSymbol)
794+
795+
log.Debugf("Ruby %d.%d.%d detected, looking for currentCtxPtr=%q, currentEcSymbol=%q",
796+
(version>>16)&0xff, (version>>8)&0xff, version&0xff, currentCtxSymbol, currentEcSymbolName)
797+
798+
// Symbol discovery strategy:
799+
// - Ruby < 3.0.4: Uses currentCtxPtr (global/ractor-based execution context)
800+
// - Ruby >= 3.0.4: Uses currentEcSymbol (TLS-based execution context via ruby_current_ec)
801+
// When direct lookup fails, VisitSymbols scans all symbols as fallback.
802+
// eBPF selects the appropriate method based on version at runtime.
803+
currentCtxPtr, err := ef.LookupSymbolAddress(currentCtxSymbol)
756804
if err != nil {
757-
return nil, err
805+
log.Debugf("Direct lookup of %v failed: %v, will try fallback", currentCtxSymbol, err)
806+
}
807+
808+
interpRanges, err = info.GetSymbolAsRanges(interpSymbolName)
809+
if err != nil {
810+
log.Debugf("Direct lookup of %v failed: %v, will try fallback", interpSymbolName, err)
811+
}
812+
813+
if err = ef.VisitSymbols(func(s libpf.Symbol) bool {
814+
if s.Name == currentEcSymbolName {
815+
currentEcSymbolAddress = s.Address
816+
}
817+
if s.Name == currentCtxSymbol {
818+
currentCtxPtr = s.Address
819+
}
820+
if len(interpRanges) == 0 && s.Name == interpSymbolName {
821+
interpRanges = []util.Range{{
822+
Start: uint64(s.Address),
823+
End: uint64(s.Address) + s.Size,
824+
}}
825+
}
826+
if len(interpRanges) > 0 && currentEcSymbolAddress != 0 && currentCtxPtr != 0 {
827+
return false
828+
}
829+
return true
830+
}); err != nil {
831+
log.Warnf("failed to visit symbols: %v", err)
832+
}
833+
834+
// NOTE: for Ruby 3.3.0+, if the Ruby binary is stripped, we have no way of locating
835+
// the ruby_current_ec TLS symbol.
836+
// We could potentially add a fallback for this in the future, but for now
837+
// only unstripped Ruby is supported. Many distro-supplied Rubies are stripped.
838+
if err = ef.VisitTLSRelocations(func(r pfelf.ElfReloc, symName string) bool {
839+
if symName == rubyCurrentEcTlsSymbol ||
840+
libpf.SymbolValue(r.Addend) == currentEcSymbolAddress {
841+
currentEcTpBaseTlsOffset = libpf.Address(r.Off)
842+
return false
843+
}
844+
return true
845+
}); err != nil {
846+
log.Warnf("failed to locate TLS descriptor: %v", err)
758847
}
759848

849+
log.Debugf("Discovered EC tls tpbase offset %x, fallback ctx %x, interp ranges: %v", currentEcTpBaseTlsOffset, currentCtxPtr, interpRanges)
850+
760851
rid := &rubyData{
761-
version: version,
762-
currentCtxPtr: libpf.Address(currentCtxPtr),
852+
version: version,
853+
currentEcTpBaseTlsOffset: libpf.Address(currentEcTpBaseTlsOffset),
854+
currentCtxPtr: libpf.Address(currentCtxPtr),
763855
}
764856

765857
vms := &rid.vmStructs
@@ -781,10 +873,14 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr
781873
// With Ruby 2.6 the field bp was added to rb_control_frame_t
782874
// https://github.com/ruby/ruby/commit/ed935aa5be0e5e6b8d53c3e7d76a9ce395dfa18b
783875
vms.control_frame_struct.size_of_control_frame_struct = 56
784-
default:
876+
case version < rubyVersion(3, 3, 0):
785877
// 3.1 adds new jit_return field at the end.
786878
// https://github.com/ruby/ruby/commit/9d8cc01b758f9385bd4c806f3daff9719e07faa0
787879
vms.control_frame_struct.size_of_control_frame_struct = 64
880+
default:
881+
// With Ruby 3.3 the bp field was removed from rb_control_frame_t again.
882+
// https://github.com/ruby/ruby/commit/f302e725e10ae05e613e2c24cae0741f65f2db91
883+
vms.control_frame_struct.size_of_control_frame_struct = 56
788884
}
789885
vms.iseq_struct.body = 16
790886

@@ -803,11 +899,21 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr
803899
vms.iseq_constant_body.insn_info_size = 136
804900
vms.iseq_constant_body.succ_index_table = 144
805901
vms.iseq_constant_body.size_of_iseq_constant_body = 312
806-
default:
902+
case version < rubyVersion(3, 3, 0):
807903
vms.iseq_constant_body.insn_info_body = 112
808904
vms.iseq_constant_body.insn_info_size = 128
809905
vms.iseq_constant_body.succ_index_table = 136
810906
vms.iseq_constant_body.size_of_iseq_constant_body = 320
907+
case version >= rubyVersion(3, 4, 0) && version < rubyVersion(3, 5, 0):
908+
vms.iseq_constant_body.insn_info_body = 112
909+
vms.iseq_constant_body.insn_info_size = 128
910+
vms.iseq_constant_body.succ_index_table = 136
911+
vms.iseq_constant_body.size_of_iseq_constant_body = 352
912+
default: // 3.3.x and 3.5.x have the same values
913+
vms.iseq_constant_body.insn_info_body = 112
914+
vms.iseq_constant_body.insn_info_size = 128
915+
vms.iseq_constant_body.succ_index_table = 136
916+
vms.iseq_constant_body.size_of_iseq_constant_body = 344
811917
}
812918
vms.iseq_location_struct.pathobj = 0
813919
vms.iseq_location_struct.base_label = 8
@@ -854,10 +960,18 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr
854960
vms.size_of_value = 8
855961

856962
if version >= rubyVersion(3, 0, 0) {
857-
if runtime.GOARCH == "amd64" {
858-
vms.rb_ractor_struct.running_ec = 0x208
963+
if version >= rubyVersion(3, 3, 0) {
964+
if runtime.GOARCH == "amd64" {
965+
vms.rb_ractor_struct.running_ec = 0x180
966+
} else {
967+
vms.rb_ractor_struct.running_ec = 0x190
968+
}
859969
} else {
860-
vms.rb_ractor_struct.running_ec = 0x218
970+
if runtime.GOARCH == "amd64" {
971+
vms.rb_ractor_struct.running_ec = 0x208
972+
} else {
973+
vms.rb_ractor_struct.running_ec = 0x218
974+
}
861975
}
862976
}
863977

0 commit comments

Comments
 (0)