diff --git a/.github/workflows/unit-test-on-pull-request.yml b/.github/workflows/unit-test-on-pull-request.yml index 0a5f7922d..8bfb0dd6b 100644 --- a/.github/workflows/unit-test-on-pull-request.yml +++ b/.github/workflows/unit-test-on-pull-request.yml @@ -238,15 +238,28 @@ jobs: sudo go test ./interpreter/... -v -run TestIntegration distro-qemu-tests: - name: Full distro QEMU tests (kernel ${{ matrix.kernel }}) + name: Full distro QEMU tests (kernel ${{ matrix.kernel }} ${{ matrix.target_arch }}) runs-on: ubuntu-24.04 timeout-minutes: 15 strategy: matrix: - kernel: - #- 5.10.217 # 5.10 doesn't have bpf cookies - - 5.15.159 - - 6.8.10 # Post-6.6, supports multi-uprobe + include: + - { target_arch: amd64, kernel: 5.4.276 } + - { target_arch: amd64, kernel: 5.10.217 } + - { target_arch: amd64, kernel: 5.15.159 } + - { target_arch: amd64, kernel: 6.1.91 } + - { target_arch: amd64, kernel: 6.6.31 } + - { target_arch: amd64, kernel: 6.8.10 } + - { target_arch: amd64, kernel: 6.9.1 } + - { target_arch: amd64, kernel: 6.12.16 } + - { target_arch: amd64, kernel: 6.16 } + + # ARM64 (NOTE: older ARM64 kernels are not available in Cilium repos) + # TODO: get these working + #- { target_arch: arm64, kernel: 6.6.31 } + #- { target_arch: arm64, kernel: 6.8.4 } + #- { target_arch: arm64, kernel: 6.9.1 } + #- { target_arch: arm64, kernel: 6.12.16 } steps: - name: Clone code uses: actions/checkout@v4 @@ -258,12 +271,17 @@ jobs: - name: Install dependencies run: | sudo apt-get update -y - sudo apt-get install -y qemu-system-x86 debootstrap systemtap-sdt-dev + case "${{ matrix.target_arch }}" in + amd64) sudo apt-get -y install qemu-system-x86;; + arm64) sudo apt-get -y install qemu-system-arm;; + *) echo >&2 "bug: bad arch selected"; exit 1;; + esac + sudo apt-get install -y debootstrap systemtap-sdt-dev - name: Download kernel run: | cd test/distro-qemu ./download-kernel.sh ${{ matrix.kernel }} - - name: Run RTLD tests in QEMU + - name: Run Full Distro tests in QEMU run: | cd test/distro-qemu ./build-and-run.sh ${{ matrix.kernel }} diff --git a/interpreter/gpu/cuda.go b/interpreter/gpu/cuda.go index a23f491ba..dac6301c6 100644 --- a/interpreter/gpu/cuda.go +++ b/interpreter/gpu/cuda.go @@ -50,7 +50,6 @@ type data struct { type Instance struct { interpreter.InstanceStubs path string - link interpreter.LinkCloser pid libpf.PID } @@ -68,9 +67,9 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr if err != nil { return nil, err } + // We use the existence of the .note.stapsdt section to determine if this is a - // process that has libparcagpucupti.so loaded. Its cheaper and more reliable than loading - // the symbol table. + // process that has libparcagpucupti.so loaded. probes, err := ef.ParseUSDTProbes() if err != nil { return nil, err @@ -96,7 +95,6 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr return nil, nil } - func (d *data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, _ libpf.Address, _ remotememory.RemoteMemory) (interpreter.Instance, error) { // Maps usdt probe name to ebpf program name. 
@@ -115,12 +113,19 @@ func (d *data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, _ libpf.Addre progNames[i] = "usdt_parcagpu_cuda_kernel" } } - lc, err := ebpf.AttachUSDTProbes(pid, d.path, "cuda_probe", d.probes, cookies, progNames, true) - if err != nil { - return nil, err + + var lc interpreter.LinkCloser + if d.link == nil { + var err error + lc, err = ebpf.AttachUSDTProbes(pid, d.path, "cuda_probe", d.probes, cookies, progNames) + if err != nil { + return nil, err + } + log.Debugf("[cuda] parcagpu USDT probes attached for %s", d.path) + d.link = lc + } else { + log.Debugf("[cuda] parcagpu USDT probes already attached for %s", d.path) } - log.Debugf("[cuda] parcagpu USDT probes attached for %s", d.path) - d.link = lc // Create and register fixer for this PID fixer := &gpuTraceFixer{ @@ -129,24 +134,14 @@ func (d *data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, _ libpf.Addre } gpuFixers.Store(pid, fixer) - return &Instance{ - link: lc, path: d.path, pid: pid, }, nil } -// Detach removes the fixer for this PID and closes the link if needed. func (i *Instance) Detach(_ interpreter.EbpfHandler, _ libpf.PID) error { gpuFixers.Delete(i.pid) - - if i.link != nil { - log.Debugf("[cuda] parcagpu USDT probes closed for %s", i.path) - if err := i.link.Detach(); err != nil { - return err - } - } return nil } diff --git a/interpreter/instancestubs.go b/interpreter/instancestubs.go index fd89ebd2d..0aa4f7f03 100644 --- a/interpreter/instancestubs.go +++ b/interpreter/instancestubs.go @@ -73,7 +73,7 @@ func (m *EbpfHandlerStubs) DeleteProcData(libpf.InterpreterType, libpf.PID) erro } func (mockup *EbpfHandlerStubs) AttachUSDTProbes(libpf.PID, string, string, []pfelf.USDTProbe, - []uint64, []string, bool) (LinkCloser, error) { + []uint64, []string) (LinkCloser, error) { return nil, nil } diff --git a/interpreter/rtld/rtld.go b/interpreter/rtld/rtld.go index c8455c975..038b191c2 100644 --- a/interpreter/rtld/rtld.go +++ b/interpreter/rtld/rtld.go @@ -22,7 +22,6 @@ type data struct { // instance represents a per-PID instance of the dlopen interpreter type instance struct { interpreter.InstanceStubs - lc interpreter.LinkCloser } // Loader detects if the ELF file contains the dlopen symbol in its dynamic symbol table @@ -37,7 +36,6 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr // Look for the dlopen symbol in the dynamic symbol table sym, err := ef.LookupSymbol("dlopen") if err != nil || sym == nil { - // No dlopen symbol found, this library doesn't support dynamic loading return nil, nil } @@ -52,26 +50,21 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr // Attach attaches the uprobe to the dlopen function func (d *data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, bias libpf.Address, _ remotememory.RemoteMemory) (interpreter.Instance, error) { - // Attach uprobe to dlopen using the address stored during Loader - lc, err := ebpf.AttachUprobe(pid, d.path, d.address, "uprobe_dlopen") - if err != nil { - return nil, fmt.Errorf("failed to attach uprobe to dlopen: %w", err) + var lc interpreter.LinkCloser + if d.lc == nil { + // Attach uprobe to dlopen using the address stored during Loader + var err error + lc, err = ebpf.AttachUprobe(pid, d.path, d.address, "uprobe_dlopen") + if err != nil { + return nil, fmt.Errorf("failed to attach uprobe to dlopen: %w", err) + } + d.lc = lc } log.Debugf("[dlopen] Attached uprobe to dlopen for PID %d on %s at 0x%x", pid, d.path, d.address) - d.lc = lc - return &instance{lc: 
lc}, nil
-}
-
-// Detach removes the uprobe
-func (i *instance) Detach(_ interpreter.EbpfHandler, pid libpf.PID) error {
-	log.Debugf("[dlopen] Detach called for PID %d", pid)
-	if i.lc != nil {
-		return i.lc.Detach()
-	}
-	return nil
+	return &instance{}, nil
 }
 
 // Unload cleans up the uprobe link
diff --git a/interpreter/rtld/rtld_test.go b/interpreter/rtld/rtld_test.go
index 67f576175..7eeb51e70 100644
--- a/interpreter/rtld/rtld_test.go
+++ b/interpreter/rtld/rtld_test.go
@@ -1,8 +1,6 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-//go:build amd64 && !integration
-
 package rtld_test
 
 import (
@@ -15,6 +13,7 @@ import (
 	"github.com/coreos/pkg/dlopen"
 	log "github.com/sirupsen/logrus"
 	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/ebpf-profiler/libpf"
 	"go.opentelemetry.io/ebpf-profiler/metrics"
 	"go.opentelemetry.io/ebpf-profiler/support"
 	"go.opentelemetry.io/ebpf-profiler/testutils"
@@ -23,22 +22,35 @@ import (
 	"go.opentelemetry.io/ebpf-profiler/util"
 )
 
-func TestIntegration(t *testing.T) {
+func test(t *testing.T) {
 	if !testutils.IsRoot() {
 		t.Skip("This test requires root privileges")
 	}
 
+	// Enable debug logging for CI debugging
+	if os.Getenv("DEBUG_TEST") != "" {
+		log.SetLevel(log.DebugLevel)
+	}
+
 	// Create a context for the tracer
 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 	defer cancel()
 
+	enabledTracers, err := tracertypes.Parse("RTLD")
+	require.NoError(t, err, "Failed to parse enabled tracers")
+
 	// Start the tracer with all tracers enabled
 	traceCh, trc := testutils.StartTracer(ctx, t,
-		tracertypes.AllTracers(),
+		enabledTracers,
 		&testutils.MockReporter{},
 		false)
 	defer trc.Close()
 
+	trc.StartPIDEventProcessor(ctx)
+
+	// Tickle this process to speed things up
+	trc.ForceProcessPID(libpf.PID(uint32(os.Getpid())))
+
 	// Consume traces to prevent blocking
 	go func() {
 		for {
@@ -73,70 +85,20 @@ func TestIntegration(t *testing.T) {
 		// Check that the metric was incremented
 		return finalCount > initialCount
-	}, 10*time.Second, 50*time.Millisecond)
+	}, 10*time.Second, 100*time.Millisecond)
 }
 
-func TestIntegrationSingleShot(t *testing.T) {
-	if !testutils.IsRoot() {
-		t.Skip("This test requires root privileges")
-	}
-
-	// Enable debug logging for CI debugging
-	if os.Getenv("DEBUG_TEST") != "" {
-		log.SetLevel(log.DebugLevel)
-	}
+func TestIntegration(t *testing.T) {
+	test(t)
+}
 
-	// Override HasMultiUprobeSupport to force single-shot mode
+func TestIntegrationSingleShot(t *testing.T) {
+	// Override HasMultiUprobeSupport to force single-shot mode on newer kernels.
 	multiUProbeOverride := false
 	util.SetTestOnlyMultiUprobeSupport(&multiUProbeOverride)
 	defer util.SetTestOnlyMultiUprobeSupport(nil)
 
-	// Create a context for the tracer
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-	defer cancel()
-
-	// Start the tracer with all tracers enabled
-	traceCh, trc := testutils.StartTracer(ctx, t,
-		tracertypes.AllTracers(),
-		&testutils.MockReporter{},
-		false)
-	defer trc.Close()
-
-	// Consume traces to prevent blocking
-	go func() {
-		for {
-			select {
-			case <-ctx.Done():
-				return
-			case <-traceCh:
-				// Discard traces
-			}
-		}
-	}()
-
-	// retry a few times to get the metric, our process has to be detected and
-	// the dlopen uprobe has to attach.
- require.Eventually(t, func() bool { - // Get the initial metric value - initialCount := getEBPFMetricValue(trc, metrics.IDDlopenUprobeHits) - //t.Logf("Initial dlopen uprobe metric count: %d", initialCount) - - // Use dlopen to load a shared library - // libm is a standard math library that's always present - lib, err := dlopen.GetHandle([]string{ - "/lib/x86_64-linux-gnu/libm.so.6", - "libm.so.6", - }) - require.NoError(t, err, "Failed to open libm.so.6") - defer lib.Close() - - // Get the metrics after dlopen - finalCount := getEBPFMetricValue(trc, metrics.IDDlopenUprobeHits) - //t.Logf("Final dlopen uprobe metric count: %d", finalCount) - - // Check that the metric was incremented - return finalCount > initialCount - }, 10*time.Second, 50*time.Millisecond) + test(t) } func getEBPFMetricValue(trc *tracer.Tracer, metricID metrics.MetricID) uint64 { diff --git a/interpreter/types.go b/interpreter/types.go index 0a4e77687..ef1698169 100644 --- a/interpreter/types.go +++ b/interpreter/types.go @@ -117,17 +117,13 @@ type EbpfHandler interface { // AttachUSDTProbes attaches an eBPF program to USDT probes in the specified binary. // // Parameters: - // - pid: The process ID. Required for older kernels (pre-6.6) that cannot attach to shared - // libraries without a PID. On newer kernels with multi-uprobe support, this is ignored - // when probeAll is true. + // - pid: The process ID. Required for getting path to exe via procfs. // - path: Full path to the binary containing the USDT probes. // - multiProgName: Name of eBPF program to use for multi-uprobe attachment (newer kernels). // - probes: The USDT probe definitions to attach to. // - cookies: Optional cookies to pass to the eBPF program (one per probe, or nil). // - singleProgNames: eBPF program names for single-shot attachment (older kernels, one // per probe). - // - probeAll: If true and the kernel supports it, attach to all processes using this - // binary. If false, only attach to the specified pid. // // Returns: // - LinkCloser: A handle to the attached probes. The caller must: @@ -136,14 +132,13 @@ type EbpfHandler interface { // 2. Call LinkCloser.Detach() from Instance.Detach() to detach from the specific PID // 3. Call LinkCloser.Unload() from Data.Unload() to fully clean up the eBPF program AttachUSDTProbes(pid libpf.PID, path, multiProgName string, probes []pfelf.USDTProbe, - cookies []uint64, singleProgNames []string, probeAll bool) (LinkCloser, error) + cookies []uint64, singleProgNames []string) (LinkCloser, error) // AttachUprobe attaches an eBPF uprobe to a function at a specific offset in a binary AttachUprobe(pid libpf.PID, path string, offset uint64, progName string) (LinkCloser, error) } type LinkCloser interface { - Detach() error Unload() error } diff --git a/processmanager/ebpf/ebpf.go b/processmanager/ebpf/ebpf.go index 1154e6d2e..208e46ed9 100644 --- a/processmanager/ebpf/ebpf.go +++ b/processmanager/ebpf/ebpf.go @@ -143,8 +143,9 @@ func LoadMaps(ctx context.Context, maps map[string]*cebpf.Map, } type linkCloser struct { - detachLink []link.Link - unloadLink link.Link + unloadLink []link.Link + unloadSpecIDs []uint32 // spec IDs to delete when unload happens + specMap *cebpf.Map // reference to the spec map for cleanup } // populateUSDTSpecMaps parses USDT probe arguments and populates the BPF spec maps. 
@@ -185,29 +186,60 @@ func populateUSDTSpecMaps(probes []pfelf.USDTProbe, specMap *cebpf.Map, startSpe return specIDs, nil } -func (lc *linkCloser) Detach() error { +/* + func (lc *linkCloser) Detach() error { + var errs []error + if lc.detachLink != nil { + for _, l := range lc.detachLink { + if err := l.Close(); err != nil { + errs = append(errs, err) + } + } + } + // Clean up spec IDs associated with detach + if lc.specMap != nil && len(lc.detachSpecIDs) > 0 { + for _, specID := range lc.detachSpecIDs { + if specID != 0 { + if err := lc.specMap.Delete(&specID); err != nil { + log.Debugf("Failed to delete spec ID %d from map: %v", specID, err) + errs = append(errs, err) + } else { + log.Debugf("Deleted spec ID %d from map during detach", specID) + } + } + } + } + return errors.Join(errs...) + } +*/ +func (lc *linkCloser) Unload() error { var errs []error - if lc.detachLink != nil { - for _, l := range lc.detachLink { + if lc.unloadLink != nil { + for _, l := range lc.unloadLink { if err := l.Close(); err != nil { errs = append(errs, err) } } } - return errors.Join(errs...) -} - -func (lc *linkCloser) Unload() error { - if lc.unloadLink != nil { - return lc.unloadLink.Close() + // Clean up spec IDs associated with unload + if lc.specMap != nil && len(lc.unloadSpecIDs) > 0 { + for _, specID := range lc.unloadSpecIDs { + if specID != 0 { + if err := lc.specMap.Delete(&specID); err != nil { + log.Debugf("Failed to delete spec ID %d from map: %v", specID, err) + errs = append(errs, err) + } else { + log.Debugf("Deleted spec ID %d from map during unload", specID) + } + } + } } - return nil + return errors.Join(errs...) } // AttachUSDTProbes allows interpreters to attach to usdt probes. func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName string, - probes []pfelf.USDTProbe, cookies []uint64, singleProgNames []string, - probeAll bool) (interpreter.LinkCloser, error) { + probes []pfelf.USDTProbe, cookies []uint64, singleProgNames []string) (interpreter.LinkCloser, error) { containerPath := fmt.Sprintf("/proc/%d/root/%s", pid, path) // TODO: This will crack open the exe with debug.elf and read symbols, we should @@ -286,6 +318,9 @@ func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName st useMulti := util.HasMultiUprobeSupport() + // Determine PID for attachment + attachPID := 0 + // If multiProgName is empty or multi-probe not supported, use individual programs (one per probe) if multiProgName == "" || !useMulti { if singleProgNames == nil { @@ -326,7 +361,7 @@ func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName st uprobeOpts := &link.UprobeOptions{ Address: probe.Location, RefCtrOffset: probe.SemaphoreOffset, - PID: int(pid), + PID: int(attachPID), } // Set cookie if provided @@ -348,7 +383,11 @@ func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName st } log.Infof("Attached %d individual probes to %s in PID %d", len(links), path, pid) - return &linkCloser{detachLink: links}, nil + return &linkCloser{ + unloadLink: links, + unloadSpecIDs: specIDs, + specMap: impl.usdtSpecsMap, + }, nil } prog := impl.userProgs[multiProgName] @@ -363,12 +402,6 @@ func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName st return nil, errors.New("attaching multiple probes requires multi support (kernel 6.6+)") } - // Determine PID for attachment - attachPID := int(pid) - if probeAll { - attachPID = 0 // 0 means all processes - } - // Single probe with single program - use single uprobe if 
len(probes) == 1 { uprobeOpts := &link.UprobeOptions{ @@ -376,7 +409,7 @@ func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName st RefCtrOffset: probes[0].SemaphoreOffset, PID: attachPID, } - if finalCookies != nil && len(finalCookies) > 0 { + if len(finalCookies) > 0 { uprobeOpts.Cookie = finalCookies[0] } @@ -386,7 +419,11 @@ func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName st probes[0].Name, probes[0].Location, err) } log.Infof("Attached probe %s to usdt %s in PID %d", multiProgName, path, pid) - return &linkCloser{unloadLink: l}, nil + return &linkCloser{ + unloadLink: []link.Link{l}, + unloadSpecIDs: specIDs, + specMap: impl.usdtSpecsMap, + }, nil } // Multiple probes - use UprobeMulti @@ -407,7 +444,11 @@ func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName st } log.Infof("Attached probe %s to usdt %s in PID %d", multiProgName, path, pid) - return &linkCloser{unloadLink: lnk}, nil + return &linkCloser{ + unloadLink: []link.Link{lnk}, + unloadSpecIDs: specIDs, + specMap: impl.usdtSpecsMap, + }, nil } // loadProgram loads an eBPF program from progSpec and populates the related maps. @@ -475,27 +516,38 @@ func (impl *ebpfMapsImpl) AttachUprobe(pid libpf.PID, path string, offset uint64 impl.userProgs = make(map[string]*cebpf.Program) } + useMulti := util.HasMultiUprobeSupport() // Load the program if not already loaded prog := impl.userProgs[progName] if prog == nil { - if loadErr := impl.loadUSDTProgram(progName, false); loadErr != nil { + if loadErr := impl.loadUSDTProgram(progName, useMulti); loadErr != nil { return nil, loadErr } prog = impl.userProgs[progName] } - // Attach the uprobe - lnk, err := exe.Uprobe("", prog, &link.UprobeOptions{ - Address: offset, - PID: int(pid), - }) - if err != nil { - return nil, fmt.Errorf("failed to attach uprobe to %s at offset 0x%x: %w", - path, offset, err) + var lnk link.Link + if useMulti { + // Attach uprobe with multi support + lnk, err = exe.UprobeMulti([]string{progName}, prog, &link.UprobeMultiOptions{ + Addresses: []uint64{offset}, + }) + if err != nil { + return nil, fmt.Errorf("failed to attach uprobe-multi to %s at offset 0x%x: %w", + path, offset, err) + } + } else { + // Attach the uprobe + lnk, err = exe.Uprobe("", prog, &link.UprobeOptions{ + Address: offset, + }) + if err != nil { + return nil, fmt.Errorf("failed to attach uprobe to %s at offset 0x%x: %w", + path, offset, err) + } } - log.Infof("Attached uprobe %s to %s at offset 0x%x in PID %d", progName, path, offset, pid) - return &linkCloser{detachLink: []link.Link{lnk}}, nil + return &linkCloser{unloadLink: []link.Link{lnk}}, nil } func (impl *ebpfMapsImpl) CoredumpTest() bool { diff --git a/processmanager/execinfomanager/manager.go b/processmanager/execinfomanager/manager.go index 4cc4a3169..92f7314c0 100644 --- a/processmanager/execinfomanager/manager.go +++ b/processmanager/execinfomanager/manager.go @@ -140,10 +140,18 @@ func NewExecutableInfoManager( if includeTracers.Has(types.Labels) { interpreterLoaders = append(interpreterLoaders, golabels.Loader, customlabels.Loader) } - interpreterLoaders = append(interpreterLoaders, oomwatcher.Loader, rtld.Loader) + if includeTracers.Has(types.RTLD) { + interpreterLoaders = append(interpreterLoaders, rtld.Loader) + } + interpreterLoaders = append(interpreterLoaders, oomwatcher.Loader) if includeTracers.Has(types.CUDATracer) { - interpreterLoaders = append(interpreterLoaders, gpu.Loader) + // USDT support requires cookies + if util.HasBpfGetAttachCookie() 
{ + interpreterLoaders = append(interpreterLoaders, gpu.Loader) + } else { + log.Warn("CUDA USDT tracing is not supported on this kernel (missing bpf_get_attach_cookie)") + } } deferredFileIDs, err := lru.NewSynced[host.FileID, libpf.Void](deferredFileIDSize, diff --git a/support/usdt/test/usdt_integration_test.go b/support/usdt/test/usdt_integration_test.go index 72792800e..7389f5092 100644 --- a/support/usdt/test/usdt_integration_test.go +++ b/support/usdt/test/usdt_integration_test.go @@ -21,6 +21,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/tracer" tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types" + "go.opentelemetry.io/ebpf-profiler/util" ) type mockIntervals struct{} @@ -36,15 +37,15 @@ func (mockReporter) ExecutableMetadata(_ *reporter.ExecutableMetadataArgs) {} // testSetup encapsulates all the common test setup type testSetup struct { - t *testing.T - testBinary string - testProbes map[string]pfelf.USDTProbe - probeList []pfelf.USDTProbe - tracer *tracer.Tracer - ebpfHandler interpreter.EbpfHandler - resultsMap *cebpf.Map - ctx context.Context - cancelFunc context.CancelFunc + t *testing.T + testBinary string + testProbes map[string]pfelf.USDTProbe + probeList []pfelf.USDTProbe + tracer *tracer.Tracer + ebpfHandler interpreter.EbpfHandler + resultsMap *cebpf.Map + ctx context.Context + cancelFunc context.CancelFunc } // setupTest performs all common initialization for USDT integration tests @@ -53,6 +54,10 @@ func setupTest(t *testing.T) *testSetup { t.Skip("This test requires root privileges to load eBPF programs") } + if !util.HasBpfGetAttachCookie() { + t.Skip("This test requires kernel support for bpf_get_attach_cookie") + } + // Get the test binary path testBinary, err := os.Executable() if err != nil { @@ -218,14 +223,14 @@ func TestUSDTProbeWithEBPFSingle(t *testing.T) { // Individual program names for each probe progNames := []string{ - "usdt_simple_probe", - "usdt_memory_probe", - "usdt_const_probe", - "usdt_mixed_probe", - "usdt_int32_args", - "usdt_int64_args", - "usdt_mixed_refs", - "usdt_uint8_args", + "simple_probe", + "memory_probe", + "const_probe", + "mixed_probe", + "int32_args", + "int64_args", + "mixed_refs", + "uint8_args", } // Attach USDT probes with individual programs @@ -237,12 +242,11 @@ func TestUSDTProbeWithEBPFSingle(t *testing.T) { setup.probeList, nil, // no user cookies, just spec IDs progNames, - false, // attach to current PID only ) if err != nil { t.Fatalf("failed to attach USDT probes: %v", err) } - defer lc.Detach() + defer lc.Unload() // Log what was attached for i, probe := range setup.probeList { @@ -260,6 +264,10 @@ func TestUSDTProbeWithEBPFMulti(t *testing.T) { setup := setupTest(t) defer setup.cleanup() + if !util.HasMultiUprobeSupport() { + t.Skip("This test requires kernel support for uprobe multi-attach") + } + // Use probe IDs (1-8) as cookies for dispatch in the multi-probe program cookies := []uint64{1, 2, 3, 4, 5, 6, 7, 8} @@ -272,12 +280,11 @@ func TestUSDTProbeWithEBPFMulti(t *testing.T) { setup.probeList, cookies, // cookies for dispatch (probe IDs 1-8) nil, // no individual programs - false, // attach to current PID only ) if err != nil { t.Fatalf("failed to attach USDT probes: %v", err) } - defer lc.Detach() + defer lc.Unload() // Log what was attached t.Logf("Attached multi-probe program usdt_test_multi to %d probes", len(setup.probeList)) diff --git a/test/distro-qemu/build-and-run.sh b/test/distro-qemu/build-and-run.sh index 7334a2481..956ac6703 100755 --- 
a/test/distro-qemu/build-and-run.sh +++ b/test/distro-qemu/build-and-run.sh @@ -14,6 +14,16 @@ CACHE_DIR="${CACHE_DIR:-/tmp/debootstrap-cache}" echo "Building rootfs with $DISTRO $RELEASE..." # Clean up previous builds +# First, unmount any leftover mounts from previous debootstrap runs +if [ -d "$ROOTFS_DIR" ]; then + echo "Cleaning up any mounted filesystems in $ROOTFS_DIR..." + # Find all mount points under ROOTFS_DIR and unmount them in reverse order (deepest first) + findmnt -o TARGET -n -l | grep "^$(pwd)/$ROOTFS_DIR" | sort -r | while read -r mountpoint; do + echo " Unmounting $mountpoint" + sudo umount "$mountpoint" || sudo umount -l "$mountpoint" || true + done +fi + sudo rm -rf "$ROOTFS_DIR" "$OUTPUT_DIR" mkdir -p "$ROOTFS_DIR" "$OUTPUT_DIR" "$CACHE_DIR" @@ -85,11 +95,16 @@ if [[ "${USE_DOCKER}" == "1" ]] && command -v docker &> /dev/null; then wget -q https://go.dev/dl/go1.24.7.linux-${GOARCH}.tar.gz && \ tar -C /usr/local -xzf go1.24.7.linux-${GOARCH}.tar.gz && \ export PATH=/usr/local/go/bin:\$PATH && \ - CGO_ENABLED=1 go test -c ../../interpreter/rtld ../../support/usdt" + CGO_ENABLED=1 go test -c ../../interpreter/rtld ../../support/usdt/test" else # Local build with cross-compilation if needed echo "Building locally for ${GOARCH}..." - CGO_ENABLED=1 GOARCH=${GOARCH} go test -c ../../interpreter/rtld ../../support/usdt + if [ "$GOARCH" = "arm64" ]; then + # Cross-compile for ARM64 using aarch64-linux-gnu-gcc + CGO_ENABLED=1 GOARCH=${GOARCH} CC=aarch64-linux-gnu-gcc go test -c ../../interpreter/rtld ../../support/usdt/test + else + CGO_ENABLED=1 GOARCH=${GOARCH} go test -c ../../interpreter/rtld ../../support/usdt/test + fi fi # Copy test binary into rootfs @@ -129,7 +144,7 @@ export DEBUG_TEST=1 # Run the tests echo "" -/rtld.test -test.v && /usdt.test -test.v +/rtld.test -test.v && /test.test -test.v RESULT=$? if [ $RESULT -eq 0 ]; then @@ -204,7 +219,8 @@ echo "" echo "===== Starting QEMU with kernel ${KERNEL_VERSION} on ${QEMU_ARCH} =====" echo "" -# Run QEMU +# Run QEMU and capture output +QEMU_OUTPUT=$(mktemp) ${sudo} qemu-system-${QEMU_ARCH} ${additionalQemuArgs} \ -nographic \ -monitor none \ @@ -214,15 +230,28 @@ ${sudo} qemu-system-${QEMU_ARCH} ${additionalQemuArgs} \ -initrd "$OUTPUT_DIR/initramfs.gz" \ -append "${CONSOLE_ARG} init=/init quiet loglevel=3" \ -no-reboot \ - -display none - -EXIT_CODE=$? 
+ -display none \ + | tee "$QEMU_OUTPUT" -# QEMU with sysrq poweroff returns 0 on clean shutdown -if [ $EXIT_CODE -eq 0 ]; then +# Parse output for test result +if grep -q "===== TEST PASSED =====" "$QEMU_OUTPUT"; then + rm -f "$QEMU_OUTPUT" + echo "" echo "✅ Test completed successfully" exit 0 +elif grep -q "===== TEST FAILED" "$QEMU_OUTPUT"; then + rm -f "$QEMU_OUTPUT" + echo "" + echo "❌ Test failed" + exit 1 +elif grep -q "===== TEST TIMED OUT =====" "$QEMU_OUTPUT"; then + rm -f "$QEMU_OUTPUT" + echo "" + echo "❌ Test timed out" + exit 124 else - echo "❌ Test failed with QEMU exit code $EXIT_CODE" - exit $EXIT_CODE + rm -f "$QEMU_OUTPUT" + echo "" + echo "❌ Could not determine test result (QEMU may have crashed)" + exit 2 fi \ No newline at end of file diff --git a/tools/coredump/ebpfmaps.go b/tools/coredump/ebpfmaps.go index 548f2bf0d..ccf58a0b0 100644 --- a/tools/coredump/ebpfmaps.go +++ b/tools/coredump/ebpfmaps.go @@ -266,7 +266,7 @@ func (emc *ebpfMapsCoredump) SupportsLPMTrieBatchOperations() bool { } func (emc *ebpfMapsCoredump) AttachUSDTProbes(_ libpf.PID, _, _ string, _ []pfelf.USDTProbe, - _ []uint64, _ []string, _ bool) (interpreter.LinkCloser, error) { + _ []uint64, _ []string) (interpreter.LinkCloser, error) { return nil, nil } diff --git a/tracer/tracer.go b/tracer/tracer.go index 3c5975ae7..1a3b010fa 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -1204,3 +1204,7 @@ func (t *Tracer) GetEbpfHandler() interpreter.EbpfHandler { func (t *Tracer) GetInterpretersForPID(pid libpf.PID) []interpreter.Instance { return t.processManager.GetInterpretersForPID(pid) } + +func (t *Tracer) ForceProcessPID(pid libpf.PID) { + t.pidEvents <- libpf.PIDTID(uint64(pid) + uint64(pid)<<32) +} diff --git a/tracer/types/parse.go b/tracer/types/parse.go index 775cde3d4..0295185b2 100644 --- a/tracer/types/parse.go +++ b/tracer/types/parse.go @@ -25,6 +25,7 @@ const ( LuaJITTracer GoTracer Labels + RTLD CUDATracer // maxTracers indicates the max. number of different tracers @@ -42,6 +43,7 @@ var tracerTypeToName = map[tracerType]string{ LuaJITTracer: "luajit", GoTracer: "go", Labels: "labels", + RTLD: "rtld", CUDATracer: "cuda", } diff --git a/util/util.go b/util/util.go index 375c87aba..e64db0aac 100644 --- a/util/util.go +++ b/util/util.go @@ -5,6 +5,7 @@ package util // import "go.opentelemetry.io/ebpf-profiler/util" import ( "bytes" + "errors" "fmt" "math/bits" "strings" @@ -16,6 +17,7 @@ import ( "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/link" log "github.com/sirupsen/logrus" "go.opentelemetry.io/ebpf-profiler/libpf/hash" "golang.org/x/sys/unix" @@ -106,6 +108,9 @@ var ( // multiUprobeSupportCache caches the result of probing for multi-uprobe support multiUprobeSupportOnce sync.Once multiUprobeSupportCached bool + // bpfGetAttachCookieCache caches the result of probing for bpf_get_attach_cookie support + bpfGetAttachCookieOnce sync.Once + bpfGetAttachCookieCached bool ) // SetTestOnlyMultiUprobeSupport overrides HasMultiUprobeSupport for testing. @@ -145,10 +150,99 @@ func probeBpfGetAttachCookie() bool { return true } +// HasBpfGetAttachCookie checks if the kernel supports the bpf_get_attach_cookie helper. +// This function uses a cached, once-calculated value for performance. +// +// Note: This function requires CAP_BPF or CAP_SYS_ADMIN capabilities to load the probe +// program. The profiler should already have these privileges. 
+func HasBpfGetAttachCookie() bool { + bpfGetAttachCookieOnce.Do(func() { + bpfGetAttachCookieCached = probeBpfGetAttachCookie() + }) + + return bpfGetAttachCookieCached +} + +// probeBpfUprobeMultiLink probes for uprobe_multi link support by attempting to create +// an invalid uprobe_multi link. This is modeled after libbpf's probe_uprobe_multi_link. +// +// The probe works in two steps: +// 1. Try to create a link to "/" (invalid binary) which should fail with EBADF if supported +// 2. Verify PID filtering works correctly by testing with pid=-1 (should fail with EINVAL) +// +// The second check is important because early kernel versions had broken PID filtering +// (they did thread filtering instead of process filtering). +func probeBpfUprobeMultiLink() bool { + // Create a minimal program with BPF_TRACE_UPROBE_MULTI expected attach type + insns := asm.Instructions{ + asm.Mov.Imm(asm.R0, 0), + asm.Return(), + } + + spec := &ebpf.ProgramSpec{ + Type: ebpf.Kprobe, + Instructions: insns, + License: "GPL", + AttachType: ebpf.AttachTraceUprobeMulti, + AttachTo: "", + } + + prog, err := ebpf.NewProgramWithOptions(spec, ebpf.ProgramOptions{ + LogDisabled: true, + }) + if err != nil { + return false + } + defer func() { + if err := prog.Close(); err != nil { + log.Warnf("Failed to close probe program: %v", err) + } + }() + + // Creating uprobe in '/' binary should fail with EBADF if uprobe_multi is supported + ex, err := link.OpenExecutable("/") + if err != nil { + return false + } + + offset := uint64(0) + opts := &link.UprobeMultiOptions{ + Addresses: []uint64{offset}, + } + + lnk, err := ex.UprobeMulti(nil, prog, opts) + if err == nil { + // Unexpectedly succeeded, clean up and return false + _ = lnk.Close() + return false + } + // Check if we got EBADF (expected error for invalid binary with uprobe_multi support) + if !errors.Is(err, unix.EBADF) { + return false + } + + // Verify PID filtering works correctly. Initial multi-uprobe support in kernel + // didn't handle PID filtering correctly (it was doing thread filtering, not process + // filtering). We need to be conservative here because multi-uprobe selection happens + // early at load time, while the use of PID filtering is known late at attachment time. + // + // Creating uprobe with pid == -1 (invalid PID) for '/' binary should fail with EINVAL + // on kernels with fixed PID filtering logic; otherwise ESRCH or EBADF would be returned. + opts.PID = ^uint32(0) // -1 as unsigned + lnk, err = ex.UprobeMulti(nil, prog, opts) + if err == nil { + // Unexpectedly succeeded, clean up and return false + _ = lnk.Close() + return false + } + + // We expect EINVAL for invalid PID on kernels with proper PID filtering + return errors.Is(err, unix.EINVAL) +} + // HasMultiUprobeSupport checks if the kernel supports uprobe multi-attach. // Multi-uprobes are needed because single-shot uprobes don't work for shared libraries. -// This function probes for bpf_get_attach_cookie support, which is required for -// multi-uprobes and was introduced alongside them in kernel 6.6. +// This function probes for uprobe_multi link support, which was introduced in kernel 6.6. // // Note: This function requires CAP_BPF or CAP_SYS_ADMIN capabilities to load the probe // program. The profiler should already have these privileges. @@ -158,7 +252,7 @@ func HasMultiUprobeSupport() bool { } multiUprobeSupportOnce.Do(func() { - multiUprobeSupportCached = probeBpfGetAttachCookie() + multiUprobeSupportCached = probeBpfUprobeMultiLink() }) return multiUprobeSupportCached