Skip to content

Commit 6ec57b2

Browse files
committed
Use test helper infra in CUDA e2e tests to simplify, and remove
InstrumentCudaLaunch. Also remove the CUDAVerifier tests, as they are subsumed by the e2e tests, and the arm tests take 5m with all that extra BPF verification going through QEMU.
1 parent 3dc1249 commit 6ec57b2

File tree

2 files changed

+10
-249
lines changed

2 files changed

+10
-249
lines changed

test/cudaverify/cuda_verifier_test.go

Lines changed: 9 additions & 246 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"bytes"
77
"context"
88
"flag"
9-
"math"
109
"os"
1110
"testing"
1211
"time"
@@ -15,213 +14,15 @@ import (
1514
"github.com/cilium/ebpf/perf"
1615
"github.com/stretchr/testify/require"
1716

18-
"go.opentelemetry.io/ebpf-profiler/interpreter"
1917
"go.opentelemetry.io/ebpf-profiler/interpreter/gpu"
2018
"go.opentelemetry.io/ebpf-profiler/libpf"
21-
"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
22-
"go.opentelemetry.io/ebpf-profiler/reporter/samples"
2319
"go.opentelemetry.io/ebpf-profiler/testutils"
24-
"go.opentelemetry.io/ebpf-profiler/tracer"
2520
tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types"
2621
"go.opentelemetry.io/ebpf-profiler/util"
2722
)
2823

2924
var soPath = flag.String("so-path", "/libparcagpucupti.so", "path to libparcagpucupti.so")
3025

31-
type mockIntervals struct{}
32-
33-
func (mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second }
34-
func (mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond }
35-
func (mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second }
36-
func (mockIntervals) ExecutableUnloadDelay() time.Duration { return 1 * time.Second }
37-
38-
type mockReporter struct{}
39-
40-
func (mockReporter) ExecutableKnown(_ libpf.FileID) bool { return true }
41-
42-
// discardTraceReporter is a TraceReporter that silently discards all traces.
43-
type discardTraceReporter struct{}
44-
45-
func (discardTraceReporter) ReportTraceEvent(_ *libpf.Trace, _ *samples.TraceEventMeta) error {
46-
return nil
47-
}
48-
49-
// parseProbes opens the .so and extracts the required parcagpu USDT probes.
50-
func parseProbes(t *testing.T) []pfelf.USDTProbe {
51-
t.Helper()
52-
53-
ef, err := pfelf.Open(*soPath)
54-
require.NoError(t, err, "failed to open %s", *soPath)
55-
defer ef.Close()
56-
57-
require.NoError(t, ef.LoadSections(), "failed to load sections")
58-
59-
allProbes, err := ef.ParseUSDTProbes()
60-
require.NoError(t, err, "failed to parse USDT probes")
61-
62-
var requiredProbes []pfelf.USDTProbe
63-
for _, probe := range allProbes {
64-
if probe.Provider == "parcagpu" &&
65-
(probe.Name == "cuda_correlation" || probe.Name == "kernel_executed" || probe.Name == "activity_batch") {
66-
requiredProbes = append(requiredProbes, probe)
67-
}
68-
}
69-
// Need cuda_correlation + at least one of kernel_executed/activity_batch
70-
hasCorrelation := false
71-
hasKernel := false
72-
for _, p := range requiredProbes {
73-
switch p.Name {
74-
case "cuda_correlation":
75-
hasCorrelation = true
76-
case "kernel_executed", "activity_batch":
77-
hasKernel = true
78-
}
79-
}
80-
require.True(t, hasCorrelation, "missing cuda_correlation probe")
81-
require.True(t, hasKernel, "missing kernel_executed or activity_batch probe")
82-
83-
for _, p := range requiredProbes {
84-
t.Logf("Found probe: provider=%s name=%s location=0x%x args=%s",
85-
p.Provider, p.Name, p.Location, p.Arguments)
86-
}
87-
return requiredProbes
88-
}
89-
90-
// createTracer creates a Tracer with InstrumentCudaLaunch enabled so the CUDA
91-
// eBPF programs (tail-call destinations) are loaded and the verifier runs.
92-
func createTracer(t *testing.T) (*tracer.Tracer, interpreter.EbpfHandler, context.CancelFunc) {
93-
t.Helper()
94-
95-
ctx, cancel := context.WithCancel(context.Background())
96-
enabledTracers, _ := tracertypes.Parse("")
97-
98-
tr, err := tracer.NewTracer(ctx, &tracer.Config{
99-
Intervals: &mockIntervals{},
100-
IncludeTracers: enabledTracers,
101-
FilterErrorFrames: false,
102-
SamplesPerSecond: 20,
103-
MapScaleFactor: 0,
104-
KernelVersionCheck: false,
105-
BPFVerifierLogLevel: 0,
106-
ProbabilisticInterval: 100,
107-
ProbabilisticThreshold: 100,
108-
OffCPUThreshold: 1 * math.MaxUint32,
109-
InstrumentCudaLaunch: true,
110-
})
111-
require.NoError(t, err, "failed to create tracer")
112-
113-
ebpfHandler := tr.GetEbpfHandler()
114-
return tr, ebpfHandler, cancel
115-
}
116-
117-
// buildCookiesAndProgNames builds the cookie and program-name slices that
118-
// mirror interpreter/gpu/cuda.go Attach().
119-
func buildCookiesAndProgNames(probes []pfelf.USDTProbe) ([]uint64, []string) {
120-
cookies := make([]uint64, len(probes))
121-
progNames := make([]string, len(probes))
122-
for i, probe := range probes {
123-
switch probe.Name {
124-
case "cuda_correlation":
125-
cookies[i] = 0 // CudaProgCorrelation
126-
progNames[i] = "cuda_correlation"
127-
case "kernel_executed":
128-
cookies[i] = 1 // CudaProgKernelExec
129-
progNames[i] = "cuda_kernel_exec"
130-
case "activity_batch":
131-
cookies[i] = 2 // CudaProgActivityBatch
132-
progNames[i] = "cuda_activity_batch"
133-
}
134-
}
135-
return cookies, progNames
136-
}
137-
138-
// TestCUDAVerifierSingleShot verifies CUDA eBPF programs pass the BPF verifier
139-
// using individual per-probe program attachment (works on kernel 5.15+).
140-
// Forces single-shot mode so that AttachUSDTProbes uses per-probe attachment.
141-
func TestCUDAVerifierSingleShot(t *testing.T) {
142-
if os.Getuid() != 0 {
143-
t.Skip("requires root to load eBPF programs")
144-
}
145-
if !util.HasBpfGetAttachCookie() {
146-
t.Skip("requires kernel support for bpf_get_attach_cookie (5.15+)")
147-
}
148-
149-
// Force single-shot mode so loadUSDTProgram does not set
150-
// AttachTraceUprobeMulti.
151-
noMulti := false
152-
util.SetTestOnlyMultiUprobeSupport(&noMulti)
153-
defer util.SetTestOnlyMultiUprobeSupport(nil)
154-
155-
probes := parseProbes(t)
156-
157-
testutils.InitializeMetrics()
158-
tr, ebpfHandler, cancel := createTracer(t)
159-
defer tr.Close()
160-
defer cancel()
161-
162-
cookies, progNames := buildCookiesAndProgNames(probes)
163-
164-
lc, err := ebpfHandler.AttachUSDTProbes(
165-
libpf.PID(os.Getpid()),
166-
*soPath,
167-
"", // no multi-prog
168-
probes,
169-
cookies,
170-
progNames,
171-
)
172-
require.NoError(t, err, "AttachUSDTProbes (single-shot) failed — BPF verifier rejected CUDA programs")
173-
defer lc.Unload()
174-
175-
t.Log("SingleShot: all CUDA eBPF programs passed the BPF verifier")
176-
}
177-
178-
// TestCUDAVerifierMultiProbe verifies CUDA eBPF programs pass the BPF verifier
179-
// using multi-uprobe attachment with cookies (requires kernel 6.6+).
180-
func TestCUDAVerifierMultiProbe(t *testing.T) {
181-
if os.Getuid() != 0 {
182-
t.Skip("requires root to load eBPF programs")
183-
}
184-
if !util.HasBpfGetAttachCookie() {
185-
t.Skip("requires kernel support for bpf_get_attach_cookie (5.15+)")
186-
}
187-
if !util.HasMultiUprobeSupport() {
188-
t.Skip("requires kernel support for uprobe multi-attach (6.6+)")
189-
}
190-
191-
probes := parseProbes(t)
192-
193-
testutils.InitializeMetrics()
194-
195-
tr, ebpfHandler, cancel := createTracer(t)
196-
defer tr.Close()
197-
defer cancel()
198-
199-
cookies, progNames := buildCookiesAndProgNames(probes)
200-
201-
// Populate the tail-call prog array for activity_batch (the only tail-call
202-
// target — correlation and kernel_exec are inlined in cuda_probe).
203-
for _, probe := range probes {
204-
if probe.Name == "activity_batch" {
205-
err := ebpfHandler.UpdateProgArray("cuda_progs", 0, "cuda_activity_batch_tail")
206-
require.NoError(t, err, "UpdateProgArray failed for cuda_activity_batch")
207-
break
208-
}
209-
}
210-
211-
lc, err := ebpfHandler.AttachUSDTProbes(
212-
libpf.PID(os.Getpid()),
213-
*soPath,
214-
"cuda_probe", // multi-probe program
215-
probes,
216-
cookies,
217-
progNames,
218-
)
219-
require.NoError(t, err, "AttachUSDTProbes (multi-probe) failed — BPF verifier rejected CUDA programs")
220-
defer lc.Unload()
221-
222-
t.Log("MultiProbe: all CUDA eBPF programs passed the BPF verifier")
223-
}
224-
22526
// runEndToEnd exercises the full process-manager driven GPU probe attachment flow:
22627
//
22728
// 1. Start the full tracer pipeline (PID event processor, map monitors, profiling).
@@ -247,69 +48,31 @@ func runEndToEnd(t *testing.T, multiProbe bool) {
24748
enabledTracers, _ := tracertypes.Parse("")
24849
enabledTracers.Enable(tracertypes.CUDATracer)
24950

250-
tr, err := tracer.NewTracer(ctx, &tracer.Config{
251-
TraceReporter: discardTraceReporter{},
252-
Intervals: &mockIntervals{},
253-
IncludeTracers: enabledTracers,
254-
FilterErrorFrames: false,
255-
SamplesPerSecond: 20,
256-
MapScaleFactor: 0,
257-
KernelVersionCheck: false,
258-
BPFVerifierLogLevel: 0,
259-
ProbabilisticInterval: 100,
260-
ProbabilisticThreshold: 100,
261-
OffCPUThreshold: 1 * math.MaxUint32,
262-
InstrumentCudaLaunch: true,
263-
VerboseMode: true,
264-
})
265-
require.NoError(t, err, "failed to create tracer")
266-
defer tr.Close()
267-
268-
// Start the full pipeline: PID event processor, profiling, map monitors.
269-
tr.StartPIDEventProcessor(ctx)
270-
require.NoError(t, tr.AttachTracer(), "AttachTracer failed")
271-
require.NoError(t, tr.EnableProfiling(), "EnableProfiling failed")
272-
require.NoError(t, tr.AttachSchedMonitor(), "AttachSchedMonitor failed")
273-
274-
ebpfTraceCh := make(chan *libpf.EbpfTrace)
275-
require.NoError(t, tr.StartMapMonitors(ctx, ebpfTraceCh), "StartMapMonitors failed")
276-
277-
// Consume eBPF traces to prevent blocking the pipeline.
278-
go func() {
279-
for {
280-
select {
281-
case trace := <-ebpfTraceCh:
282-
if trace != nil {
283-
tr.HandleTrace(trace)
284-
}
285-
case <-ctx.Done():
286-
return
287-
}
288-
}
289-
}()
51+
_, trc := testutils.StartTracer(ctx, t, enabledTracers, false)
52+
defer trc.Close()
29053

29154
// Trigger initial process sync for our PID so the tracer discovers our
29255
// mappings and attaches the dlopen uprobe to libc.
29356
pid := libpf.PID(uint32(os.Getpid()))
294-
tr.ForceProcessPID(pid)
57+
trc.ForceProcessPID(pid)
29558

29659
// Wait until the process manager has processed our PID and attached
29760
// interpreter instances (the rtld instance attaches the dlopen uprobe
29861
// to libc as a side effect).
29962
require.Eventually(t, func() bool {
300-
instances := tr.GetInterpretersForPID(pid)
63+
instances := trc.GetInterpretersForPID(pid)
30164
if len(instances) > 0 {
30265
t.Logf("process synced: %d interpreter(s) attached", len(instances))
30366
return true
30467
}
30568
t.Log("waiting for initial process sync...")
306-
tr.ForceProcessPID(pid)
69+
trc.ForceProcessPID(pid)
30770
return false
30871
}, 30*time.Second, 200*time.Millisecond, "process manager never synced our PID")
30972

31073
// Set up perf reader on the cuda_timing_events map BEFORE the dlopen so we
31174
// don't miss any events.
312-
timingMap := tr.GetEbpfMaps()["cuda_timing_events"]
75+
timingMap := trc.GetEbpfMaps()["cuda_timing_events"]
31376
require.NotNil(t, timingMap, "cuda_timing_events map not found")
31477

31578
reader, err := perf.NewReader(timingMap, 1024*1024)
@@ -324,20 +87,20 @@ func runEndToEnd(t *testing.T, multiProbe bool) {
32487
defer cCleanupParcaGPU()
32588

32689
// Speed up the re-sync after dlopen.
327-
tr.ForceProcessPID(pid)
90+
trc.ForceProcessPID(pid)
32891

32992
// Wait until the GPU interpreter instance appears, confirming the USDT
33093
// probes were attached by the process manager.
33194
require.Eventually(t, func() bool {
332-
instances := tr.GetInterpretersForPID(pid)
95+
instances := trc.GetInterpretersForPID(pid)
33396
for _, inst := range instances {
33497
if _, ok := inst.(*gpu.Instance); ok {
33598
t.Log("GPU interpreter instance attached")
33699
return true
337100
}
338101
}
339102
t.Logf("waiting for GPU interpreter instance (%d interpreters so far)...", len(instances))
340-
tr.ForceProcessPID(pid)
103+
trc.ForceProcessPID(pid)
341104
return false
342105
}, 30*time.Second, 200*time.Millisecond, "GPU interpreter never attached after dlopen")
343106

tracer/tracer.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,6 @@ type Config struct {
170170
KernelVersionCheck bool
171171
// VerboseMode indicates whether to enable verbose output of eBPF tracers.
172172
VerboseMode bool
173-
// InstrumentCudaLaunch determines whether to instrument calls to `cudaLaunchKernel`.
174-
InstrumentCudaLaunch bool
175173
// TraceBufferSizeMultiplier scales the trace_events perf buffer size.
176174
// Useful for high-throughput scenarios like GPU profiling. Defaults to 1.
177175
TraceBufferSizeMultiplier int
@@ -468,7 +466,7 @@ func initializeMapsAndPrograms(kmod *kallsyms.Module, cfg *Config) (
468466
if cfg.OffCPUThreshold > 0 ||
469467
len(cfg.ProbeLinks) > 0 ||
470468
cfg.LoadProbe ||
471-
cfg.InstrumentCudaLaunch {
469+
cfg.IncludeTracers.Has(types.CUDATracer) {
472470
// Load the tail call destinations if any kind of event profiling is enabled.
473471
if err = loadProbeUnwinders(coll, ebpfProgs, ebpfMaps["kprobe_progs"], tailCallProgs,
474472
cfg.BPFVerifierLogLevel, ebpfMaps["perf_progs"].FD()); err != nil {

0 commit comments

Comments
 (0)