Skip to content

Commit 6ec57b2

Browse files
committed
Use test helper infra in CUDA e2e tests to simplify, and remove
InstrumentCudaLaunch. Also remove the CUDAVerifier tests, as they are subsumed by the e2e tests, and the arm tests take 5m with all that extra BPF verification going through QEMU.
1 parent 3dc1249 commit 6ec57b2

File tree

2 files changed

+10
-249
lines changed

2 files changed

+10
-249
lines changed

test/cudaverify/cuda_verifier_test.go

Lines changed: 9 additions & 246 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"bytes"
77
"context"
88
"flag"
9-
"math"
109
"os"
1110
"testing"
1211
"time"
@@ -15,213 +14,15 @@ import (
1514
"github.com/cilium/ebpf/perf"
1615
"github.com/stretchr/testify/require"
1716

18-
"go.opentelemetry.io/ebpf-profiler/interpreter"
1917
"go.opentelemetry.io/ebpf-profiler/interpreter/gpu"
2018
"go.opentelemetry.io/ebpf-profiler/libpf"
21-
"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
22-
"go.opentelemetry.io/ebpf-profiler/reporter/samples"
2319
"go.opentelemetry.io/ebpf-profiler/testutils"
24-
"go.opentelemetry.io/ebpf-profiler/tracer"
2520
tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types"
2621
"go.opentelemetry.io/ebpf-profiler/util"
2722
)
2823

2924
var soPath = flag.String("so-path", "/libparcagpucupti.so", "path to libparcagpucupti.so")
3025

31-
type mockIntervals struct{}
32-
33-
func (mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second }
34-
func (mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond }
35-
func (mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second }
36-
func (mockIntervals) ExecutableUnloadDelay() time.Duration { return 1 * time.Second }
37-
38-
type mockReporter struct{}
39-
40-
func (mockReporter) ExecutableKnown(_ libpf.FileID) bool { return true }
41-
42-
// discardTraceReporter is a TraceReporter that silently discards all traces.
43-
type discardTraceReporter struct{}
44-
45-
func (discardTraceReporter) ReportTraceEvent(_ *libpf.Trace, _ *samples.TraceEventMeta) error {
46-
return nil
47-
}
48-
49-
// parseProbes opens the .so and extracts the required parcagpu USDT probes.
50-
func parseProbes(t *testing.T) []pfelf.USDTProbe {
51-
t.Helper()
52-
53-
ef, err := pfelf.Open(*soPath)
54-
require.NoError(t, err, "failed to open %s", *soPath)
55-
defer ef.Close()
56-
57-
require.NoError(t, ef.LoadSections(), "failed to load sections")
58-
59-
allProbes, err := ef.ParseUSDTProbes()
60-
require.NoError(t, err, "failed to parse USDT probes")
61-
62-
var requiredProbes []pfelf.USDTProbe
63-
for _, probe := range allProbes {
64-
if probe.Provider == "parcagpu" &&
65-
(probe.Name == "cuda_correlation" || probe.Name == "kernel_executed" || probe.Name == "activity_batch") {
66-
requiredProbes = append(requiredProbes, probe)
67-
}
68-
}
69-
// Need cuda_correlation + at least one of kernel_executed/activity_batch
70-
hasCorrelation := false
71-
hasKernel := false
72-
for _, p := range requiredProbes {
73-
switch p.Name {
74-
case "cuda_correlation":
75-
hasCorrelation = true
76-
case "kernel_executed", "activity_batch":
77-
hasKernel = true
78-
}
79-
}
80-
require.True(t, hasCorrelation, "missing cuda_correlation probe")
81-
require.True(t, hasKernel, "missing kernel_executed or activity_batch probe")
82-
83-
for _, p := range requiredProbes {
84-
t.Logf("Found probe: provider=%s name=%s location=0x%x args=%s",
85-
p.Provider, p.Name, p.Location, p.Arguments)
86-
}
87-
return requiredProbes
88-
}
89-
90-
// createTracer creates a Tracer with InstrumentCudaLaunch enabled so the CUDA
91-
// eBPF programs (tail-call destinations) are loaded and the verifier runs.
92-
func createTracer(t *testing.T) (*tracer.Tracer, interpreter.EbpfHandler, context.CancelFunc) {
93-
t.Helper()
94-
95-
ctx, cancel := context.WithCancel(context.Background())
96-
enabledTracers, _ := tracertypes.Parse("")
97-
98-
tr, err := tracer.NewTracer(ctx, &tracer.Config{
99-
Intervals: &mockIntervals{},
100-
IncludeTracers: enabledTracers,
101-
FilterErrorFrames: false,
102-
SamplesPerSecond: 20,
103-
MapScaleFactor: 0,
104-
KernelVersionCheck: false,
105-
BPFVerifierLogLevel: 0,
106-
ProbabilisticInterval: 100,
107-
ProbabilisticThreshold: 100,
108-
OffCPUThreshold: 1 * math.MaxUint32,
109-
InstrumentCudaLaunch: true,
110-
})
111-
require.NoError(t, err, "failed to create tracer")
112-
113-
ebpfHandler := tr.GetEbpfHandler()
114-
return tr, ebpfHandler, cancel
115-
}
116-
117-
// buildCookiesAndProgNames builds the cookie and program-name slices that
118-
// mirror interpreter/gpu/cuda.go Attach().
119-
func buildCookiesAndProgNames(probes []pfelf.USDTProbe) ([]uint64, []string) {
120-
cookies := make([]uint64, len(probes))
121-
progNames := make([]string, len(probes))
122-
for i, probe := range probes {
123-
switch probe.Name {
124-
case "cuda_correlation":
125-
cookies[i] = 0 // CudaProgCorrelation
126-
progNames[i] = "cuda_correlation"
127-
case "kernel_executed":
128-
cookies[i] = 1 // CudaProgKernelExec
129-
progNames[i] = "cuda_kernel_exec"
130-
case "activity_batch":
131-
cookies[i] = 2 // CudaProgActivityBatch
132-
progNames[i] = "cuda_activity_batch"
133-
}
134-
}
135-
return cookies, progNames
136-
}
137-
138-
// TestCUDAVerifierSingleShot verifies CUDA eBPF programs pass the BPF verifier
139-
// using individual per-probe program attachment (works on kernel 5.15+).
140-
// Forces single-shot mode so that AttachUSDTProbes uses per-probe attachment.
141-
func TestCUDAVerifierSingleShot(t *testing.T) {
142-
if os.Getuid() != 0 {
143-
t.Skip("requires root to load eBPF programs")
144-
}
145-
if !util.HasBpfGetAttachCookie() {
146-
t.Skip("requires kernel support for bpf_get_attach_cookie (5.15+)")
147-
}
148-
149-
// Force single-shot mode so loadUSDTProgram does not set
150-
// AttachTraceUprobeMulti.
151-
noMulti := false
152-
util.SetTestOnlyMultiUprobeSupport(&noMulti)
153-
defer util.SetTestOnlyMultiUprobeSupport(nil)
154-
155-
probes := parseProbes(t)
156-
157-
testutils.InitializeMetrics()
158-
tr, ebpfHandler, cancel := createTracer(t)
159-
defer tr.Close()
160-
defer cancel()
161-
162-
cookies, progNames := buildCookiesAndProgNames(probes)
163-
164-
lc, err := ebpfHandler.AttachUSDTProbes(
165-
libpf.PID(os.Getpid()),
166-
*soPath,
167-
"", // no multi-prog
168-
probes,
169-
cookies,
170-
progNames,
171-
)
172-
require.NoError(t, err, "AttachUSDTProbes (single-shot) failed — BPF verifier rejected CUDA programs")
173-
defer lc.Unload()
174-
175-
t.Log("SingleShot: all CUDA eBPF programs passed the BPF verifier")
176-
}
177-
178-
// TestCUDAVerifierMultiProbe verifies CUDA eBPF programs pass the BPF verifier
179-
// using multi-uprobe attachment with cookies (requires kernel 6.6+).
180-
func TestCUDAVerifierMultiProbe(t *testing.T) {
181-
if os.Getuid() != 0 {
182-
t.Skip("requires root to load eBPF programs")
183-
}
184-
if !util.HasBpfGetAttachCookie() {
185-
t.Skip("requires kernel support for bpf_get_attach_cookie (5.15+)")
186-
}
187-
if !util.HasMultiUprobeSupport() {
188-
t.Skip("requires kernel support for uprobe multi-attach (6.6+)")
189-
}
190-
191-
probes := parseProbes(t)
192-
193-
testutils.InitializeMetrics()
194-
195-
tr, ebpfHandler, cancel := createTracer(t)
196-
defer tr.Close()
197-
defer cancel()
198-
199-
cookies, progNames := buildCookiesAndProgNames(probes)
200-
201-
// Populate the tail-call prog array for activity_batch (the only tail-call
202-
// target — correlation and kernel_exec are inlined in cuda_probe).
203-
for _, probe := range probes {
204-
if probe.Name == "activity_batch" {
205-
err := ebpfHandler.UpdateProgArray("cuda_progs", 0, "cuda_activity_batch_tail")
206-
require.NoError(t, err, "UpdateProgArray failed for cuda_activity_batch")
207-
break
208-
}
209-
}
210-
211-
lc, err := ebpfHandler.AttachUSDTProbes(
212-
libpf.PID(os.Getpid()),
213-
*soPath,
214-
"cuda_probe", // multi-probe program
215-
probes,
216-
cookies,
217-
progNames,
218-
)
219-
require.NoError(t, err, "AttachUSDTProbes (multi-probe) failed — BPF verifier rejected CUDA programs")
220-
defer lc.Unload()
221-
222-
t.Log("MultiProbe: all CUDA eBPF programs passed the BPF verifier")
223-
}
224-
22526
// runEndToEnd exercises the full process-manager driven GPU probe attachment flow:
22627
//
22728
// 1. Start the full tracer pipeline (PID event processor, map monitors, profiling).
@@ -247,69 +48,31 @@ func runEndToEnd(t *testing.T, multiProbe bool) {
24748
enabledTracers, _ := tracertypes.Parse("")
24849
enabledTracers.Enable(tracertypes.CUDATracer)
24950

250-
tr, err := tracer.NewTracer(ctx, &tracer.Config{
251-
TraceReporter: discardTraceReporter{},
252-
Intervals: &mockIntervals{},
253-
IncludeTracers: enabledTracers,
254-
FilterErrorFrames: false,
255-
SamplesPerSecond: 20,
256-
MapScaleFactor: 0,
257-
KernelVersionCheck: false,
258-
BPFVerifierLogLevel: 0,
259-
ProbabilisticInterval: 100,
260-
ProbabilisticThreshold: 100,
261-
OffCPUThreshold: 1 * math.MaxUint32,
262-
InstrumentCudaLaunch: true,
263-
VerboseMode: true,
264-
})
265-
require.NoError(t, err, "failed to create tracer")
266-
defer tr.Close()
267-
268-
// Start the full pipeline: PID event processor, profiling, map monitors.
269-
tr.StartPIDEventProcessor(ctx)
270-
require.NoError(t, tr.AttachTracer(), "AttachTracer failed")
271-
require.NoError(t, tr.EnableProfiling(), "EnableProfiling failed")
272-
require.NoError(t, tr.AttachSchedMonitor(), "AttachSchedMonitor failed")
273-
274-
ebpfTraceCh := make(chan *libpf.EbpfTrace)
275-
require.NoError(t, tr.StartMapMonitors(ctx, ebpfTraceCh), "StartMapMonitors failed")
276-
277-
// Consume eBPF traces to prevent blocking the pipeline.
278-
go func() {
279-
for {
280-
select {
281-
case trace := <-ebpfTraceCh:
282-
if trace != nil {
283-
tr.HandleTrace(trace)
284-
}
285-
case <-ctx.Done():
286-
return
287-
}
288-
}
289-
}()
51+
_, trc := testutils.StartTracer(ctx, t, enabledTracers, false)
52+
defer trc.Close()
29053

29154
// Trigger initial process sync for our PID so the tracer discovers our
29255
// mappings and attaches the dlopen uprobe to libc.
29356
pid := libpf.PID(uint32(os.Getpid()))
294-
tr.ForceProcessPID(pid)
57+
trc.ForceProcessPID(pid)
29558

29659
// Wait until the process manager has processed our PID and attached
29760
// interpreter instances (the rtld instance attaches the dlopen uprobe
29861
// to libc as a side effect).
29962
require.Eventually(t, func() bool {
300-
instances := tr.GetInterpretersForPID(pid)
63+
instances := trc.GetInterpretersForPID(pid)
30164
if len(instances) > 0 {
30265
t.Logf("process synced: %d interpreter(s) attached", len(instances))
30366
return true
30467
}
30568
t.Log("waiting for initial process sync...")
306-
tr.ForceProcessPID(pid)
69+
trc.ForceProcessPID(pid)
30770
return false
30871
}, 30*time.Second, 200*time.Millisecond, "process manager never synced our PID")
30972

31073
// Set up perf reader on the cuda_timing_events map BEFORE the dlopen so we
31174
// don't miss any events.
312-
timingMap := tr.GetEbpfMaps()["cuda_timing_events"]
75+
timingMap := trc.GetEbpfMaps()["cuda_timing_events"]
31376
require.NotNil(t, timingMap, "cuda_timing_events map not found")
31477

31578
reader, err := perf.NewReader(timingMap, 1024*1024)
@@ -324,20 +87,20 @@ func runEndToEnd(t *testing.T, multiProbe bool) {
32487
defer cCleanupParcaGPU()
32588

32689
// Speed up the re-sync after dlopen.
327-
tr.ForceProcessPID(pid)
90+
trc.ForceProcessPID(pid)
32891

32992
// Wait until the GPU interpreter instance appears, confirming the USDT
33093
// probes were attached by the process manager.
33194
require.Eventually(t, func() bool {
332-
instances := tr.GetInterpretersForPID(pid)
95+
instances := trc.GetInterpretersForPID(pid)
33396
for _, inst := range instances {
33497
if _, ok := inst.(*gpu.Instance); ok {
33598
t.Log("GPU interpreter instance attached")
33699
return true
337100
}
338101
}
339102
t.Logf("waiting for GPU interpreter instance (%d interpreters so far)...", len(instances))
340-
tr.ForceProcessPID(pid)
103+
trc.ForceProcessPID(pid)
341104
return false
342105
}, 30*time.Second, 200*time.Millisecond, "GPU interpreter never attached after dlopen")
343106

tracer/tracer.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,6 @@ type Config struct {
170170
KernelVersionCheck bool
171171
// VerboseMode indicates whether to enable verbose output of eBPF tracers.
172172
VerboseMode bool
173-
// InstrumentCudaLaunch determines whether to instrument calls to `cudaLaunchKernel`.
174-
InstrumentCudaLaunch bool
175173
// TraceBufferSizeMultiplier scales the trace_events perf buffer size.
176174
// Useful for high-throughput scenarios like GPU profiling. Defaults to 1.
177175
TraceBufferSizeMultiplier int
@@ -468,7 +466,7 @@ func initializeMapsAndPrograms(kmod *kallsyms.Module, cfg *Config) (
468466
if cfg.OffCPUThreshold > 0 ||
469467
len(cfg.ProbeLinks) > 0 ||
470468
cfg.LoadProbe ||
471-
cfg.InstrumentCudaLaunch {
469+
cfg.IncludeTracers.Has(types.CUDATracer) {
472470
// Load the tail call destinations if any kind of event profiling is enabled.
473471
if err = loadProbeUnwinders(coll, ebpfProgs, ebpfMaps["kprobe_progs"], tailCallProgs,
474472
cfg.BPFVerifierLogLevel, ebpfMaps["perf_progs"].FD()); err != nil {

0 commit comments

Comments
 (0)