Skip to content

Commit f2fd933

Browse files
committed
Add ParcaGPU support; most of the code is in the otel library
1 parent 0bb0302 commit f2fd933

File tree

5 files changed

+25
-6
lines changed

5 files changed

+25
-6
lines changed

flags/flags.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ const (
6363
func Parse() (Flags, error) {
6464
flags := Flags{}
6565
hostname, hostnameErr := os.Hostname() // hostnameErr is handled below.
66-
66+
6767
// Build Kong options
6868
kongOptions := []kong.Option{
6969
kong.Vars{
@@ -74,7 +74,7 @@ func Parse() (Flags, error) {
7474
"default_memlock_rlimit": "0", // No limit by default. (flag is deprecated)
7575
},
7676
}
77-
77+
7878
kong.Parse(&flags, kongOptions...)
7979

8080
// If a config path is provided, load the YAML configuration
@@ -93,7 +93,7 @@ func Parse() (Flags, error) {
9393
if err != nil {
9494
return Flags{}, fmt.Errorf("failed to create parser with config: %w", err)
9595
}
96-
96+
9797
// Parse again with the configuration
9898
_, err = parser.Parse(os.Args[1:])
9999
if err != nil {
@@ -148,6 +148,8 @@ type Flags struct {
148148

149149
CollectCustomLabels bool `default:"false" help:"Attempt to collect custom labels (e.g. trace ID) from the process."`
150150

151+
InstrumentCudaLaunch bool `default:"false" help:"instrument calls to cudaLaunchKernel."`
152+
151153
AnalyticsOptOut bool `default:"false" help:"Opt out of sending anonymous usage statistics."`
152154

153155
Telemetry FlagsTelemetry `embed:"" prefix:"telemetry-"`

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,4 +164,4 @@ require (
164164
sigs.k8s.io/yaml v1.4.0 // indirect
165165
)
166166

167-
replace go.opentelemetry.io/ebpf-profiler => github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250929190428-bff0d782debe
167+
replace go.opentelemetry.io/ebpf-profiler => github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20251007203523-a1c6489c0364

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,8 @@ github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplU
264264
github.com/opencontainers/selinux v1.12.0/go.mod h1:BTPX+bjVbWGXw7ZZWUbdENt8w0htPSrlgOOysQaU62U=
265265
github.com/parca-dev/oomprof v0.1.5-0.20250922151707-ec00408377fb h1:OErx5d2jVlHFFx5fnGKYNIf5KymfhwPVgdSct6KOlHQ=
266266
github.com/parca-dev/oomprof v0.1.5-0.20250922151707-ec00408377fb/go.mod h1:iqI6XrmiNWOa8m2vEIKo+GtQrqbWCMLFpBWuk8RuAPs=
267-
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250929190428-bff0d782debe h1:gWGETtC21u6CvmHqlP+8+AFZ1DhEZC1PVT1nW73zHb8=
268-
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250929190428-bff0d782debe/go.mod h1:XiydAikAQ7vc3UGjI0B5bWe+r9nJhE9sgHQqaP5Bvi0=
267+
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20251007203523-a1c6489c0364 h1:gMEFTw14tK0C33FZXnTbgK4ZWBdrvqTX4OKUgW0IV78=
268+
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20251007203523-a1c6489c0364/go.mod h1:XiydAikAQ7vc3UGjI0B5bWe+r9nJhE9sgHQqaP5Bvi0=
269269
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
270270
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
271271
github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU=

main.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import (
3939
"go.opentelemetry.io/ebpf-profiler/host"
4040
"go.opentelemetry.io/ebpf-profiler/libpf"
4141
"go.opentelemetry.io/ebpf-profiler/metrics"
42+
"go.opentelemetry.io/ebpf-profiler/parcagpu"
4243
otelreporter "go.opentelemetry.io/ebpf-profiler/reporter"
4344
"go.opentelemetry.io/ebpf-profiler/times"
4445
"go.opentelemetry.io/ebpf-profiler/tracehandler"
@@ -349,6 +350,11 @@ func mainWithExitCode() flags.ExitCode {
349350
}
350351
}
351352

353+
// Remove CUDA tracer if it isn't enabled.
354+
if !f.InstrumentCudaLaunch {
355+
includeTracers.Disable(tracertypes.CUDATracer)
356+
}
357+
352358
// Load relabel configs from the config file (if provided)
353359
var relabelConfigs []*relabel.Config
354360
if f.ConfigPath == "" {
@@ -443,6 +449,7 @@ func mainWithExitCode() flags.ExitCode {
443449
CollectCustomLabels: f.CollectCustomLabels,
444450
OffCPUThreshold: uint32(f.OffCPUThreshold * math.MaxUint32),
445451
IncludeEnvVars: includeEnvVars,
452+
InstrumentCudaLaunch: f.InstrumentCudaLaunch,
446453
})
447454
metrics.SetReporter(parcaReporter)
448455
if err != nil {
@@ -518,6 +525,12 @@ func mainWithExitCode() flags.ExitCode {
518525
return flags.Failure("Failed to start map monitors: %v", err)
519526
}
520527

528+
if f.InstrumentCudaLaunch {
529+
// GPU processor will consume traces and filter out GPU samples awaiting
530+
// timing information.
531+
traceCh = parcagpu.Start(ctx, traceCh, trc.GetEbpfMaps()["cuda_timing_events"])
532+
}
533+
521534
if _, err := tracehandler.Start(ctx, rep, trc.TraceProcessor(),
522535
traceCh, intervals, traceHandlerCacheSize); err != nil {
523536
return flags.Failure("Failed to start trace handler: %v", err)

reporter/parca_reporter.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,10 @@ func (r *ParcaReporter) ReportTraceEvent(trace *libpf.Trace,
282282
r.sampleWriter.Temporality.AppendNull()
283283
writeSample(int64(meta.AllocBytes), 0, memPeriod, "memory", "alloc_space", "bytes", "space", "bytes")
284284
}
285+
case support.TraceOriginCuda:
286+
log.Info("reporting CUDA trace event")
287+
writeSample(meta.OffTime, time.Second.Nanoseconds(), 1e9/int64(r.samplesPerSecond), "parca_agent", "cuda", "nanoseconds", "cuda", "nanoseconds")
288+
r.sampleWriter.Temporality.AppendString("delta")
285289
}
286290

287291
return nil

0 commit comments

Comments
 (0)