diff --git a/flags/flags.go b/flags/flags.go index 9a926da612..e5c8ccb8ce 100644 --- a/flags/flags.go +++ b/flags/flags.go @@ -63,7 +63,7 @@ const ( func Parse() (Flags, error) { flags := Flags{} hostname, hostnameErr := os.Hostname() // hotnameErr handled below. - + // Build Kong options kongOptions := []kong.Option{ kong.Vars{ @@ -74,7 +74,7 @@ func Parse() (Flags, error) { "default_memlock_rlimit": "0", // No limit by default. (flag is deprecated) }, } - + kong.Parse(&flags, kongOptions...) // If a config path is provided, load the YAML configuration @@ -93,7 +93,7 @@ func Parse() (Flags, error) { if err != nil { return Flags{}, fmt.Errorf("failed to create parser with config: %w", err) } - + // Parse again with the configuration _, err = parser.Parse(os.Args[1:]) if err != nil { @@ -148,6 +148,8 @@ type Flags struct { CollectCustomLabels bool `default:"false" help:"Attempt to collect custom labels (e.g. trace ID) from the process."` + InstrumentCudaLaunch bool `default:"false" help:"instrument calls to cudaLaunchKernel."` + AnalyticsOptOut bool `default:"false" help:"Opt out of sending anonymous usage statistics."` Telemetry FlagsTelemetry `embed:"" prefix:"telemetry-"` diff --git a/go.mod b/go.mod index 518cc88624..d5aa9d0579 100644 --- a/go.mod +++ b/go.mod @@ -164,4 +164,4 @@ require ( sigs.k8s.io/yaml v1.4.0 // indirect ) -replace go.opentelemetry.io/ebpf-profiler => github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250929190428-bff0d782debe +replace go.opentelemetry.io/ebpf-profiler => github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20251008201720-d97b42173c24 diff --git a/go.sum b/go.sum index d625baeb84..b6fd84613a 100644 --- a/go.sum +++ b/go.sum @@ -264,8 +264,8 @@ github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplU github.com/opencontainers/selinux v1.12.0/go.mod h1:BTPX+bjVbWGXw7ZZWUbdENt8w0htPSrlgOOysQaU62U= github.com/parca-dev/oomprof v0.1.5-0.20250922151707-ec00408377fb h1:OErx5d2jVlHFFx5fnGKYNIf5KymfhwPVgdSct6KOlHQ= github.com/parca-dev/oomprof v0.1.5-0.20250922151707-ec00408377fb/go.mod h1:iqI6XrmiNWOa8m2vEIKo+GtQrqbWCMLFpBWuk8RuAPs= -github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250929190428-bff0d782debe h1:gWGETtC21u6CvmHqlP+8+AFZ1DhEZC1PVT1nW73zHb8= -github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250929190428-bff0d782debe/go.mod h1:XiydAikAQ7vc3UGjI0B5bWe+r9nJhE9sgHQqaP5Bvi0= +github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20251008201720-d97b42173c24 h1:dwMLiornqZq6Pj32MvBXs6kZM3oXj5+TCqbqpQbbyHw= +github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20251008201720-d97b42173c24/go.mod h1:XiydAikAQ7vc3UGjI0B5bWe+r9nJhE9sgHQqaP5Bvi0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= diff --git a/main.go b/main.go index 7d6328e715..1568931083 100644 --- a/main.go +++ b/main.go @@ -39,6 +39,7 @@ import ( "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/metrics" + "go.opentelemetry.io/ebpf-profiler/parcagpu" otelreporter "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" "go.opentelemetry.io/ebpf-profiler/tracehandler" @@ -349,6 +350,11 @@ func mainWithExitCode() flags.ExitCode { } } + // Remove CUDA tracer if it isn't enabled. + if !f.InstrumentCudaLaunch { + includeTracers.Disable(tracertypes.CUDATracer) + } + // Load relabel configs from the config file (if provided) var relabelConfigs []*relabel.Config if f.ConfigPath == "" { @@ -443,6 +449,7 @@ func mainWithExitCode() flags.ExitCode { CollectCustomLabels: f.CollectCustomLabels, OffCPUThreshold: uint32(f.OffCPUThreshold * math.MaxUint32), IncludeEnvVars: includeEnvVars, + InstrumentCudaLaunch: f.InstrumentCudaLaunch, }) metrics.SetReporter(parcaReporter) if err != nil { @@ -518,6 +525,12 @@ func mainWithExitCode() flags.ExitCode { return flags.Failure("Failed to start map monitors: %v", err) } + if f.InstrumentCudaLaunch { + // GPU processor will consume traces and filter out GPU samples awaiting + // timing information. + traceCh = parcagpu.Start(ctx, traceCh, trc.GetEbpfMaps()["cuda_timing_events"]) + } + if _, err := tracehandler.Start(ctx, rep, trc.TraceProcessor(), traceCh, intervals, traceHandlerCacheSize); err != nil { return flags.Failure("Failed to start trace handler: %v", err) diff --git a/reporter/parca_reporter.go b/reporter/parca_reporter.go index 18f985b9c1..b025e4a7e4 100644 --- a/reporter/parca_reporter.go +++ b/reporter/parca_reporter.go @@ -282,6 +282,9 @@ func (r *ParcaReporter) ReportTraceEvent(trace *libpf.Trace, r.sampleWriter.Temporality.AppendNull() writeSample(int64(meta.AllocBytes), 0, memPeriod, "memory", "alloc_space", "bytes", "space", "bytes") } + case support.TraceOriginCuda: + writeSample(meta.OffTime, time.Second.Nanoseconds(), 1e9/int64(r.samplesPerSecond), "parca_agent", "cuda", "nanoseconds", "cuda", "nanoseconds") + r.sampleWriter.Temporality.AppendString("delta") } return nil