Skip to content

Commit a86187f

Browse files
authored
Merge pull request #7 from parca-dev/rate-limit
rate limit
2 parents a4b6198 + 3170257 commit a86187f

File tree

1 file changed

+36
-4
lines changed

1 file changed

+36
-4
lines changed

cupti/cupti-prof.c

Lines changed: 36 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,20 @@ static CUpti_SubscriberHandle subscriber = 0;
2929
// Count of probe events considered outstanding; incremented immediately before
// each cuda_correlation probe is fired.
// NOTE(review): where this is decremented/reset is not visible in this diff.
static size_t outstandingEvents = 0;
3030

3131
// Thread-local tracking: store correlation ID from runtime ENTER
32-
// so we can skip driver EXIT probe when it matches (driver calls happen under runtime calls)
32+
// so we can skip driver EXIT probe when it matches (driver calls happen under
33+
// runtime calls)
3334
// A driver-domain callback whose correlation ID equals this value returns
// without firing a probe.
static __thread uint32_t runtimeEnterCorrelationId = 0;
3435

36+
// Rate limiting
37+
// Per-thread CLOCK_MONOTONIC timestamp, in nanoseconds, of the last probe that
// passed the rate limiter; stays 0 until the first such probe on this thread.
static __thread uint64_t lastProbeTimeNs = 0;
38+
// Set in init_debug() when PARCAGPU_LIMITER_DISABLE is present in the
// environment; when true the rate-limit check is bypassed entirely.
static bool limiter_disabled = false;
39+
// Minimum spacing between two probes on the same thread. A callback arriving
// sooner than this after the previous probe returns without firing one
// (graph launches are exempt from the check).
#define PROBE_MIN_INTERVAL_NS 500000 // 500μs
40+
3541
// Reads the PARCAGPU_DEBUG and PARCAGPU_LIMITER_DISABLE environment variables
// on the first call and caches whether each one is set in debug_enabled and
// limiter_disabled respectively. Subsequent calls do nothing.
// NOTE(review): the `initialized` guard is a plain static bool with no
// synchronization — confirm the first call cannot race with another thread.
static void init_debug(void) {
3642
static bool initialized = false;
3743
if (!initialized) {
3844
// Only presence is tested: any value, including an empty string, enables it.
debug_enabled = getenv("PARCAGPU_DEBUG") != NULL;
45+
// Same presence-only test; setting the variable turns the rate limiter off.
limiter_disabled = getenv("PARCAGPU_LIMITER_DISABLE") != NULL;
3946
initialized = true;
4047
}
4148
}
@@ -217,9 +224,9 @@ static void parcagpuCuptiCallback(void *userdata, CUpti_CallbackDomain domain,
217224
if (domain == CUPTI_CB_DOMAIN_DRIVER_API) {
218225
// Skip if this driver call is under a runtime call (same correlation ID)
219226
if (correlationId == runtimeEnterCorrelationId) {
220-
DEBUG_PRINTF(
221-
"[CUPTI] Skipping driver EXIT correlationId=%u - runtime will handle\n",
222-
correlationId);
227+
DEBUG_PRINTF("[CUPTI] Skipping driver EXIT correlationId=%u - runtime "
228+
"will handle\n",
229+
correlationId);
223230
return;
224231
}
225232
// Pure driver call (no runtime wrapper) - use negative cbid
@@ -237,6 +244,31 @@ static void parcagpuCuptiCallback(void *userdata, CUpti_CallbackDomain domain,
237244
return;
238245
}
239246

247+
// Check if this is a graph launch (never rate limit these)
248+
bool isGraphLaunch = false;
249+
if (signedCbid < 0) {
250+
// Driver API: cuGraphLaunch = 514, cuGraphLaunch_ptsz = 515
251+
int driverCbid = -signedCbid;
252+
isGraphLaunch = (driverCbid == 514 || driverCbid == 515);
253+
} else {
254+
// Runtime API: cudaGraphLaunch = 311, cudaGraphLaunch_ptsz = 312
255+
isGraphLaunch = (signedCbid == 311 || signedCbid == 312);
256+
}
257+
258+
// Rate limit probes (skip for graph launches)
259+
if (!limiter_disabled && !isGraphLaunch) {
260+
struct timespec ts;
261+
clock_gettime(CLOCK_MONOTONIC, &ts);
262+
uint64_t nowNs = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
263+
if (nowNs - lastProbeTimeNs < PROBE_MIN_INTERVAL_NS) {
264+
DEBUG_PRINTF(
265+
"[CUPTI] Rate limited: skipping probe for correlationId=%u\n",
266+
correlationId);
267+
return;
268+
}
269+
lastProbeTimeNs = nowNs;
270+
}
271+
240272
outstandingEvents++;
241273
DTRACE_PROBE3(parcagpu, cuda_correlation, correlationId, signedCbid, name);
242274
// If we let too many events pile up it overwhelms the perf_event buffers,

0 commit comments

Comments
 (0)