Skip to content

Commit 1f208d4

Browse files
authored
Merge pull request #8 from parca-dev/rolling-limit
Replace interval rate limiter with token bucket algorithm
2 parents 4c29311 + ef761b2 commit 1f208d4

File tree

1 file changed

+43
-28
lines changed

1 file changed

+43
-28
lines changed

cupti/cupti-prof.c

Lines changed: 43 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -33,16 +33,45 @@ static size_t outstandingEvents = 0;
3333
// runtime calls)
3434
static __thread uint32_t runtimeEnterCorrelationId = 0;
3535

36-
// Rate limiting
37-
static __thread uint64_t lastProbeTimeNs = 0;
38-
static bool limiter_disabled = false;
39-
#define PROBE_MIN_INTERVAL_NS 500000 // 500μs
36+
// Rate limiting - token bucket algorithm (configurable via PARCAGPU_RATE_LIMIT)
// Refill rate in tokens per second. Also used as the bucket capacity (burst
// size), except that the capacity is never allowed to drop below one token.
static double rateLimitPerSec = 100.0;

// Thread-local token bucket state
// Timestamp of the previous call on this thread; 0 means "bucket not yet
// initialised".
static __thread uint64_t lastRefillNs = 0;
// Tokens currently available to this thread (fractional while refilling).
static __thread double tokens = 0;

/**
 * Token-bucket admission check for the calling thread.
 *
 * Refills the thread-local bucket in proportion to the time elapsed since the
 * previous call, then tries to take one token.
 *
 * @param nowNs current timestamp in nanoseconds
 * @return true if the sample should be emitted, false if rate limited
 */
static bool rateLimiterTryAcquire(uint64_t nowNs) {
  // The bucket must be able to hold one whole token. With a configured rate
  // below 1/s, a capacity equal to the rate could never reach 1.0 and every
  // sample would be dropped forever.
  const double capacity = rateLimitPerSec > 1.0 ? rateLimitPerSec : 1.0;

  if (lastRefillNs == 0) {
    // First call on this thread: start with a full bucket.
    tokens = capacity;
  } else if (nowNs > lastRefillNs) {
    // Refill only when time has advanced. This keeps the unsigned subtraction
    // from wrapping if the timestamp goes backwards, and avoids computing
    // 0 * rate (which is NaN for an infinite rate).
    const double elapsedSec = (double)(nowNs - lastRefillNs) / 1e9;
    tokens += elapsedSec * rateLimitPerSec;
    if (tokens > capacity) {
      tokens = capacity;
    }
  }
  lastRefillNs = nowNs;

  if (tokens >= 1.0) {
    tokens -= 1.0;
    return true;
  }
  return false;
}
4063

4164
static void init_debug(void) {
4265
static bool initialized = false;
4366
if (!initialized) {
4467
debug_enabled = getenv("PARCAGPU_DEBUG") != NULL;
45-
limiter_disabled = getenv("PARCAGPU_LIMITER_DISABLE") != NULL;
68+
const char *rateEnv = getenv("PARCAGPU_RATE_LIMIT");
69+
if (rateEnv != NULL) {
70+
double rate = atof(rateEnv);
71+
if (rate > 0) {
72+
rateLimitPerSec = rate;
73+
}
74+
}
4675
initialized = true;
4776
}
4877
}
@@ -244,29 +273,15 @@ static void parcagpuCuptiCallback(void *userdata, CUpti_CallbackDomain domain,
244273
return;
245274
}
246275

247-
// Check if this is a graph launch (never rate limit these)
248-
bool isGraphLaunch = false;
249-
if (signedCbid < 0) {
250-
// Driver API: cuGraphLaunch = 514, cuGraphLaunch_ptsz = 515
251-
int driverCbid = -signedCbid;
252-
isGraphLaunch = (driverCbid == 514 || driverCbid == 515);
253-
} else {
254-
// Runtime API: cudaGraphLaunch = 311, cudaGraphLaunch_ptsz = 312
255-
isGraphLaunch = (signedCbid == 311 || signedCbid == 312);
256-
}
257-
258-
// Rate limit probes (skip for graph launches)
259-
if (!limiter_disabled && !isGraphLaunch) {
260-
struct timespec ts;
261-
clock_gettime(CLOCK_MONOTONIC, &ts);
262-
uint64_t nowNs = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
263-
if (nowNs - lastProbeTimeNs < PROBE_MIN_INTERVAL_NS) {
264-
DEBUG_PRINTF(
265-
"[CUPTI] Rate limited: skipping probe for correlationId=%u\n",
266-
correlationId);
267-
return;
268-
}
269-
lastProbeTimeNs = nowNs;
276+
// Rate limit probes
277+
struct timespec ts;
278+
clock_gettime(CLOCK_MONOTONIC, &ts);
279+
uint64_t nowNs = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
280+
if (!rateLimiterTryAcquire(nowNs)) {
281+
DEBUG_PRINTF(
282+
"[CUPTI] Rate limited: skipping probe for correlationId=%u\n",
283+
correlationId);
284+
return;
270285
}
271286

272287
outstandingEvents++;

0 commit comments

Comments
 (0)