Skip to content

Commit ac17708

Browse files
committed
fix: properly handle exponential backoff for rate limiting in embedders
- Fixed an issue where multiple concurrent requests hitting rate limits would all log retry messages simultaneously
- Improved global rate limit state management to coordinate retry delays across parallel requests
- Added proper delay calculation that considers both global consecutive errors and per-request attempt numbers
- Added a success callback to reset the consecutive error count when requests succeed
- Ensures exponential backoff delays are applied sequentially rather than all at once

Fixes #7029
1 parent 7b0f489 commit ac17708

File tree

2 files changed

+51
-20
lines changed

2 files changed

+51
-20
lines changed

src/services/code-index/embedders/__tests__/openai-compatible-rate-limit.spec.ts

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -182,8 +182,8 @@ describe("OpenAICompatibleEmbedder - Global Rate Limiting", () => {
182182
usage: { prompt_tokens: 10, total_tokens: 15 },
183183
})
184184

185-
// Trigger the updateGlobalRateLimitState method
186-
await (embedder as any).updateGlobalRateLimitState(rateLimitError)
185+
// Trigger the updateGlobalRateLimitState method with attempt number
186+
await (embedder as any).updateGlobalRateLimitState(rateLimitError, 0)
187187

188188
// Should reset to 1 since more than 60 seconds passed
189189
expect(state.consecutiveRateLimitErrors).toBe(1)
@@ -199,12 +199,8 @@ describe("OpenAICompatibleEmbedder - Global Rate Limiting", () => {
199199
const rateLimitError = new Error("Rate limit exceeded") as any
200200
rateLimitError.status = 429
201201

202-
// Trigger the updateGlobalRateLimitState method
203-
await (embedder as any).updateGlobalRateLimitState(rateLimitError)
204-
205-
// Calculate the expected delay
206-
const now = Date.now()
207-
const delay = state.rateLimitResetTime - now
202+
// Trigger the updateGlobalRateLimitState method with attempt number
203+
const delay = await (embedder as any).updateGlobalRateLimitState(rateLimitError, 0)
208204

209205
// Should be capped at 5 minutes (300000ms)
210206
expect(delay).toBeLessThanOrEqual(300000)

src/services/code-index/embedders/openai-compatible.ts

Lines changed: 47 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -294,6 +294,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
294294

295295
const embeddings = response.data.map((item) => item.embedding as number[])
296296

297+
// Reset consecutive errors on success
298+
await this.resetGlobalRateLimitOnSuccess()
299+
297300
return {
298301
embeddings: embeddings,
299302
usage: {
@@ -315,14 +318,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
315318
// Check if it's a rate limit error
316319
const httpError = error as HttpError
317320
if (httpError?.status === 429) {
318-
// Update global rate limit state
319-
await this.updateGlobalRateLimitState(httpError)
320-
321321
if (hasMoreAttempts) {
322-
// Calculate delay based on global rate limit state
323-
const baseDelay = INITIAL_DELAY_MS * Math.pow(2, attempts)
324-
const globalDelay = await this.getGlobalRateLimitDelay()
325-
const delayMs = Math.max(baseDelay, globalDelay)
322+
// Update global rate limit state and get the delay
323+
const delayMs = await this.updateGlobalRateLimitState(httpError, attempts)
326324

327325
console.warn(
328326
t("embeddings:rateLimitRetry", {
@@ -434,14 +432,20 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
434432
}
435433

436434
/**
437-
* Updates global rate limit state when a 429 error occurs
435+
* Updates global rate limit state when a 429 error occurs and returns the delay to use
438436
*/
439-
private async updateGlobalRateLimitState(error: HttpError): Promise<void> {
437+
private async updateGlobalRateLimitState(error: HttpError, attemptNumber: number): Promise<number> {
440438
const release = await OpenAICompatibleEmbedder.globalRateLimitState.mutex.acquire()
441439
try {
442440
const state = OpenAICompatibleEmbedder.globalRateLimitState
443441
const now = Date.now()
444442

443+
// Check if we're already in a rate limit period
444+
if (state.isRateLimited && state.rateLimitResetTime > now) {
445+
// Return the remaining wait time
446+
return state.rateLimitResetTime - now
447+
}
448+
445449
// Increment consecutive rate limit errors
446450
if (now - state.lastRateLimitError < 60000) {
447451
// Within 1 minute
@@ -452,16 +456,47 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
452456

453457
state.lastRateLimitError = now
454458

455-
// Calculate exponential backoff based on consecutive errors
459+
// Calculate exponential backoff based on consecutive errors AND attempt number
460+
// Use the maximum of the two to ensure proper backoff
456461
const baseDelay = 5000 // 5 seconds base
457462
const maxDelay = 300000 // 5 minutes max
458-
const exponentialDelay = Math.min(baseDelay * Math.pow(2, state.consecutiveRateLimitErrors - 1), maxDelay)
463+
464+
// Calculate delay based on consecutive errors across all requests
465+
const globalExponentialDelay = Math.min(
466+
baseDelay * Math.pow(2, state.consecutiveRateLimitErrors - 1),
467+
maxDelay,
468+
)
469+
470+
// Calculate delay based on this specific request's attempt number
471+
const attemptExponentialDelay = Math.min(INITIAL_DELAY_MS * Math.pow(2, attemptNumber), maxDelay)
472+
473+
// Use the larger of the two delays
474+
const exponentialDelay = Math.max(globalExponentialDelay, attemptExponentialDelay)
459475

460476
// Set global rate limit
461477
state.isRateLimited = true
462478
state.rateLimitResetTime = now + exponentialDelay
463479

464-
// Silent rate limit activation - no logging to prevent flooding
480+
return exponentialDelay
481+
} finally {
482+
release()
483+
}
484+
}
485+
486+
/**
487+
* Resets the consecutive error count on successful request
488+
*/
489+
private async resetGlobalRateLimitOnSuccess(): Promise<void> {
490+
const release = await OpenAICompatibleEmbedder.globalRateLimitState.mutex.acquire()
491+
try {
492+
const state = OpenAICompatibleEmbedder.globalRateLimitState
493+
494+
// Reset rate limit state on success
495+
if (state.consecutiveRateLimitErrors > 0) {
496+
state.consecutiveRateLimitErrors = 0
497+
state.isRateLimited = false
498+
state.rateLimitResetTime = 0
499+
}
465500
} finally {
466501
release()
467502
}

0 commit comments

Comments
 (0)