From ac177080848178b6fe3af82bb0bc56491ec633f8 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Wed, 13 Aug 2025 00:14:20 +0000 Subject: [PATCH] fix: properly handle exponential backoff for rate limiting in embedders - Fixed issue where multiple concurrent requests hitting rate limits would all log retry messages simultaneously - Improved global rate limit state management to coordinate retry delays across parallel requests - Added proper delay calculation that considers both global consecutive errors and per-request attempt numbers - Added success callback to reset consecutive error count when requests succeed - Ensures exponential backoff delays are applied sequentially rather than all at once Fixes #7029 --- .../openai-compatible-rate-limit.spec.ts | 12 ++-- .../code-index/embedders/openai-compatible.ts | 59 +++++++++++++++---- 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/src/services/code-index/embedders/__tests__/openai-compatible-rate-limit.spec.ts b/src/services/code-index/embedders/__tests__/openai-compatible-rate-limit.spec.ts index 3e2acc398e2..a68db4ec8f4 100644 --- a/src/services/code-index/embedders/__tests__/openai-compatible-rate-limit.spec.ts +++ b/src/services/code-index/embedders/__tests__/openai-compatible-rate-limit.spec.ts @@ -182,8 +182,8 @@ describe("OpenAICompatibleEmbedder - Global Rate Limiting", () => { usage: { prompt_tokens: 10, total_tokens: 15 }, }) - // Trigger the updateGlobalRateLimitState method - await (embedder as any).updateGlobalRateLimitState(rateLimitError) + // Trigger the updateGlobalRateLimitState method with attempt number + await (embedder as any).updateGlobalRateLimitState(rateLimitError, 0) // Should reset to 1 since more than 60 seconds passed expect(state.consecutiveRateLimitErrors).toBe(1) @@ -199,12 +199,8 @@ describe("OpenAICompatibleEmbedder - Global Rate Limiting", () => { const rateLimitError = new Error("Rate limit exceeded") as any rateLimitError.status = 429 - // Trigger the 
updateGlobalRateLimitState method - await (embedder as any).updateGlobalRateLimitState(rateLimitError) - - // Calculate the expected delay - const now = Date.now() - const delay = state.rateLimitResetTime - now + // Trigger the updateGlobalRateLimitState method with attempt number + const delay = await (embedder as any).updateGlobalRateLimitState(rateLimitError, 0) // Should be capped at 5 minutes (300000ms) expect(delay).toBeLessThanOrEqual(300000) diff --git a/src/services/code-index/embedders/openai-compatible.ts b/src/services/code-index/embedders/openai-compatible.ts index 06c4ba52823..644ebb68b2d 100644 --- a/src/services/code-index/embedders/openai-compatible.ts +++ b/src/services/code-index/embedders/openai-compatible.ts @@ -294,6 +294,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder { const embeddings = response.data.map((item) => item.embedding as number[]) + // Reset consecutive errors on success + await this.resetGlobalRateLimitOnSuccess() + return { embeddings: embeddings, usage: { @@ -315,14 +318,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder { // Check if it's a rate limit error const httpError = error as HttpError if (httpError?.status === 429) { - // Update global rate limit state - await this.updateGlobalRateLimitState(httpError) - if (hasMoreAttempts) { - // Calculate delay based on global rate limit state - const baseDelay = INITIAL_DELAY_MS * Math.pow(2, attempts) - const globalDelay = await this.getGlobalRateLimitDelay() - const delayMs = Math.max(baseDelay, globalDelay) + // Update global rate limit state and get the delay + const delayMs = await this.updateGlobalRateLimitState(httpError, attempts) console.warn( t("embeddings:rateLimitRetry", { @@ -434,14 +432,20 @@ export class OpenAICompatibleEmbedder implements IEmbedder { } /** - * Updates global rate limit state when a 429 error occurs + * Updates global rate limit state when a 429 error occurs and returns the delay to use */ - private async 
updateGlobalRateLimitState(error: HttpError): Promise<void> { + private async updateGlobalRateLimitState(error: HttpError, attemptNumber: number): Promise<number> { const release = await OpenAICompatibleEmbedder.globalRateLimitState.mutex.acquire() try { const state = OpenAICompatibleEmbedder.globalRateLimitState const now = Date.now() + // Check if we're already in a rate limit period + if (state.isRateLimited && state.rateLimitResetTime > now) { + // Return the remaining wait time + return state.rateLimitResetTime - now + } + // Increment consecutive rate limit errors if (now - state.lastRateLimitError < 60000) { // Within 1 minute @@ -452,16 +456,47 @@ export class OpenAICompatibleEmbedder implements IEmbedder { state.lastRateLimitError = now - // Calculate exponential backoff based on consecutive errors + // Calculate exponential backoff based on consecutive errors AND attempt number + // Use the maximum of the two to ensure proper backoff const baseDelay = 5000 // 5 seconds base const maxDelay = 300000 // 5 minutes max - const exponentialDelay = Math.min(baseDelay * Math.pow(2, state.consecutiveRateLimitErrors - 1), maxDelay) + + // Calculate delay based on consecutive errors across all requests + const globalExponentialDelay = Math.min( + baseDelay * Math.pow(2, state.consecutiveRateLimitErrors - 1), + maxDelay, + ) + + // Calculate delay based on this specific request's attempt number + const attemptExponentialDelay = Math.min(INITIAL_DELAY_MS * Math.pow(2, attemptNumber), maxDelay) + + // Use the larger of the two delays + const exponentialDelay = Math.max(globalExponentialDelay, attemptExponentialDelay) // Set global rate limit state.isRateLimited = true state.rateLimitResetTime = now + exponentialDelay - // Silent rate limit activation - no logging to prevent flooding + return exponentialDelay + } finally { + release() + } + } + + /** + * Resets the consecutive error count on successful request + */ + private async resetGlobalRateLimitOnSuccess(): Promise<void> { + const 
release = await OpenAICompatibleEmbedder.globalRateLimitState.mutex.acquire() + try { + const state = OpenAICompatibleEmbedder.globalRateLimitState + + // Reset rate limit state on success + if (state.consecutiveRateLimitErrors > 0) { + state.consecutiveRateLimitErrors = 0 + state.isRateLimited = false + state.rateLimitResetTime = 0 + } } finally { release() }