@@ -294,6 +294,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
294294
295295 const embeddings = response . data . map ( ( item ) => item . embedding as number [ ] )
296296
297+ // Reset consecutive errors on success
298+ await this . resetGlobalRateLimitOnSuccess ( )
299+
297300 return {
298301 embeddings : embeddings ,
299302 usage : {
@@ -315,14 +318,9 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
315318 // Check if it's a rate limit error
316319 const httpError = error as HttpError
317320 if ( httpError ?. status === 429 ) {
318- // Update global rate limit state
319- await this . updateGlobalRateLimitState ( httpError )
320-
321321 if ( hasMoreAttempts ) {
322- // Calculate delay based on global rate limit state
323- const baseDelay = INITIAL_DELAY_MS * Math . pow ( 2 , attempts )
324- const globalDelay = await this . getGlobalRateLimitDelay ( )
325- const delayMs = Math . max ( baseDelay , globalDelay )
322+ // Update global rate limit state and get the delay
323+ const delayMs = await this . updateGlobalRateLimitState ( httpError , attempts )
326324
327325 console . warn (
328326 t ( "embeddings:rateLimitRetry" , {
@@ -434,14 +432,20 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
434432 }
435433
436434 /**
437- * Updates global rate limit state when a 429 error occurs
435+ * Updates global rate limit state when a 429 error occurs and returns the delay to use
438436 */
439- private async updateGlobalRateLimitState ( error : HttpError ) : Promise < void > {
437+ private async updateGlobalRateLimitState ( error : HttpError , attemptNumber : number ) : Promise < number > {
440438 const release = await OpenAICompatibleEmbedder . globalRateLimitState . mutex . acquire ( )
441439 try {
442440 const state = OpenAICompatibleEmbedder . globalRateLimitState
443441 const now = Date . now ( )
444442
443+ // Check if we're already in a rate limit period
444+ if ( state . isRateLimited && state . rateLimitResetTime > now ) {
445+ // Return the remaining wait time
446+ return state . rateLimitResetTime - now
447+ }
448+
445449 // Increment consecutive rate limit errors
446450 if ( now - state . lastRateLimitError < 60000 ) {
447451 // Within 1 minute
@@ -452,16 +456,47 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
452456
453457 state . lastRateLimitError = now
454458
455- // Calculate exponential backoff based on consecutive errors
459+ // Calculate exponential backoff based on consecutive errors AND attempt number
460+ // Use the maximum of the two to ensure proper backoff
456461 const baseDelay = 5000 // 5 seconds base
457462 const maxDelay = 300000 // 5 minutes max
458- const exponentialDelay = Math . min ( baseDelay * Math . pow ( 2 , state . consecutiveRateLimitErrors - 1 ) , maxDelay )
463+
464+ // Calculate delay based on consecutive errors across all requests
465+ const globalExponentialDelay = Math . min (
466+ baseDelay * Math . pow ( 2 , state . consecutiveRateLimitErrors - 1 ) ,
467+ maxDelay ,
468+ )
469+
470+ // Calculate delay based on this specific request's attempt number
471+ const attemptExponentialDelay = Math . min ( INITIAL_DELAY_MS * Math . pow ( 2 , attemptNumber ) , maxDelay )
472+
473+ // Use the larger of the two delays
474+ const exponentialDelay = Math . max ( globalExponentialDelay , attemptExponentialDelay )
459475
460476 // Set global rate limit
461477 state . isRateLimited = true
462478 state . rateLimitResetTime = now + exponentialDelay
463479
464- // Silent rate limit activation - no logging to prevent flooding
480+ return exponentialDelay
481+ } finally {
482+ release ( )
483+ }
484+ }
485+
486+ /**
487+ * Clears the consecutive error count, the rate-limited flag and the reset time after a successful request, if any consecutive rate limit errors were recorded
488+ */
489+ private async resetGlobalRateLimitOnSuccess ( ) : Promise < void > {
490+ const release = await OpenAICompatibleEmbedder . globalRateLimitState . mutex . acquire ( )
491+ try {
492+ const state = OpenAICompatibleEmbedder . globalRateLimitState
493+
494+ // Reset rate limit state on success
495+ if ( state . consecutiveRateLimitErrors > 0 ) {
496+ state . consecutiveRateLimitErrors = 0
497+ state . isRateLimited = false
498+ state . rateLimitResetTime = 0
499+ }
465500 } finally {
466501 release ( )
467502 }
0 commit comments