Skip to content

Commit ad6e5f2

Browse files
committed
feat: improve rate limit handling with exponential backoff and jitter
- Add jitter (0-20%) to exponential backoff to prevent thundering herd
- Extend retry support to transient errors (500, 502, 503, 504)
- Add isRetryableError() helper method for centralized retry logic
- Update tests to cover new retry behavior and jitter functionality
- Improve reliability for Gemini and all OpenAI-compatible embedders
1 parent 8c8888a commit ad6e5f2

File tree

2 files changed

+123
-23
lines changed

2 files changed

+123
-23
lines changed

src/services/code-index/embedders/__tests__/openai-compatible.spec.ts

Lines changed: 89 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -367,7 +367,7 @@ describe("OpenAICompatibleEmbedder", () => {
367367
vitest.useRealTimers()
368368
})
369369

370-
it("should retry on rate limit errors with exponential backoff", async () => {
370+
it("should retry on rate limit errors with exponential backoff and jitter", async () => {
371371
const testTexts = ["Hello world"]
372372
const rateLimitError = { status: 429, message: "Rate limit exceeded" }
373373

@@ -385,9 +385,9 @@ describe("OpenAICompatibleEmbedder", () => {
385385

386386
const resultPromise = embedder.createEmbeddings(testTexts)
387387

388-
// Fast-forward through the delays
389-
await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS) // First retry delay
390-
await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 2) // Second retry delay
388+
// Fast-forward through the delays (with max jitter)
389+
await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 1.2) // First retry delay with max jitter
390+
await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 2 * 1.2) // Second retry delay with max jitter
391391

392392
const result = await resultPromise
393393

@@ -399,6 +399,45 @@ describe("OpenAICompatibleEmbedder", () => {
399399
})
400400
})
401401

402+
it("should retry on other transient errors (500, 502, 503, 504)", async () => {
403+
const testTexts = ["Hello world"]
404+
const transientErrors = [
405+
{ status: 500, message: "Internal Server Error" },
406+
{ status: 502, message: "Bad Gateway" },
407+
{ status: 503, message: "Service Unavailable" },
408+
{ status: 504, message: "Gateway Timeout" },
409+
]
410+
411+
for (const error of transientErrors) {
412+
vitest.clearAllMocks()
413+
414+
// Create base64 encoded embedding for successful response
415+
const testEmbedding = new Float32Array([0.25, 0.5, 0.75])
416+
const base64String = Buffer.from(testEmbedding.buffer).toString("base64")
417+
418+
mockEmbeddingsCreate.mockRejectedValueOnce(error).mockResolvedValueOnce({
419+
data: [{ embedding: base64String }],
420+
usage: { prompt_tokens: 10, total_tokens: 15 },
421+
})
422+
423+
const resultPromise = embedder.createEmbeddings(testTexts)
424+
425+
// Fast-forward through the delay with max jitter
426+
await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 1.2)
427+
428+
const result = await resultPromise
429+
430+
expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
431+
expect(console.warn).toHaveBeenCalledWith(
432+
expect.stringContaining(`Error ${error.status} hit, retrying in`),
433+
)
434+
expect(result).toEqual({
435+
embeddings: [[0.25, 0.5, 0.75]],
436+
usage: { promptTokens: 10, totalTokens: 15 },
437+
})
438+
}
439+
})
440+
402441
it("should not retry on non-rate-limit errors", async () => {
403442
const testTexts = ["Hello world"]
404443
const authError = new Error("Unauthorized")
@@ -416,16 +455,26 @@ describe("OpenAICompatibleEmbedder", () => {
416455

417456
it("should throw error immediately on non-retryable errors", async () => {
418457
const testTexts = ["Hello world"]
419-
const serverError = new Error("Internal server error")
420-
;(serverError as any).status = 500
458+
const nonRetryableErrors = [
459+
{ status: 400, message: "Bad Request" },
460+
{ status: 403, message: "Forbidden" },
461+
{ status: 404, message: "Not Found" },
462+
]
421463

422-
mockEmbeddingsCreate.mockRejectedValue(serverError)
464+
for (const error of nonRetryableErrors) {
465+
vitest.clearAllMocks()
466+
const testError = new Error(error.message)
467+
;(testError as any).status = error.status
423468

424-
await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
425-
"Failed to create embeddings after 3 attempts: HTTP 500 - Internal server error",
426-
)
469+
mockEmbeddingsCreate.mockRejectedValue(testError)
427470

428-
expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
471+
await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
472+
`Failed to create embeddings after 3 attempts: HTTP ${error.status} - ${error.message}`,
473+
)
474+
475+
expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
476+
expect(console.warn).not.toHaveBeenCalledWith(expect.stringContaining("hit, retrying in"))
477+
}
429478
})
430479
})
431480

@@ -775,7 +824,7 @@ describe("OpenAICompatibleEmbedder", () => {
775824
await expect(embedder.createEmbeddings(["test"])).rejects.toThrow(expectedMessage)
776825
})
777826

778-
it("should handle rate limiting with retries", async () => {
827+
it("should handle rate limiting with retries and jitter", async () => {
779828
vitest.useFakeTimers()
780829
const embedder = new OpenAICompatibleEmbedder(azureUrl, testApiKey, testModelId)
781830
const base64String = createBase64Embedding([0.1, 0.2, 0.3])
@@ -791,7 +840,10 @@ describe("OpenAICompatibleEmbedder", () => {
791840
)
792841

793842
const resultPromise = embedder.createEmbeddings(["test"])
794-
await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 3)
843+
// Account for max jitter (20%)
844+
await vitest.advanceTimersByTimeAsync(
845+
INITIAL_RETRY_DELAY_MS * 1.2 + INITIAL_RETRY_DELAY_MS * 2 * 1.2,
846+
)
795847
const result = await resultPromise
796848

797849
expect(global.fetch).toHaveBeenCalledTimes(3)
@@ -800,6 +852,30 @@ describe("OpenAICompatibleEmbedder", () => {
800852
vitest.useRealTimers()
801853
})
802854

855+
it("should handle other transient errors with retries", async () => {
856+
vitest.useFakeTimers()
857+
const embedder = new OpenAICompatibleEmbedder(azureUrl, testApiKey, testModelId)
858+
const base64String = createBase64Embedding([0.1, 0.2, 0.3])
859+
860+
;(global.fetch as MockedFunction<typeof fetch>)
861+
.mockResolvedValueOnce(createMockResponse({}, 503, false) as any)
862+
.mockResolvedValueOnce(
863+
createMockResponse({
864+
data: [{ embedding: base64String }],
865+
usage: { prompt_tokens: 10, total_tokens: 15 },
866+
}) as any,
867+
)
868+
869+
const resultPromise = embedder.createEmbeddings(["test"])
870+
await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 1.2)
871+
const result = await resultPromise
872+
873+
expect(global.fetch).toHaveBeenCalledTimes(2)
874+
expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("Error 503 hit"))
875+
expectEmbeddingValues(result.embeddings[0], [0.1, 0.2, 0.3])
876+
vitest.useRealTimers()
877+
})
878+
803879
it("should handle multiple embeddings and network errors", async () => {
804880
const embedder = new OpenAICompatibleEmbedder(azureUrl, testApiKey, testModelId)
805881

src/services/code-index/embedders/openai-compatible.ts

Lines changed: 34 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -296,17 +296,20 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
296296

297297
const hasMoreAttempts = attempts < MAX_RETRIES - 1
298298

299-
// Check if it's a rate limit error
299+
// Check if it's a retryable error
300300
const httpError = error as HttpError
301-
if (httpError?.status === 429 && hasMoreAttempts) {
302-
const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts)
303-
console.warn(
304-
t("embeddings:rateLimitRetry", {
305-
delayMs,
306-
attempt: attempts + 1,
307-
maxRetries: MAX_RETRIES,
308-
}),
309-
)
301+
const isRetryableError = this.isRetryableError(httpError)
302+
303+
if (isRetryableError && hasMoreAttempts) {
304+
// Calculate exponential backoff with jitter
305+
const baseDelay = INITIAL_DELAY_MS * Math.pow(2, attempts)
306+
// Add jitter: random value between 0% and 20% of base delay
307+
const jitter = Math.random() * 0.2 * baseDelay
308+
const delayMs = Math.floor(baseDelay + jitter)
309+
310+
const errorType =
311+
httpError?.status === 429 ? "Rate limit" : `Error ${httpError?.status || "unknown"}`
312+
console.warn(`${errorType} hit, retrying in ${delayMs}ms (attempt ${attempts + 1}/${MAX_RETRIES})`)
310313
await new Promise((resolve) => setTimeout(resolve, delayMs))
311314
continue
312315
}
@@ -368,6 +371,27 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
368371
}, "openai-compatible")
369372
}
370373

374+
/**
375+
* Determines if an error is retryable based on HTTP status code
376+
* @param error The error to check
377+
* @returns true if the error is retryable, false otherwise
378+
*/
379+
private isRetryableError(error: HttpError | any): boolean {
380+
if (!error || typeof error.status !== "number") {
381+
return false
382+
}
383+
384+
const retryableStatuses = [
385+
429, // Too Many Requests (rate limit)
386+
500, // Internal Server Error
387+
502, // Bad Gateway
388+
503, // Service Unavailable
389+
504, // Gateway Timeout
390+
]
391+
392+
return retryableStatuses.includes(error.status)
393+
}
394+
371395
/**
372396
* Returns information about this embedder
373397
*/

0 commit comments

Comments
 (0)