Skip to content

Commit 7b31ee7

Browse files
committed
feat: honor provider Retry-After headers on 429 responses in embedders
- Parse and honor Retry-After header from providers on rate limit errors
- Support multiple header formats: Retry-After, X-RateLimit-Reset-After, X-RateLimit-Reset
- Add support for Gemini structured retry info in error response body
- Update global rate limit state to prefer provider-specified delays over exponential backoff
- Add comprehensive tests for Retry-After header handling
- Improve rate limit handling to reduce unnecessary delays and quota exhaustion

Fixes #8101
1 parent 87b45de commit 7b31ee7

File tree

3 files changed

+598
-37
lines changed

3 files changed

+598
-37
lines changed

src/services/code-index/embedders/__tests__/openai-compatible.spec.ts

Lines changed: 259 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -448,6 +448,265 @@ describe("OpenAICompatibleEmbedder", () => {
448448
})
449449
})
450450

451+
it("should honor Retry-After header when present", async () => {
452+
const testTexts = ["Hello world"]
453+
const testEmbedding = new Float32Array([0.25, 0.5, 0.75])
454+
const base64String = Buffer.from(testEmbedding.buffer).toString("base64")
455+
456+
// Create error with Retry-After header info
457+
const rateLimitError: any = {
458+
status: 429,
459+
message: "Rate limit exceeded",
460+
headers: {
461+
"retry-after": "3", // 3 seconds
462+
},
463+
rateLimitInfo: { retryAfterMs: 3000 },
464+
}
465+
466+
mockEmbeddingsCreate.mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce({
467+
data: [{ embedding: base64String }],
468+
usage: { prompt_tokens: 10, total_tokens: 15 },
469+
})
470+
471+
const resultPromise = embedder.createEmbeddings(testTexts)
472+
473+
// First attempt fails immediately
474+
await vitest.advanceTimersByTimeAsync(100)
475+
476+
// Should wait for provider-specified 3 seconds (plus 1s buffer = 4s)
477+
await vitest.advanceTimersByTimeAsync(4000)
478+
479+
const result = await resultPromise
480+
481+
expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
482+
expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("(using provider-specified delay)"))
483+
expect(result).toEqual({
484+
embeddings: [[0.25, 0.5, 0.75]],
485+
usage: { promptTokens: 10, totalTokens: 15 },
486+
})
487+
})
488+
489+
it("should parse Retry-After header as HTTP-date", async () => {
490+
const testTexts = ["Hello world"]
491+
const fullUrl = "https://api.example.com/v1/embeddings"
492+
const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, testModelId)
493+
494+
// Future date 5 seconds from now
495+
const futureDate = new Date(Date.now() + 5000)
496+
const httpDate = futureDate.toUTCString()
497+
498+
const mockFetch = global.fetch as MockedFunction<typeof fetch>
499+
mockFetch
500+
.mockResolvedValueOnce({
501+
ok: false,
502+
status: 429,
503+
headers: {
504+
get: (name: string) => (name === "retry-after" ? httpDate : null),
505+
},
506+
text: async () => "Rate limited",
507+
} as any)
508+
.mockResolvedValueOnce({
509+
ok: true,
510+
status: 200,
511+
json: async () => ({
512+
data: [{ embedding: [0.1, 0.2, 0.3] }],
513+
usage: { prompt_tokens: 10, total_tokens: 15 },
514+
}),
515+
} as any)
516+
517+
const resultPromise = embedder.createEmbeddings(testTexts)
518+
519+
// First attempt fails
520+
await vitest.advanceTimersByTimeAsync(100)
521+
522+
// Should wait approximately 5 seconds (plus buffer)
523+
await vitest.advanceTimersByTimeAsync(6000)
524+
525+
const result = await resultPromise
526+
527+
expect(mockFetch).toHaveBeenCalledTimes(2)
528+
expect(result.embeddings).toEqual([[0.1, 0.2, 0.3]])
529+
})
530+
531+
it("should handle X-RateLimit-Reset-After header", async () => {
532+
const testTexts = ["Hello world"]
533+
const fullUrl = "https://api.example.com/v1/embeddings"
534+
const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, testModelId)
535+
536+
const mockFetch = global.fetch as MockedFunction<typeof fetch>
537+
mockFetch
538+
.mockResolvedValueOnce({
539+
ok: false,
540+
status: 429,
541+
headers: {
542+
get: (name: string) => (name === "x-ratelimit-reset-after" ? "2" : null),
543+
},
544+
text: async () => "Rate limited",
545+
} as any)
546+
.mockResolvedValueOnce({
547+
ok: true,
548+
status: 200,
549+
json: async () => ({
550+
data: [{ embedding: [0.1, 0.2, 0.3] }],
551+
usage: { prompt_tokens: 10, total_tokens: 15 },
552+
}),
553+
} as any)
554+
555+
const resultPromise = embedder.createEmbeddings(testTexts)
556+
557+
// First attempt fails
558+
await vitest.advanceTimersByTimeAsync(100)
559+
560+
// Should wait 2 seconds (plus buffer)
561+
await vitest.advanceTimersByTimeAsync(3000)
562+
563+
const result = await resultPromise
564+
565+
expect(mockFetch).toHaveBeenCalledTimes(2)
566+
expect(result.embeddings).toEqual([[0.1, 0.2, 0.3]])
567+
})
568+
569+
it("should handle X-RateLimit-Reset header with Unix timestamp", async () => {
570+
const testTexts = ["Hello world"]
571+
const fullUrl = "https://api.example.com/v1/embeddings"
572+
const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, testModelId)
573+
574+
// Unix timestamp 4 seconds in the future
575+
const resetTimestamp = Math.floor((Date.now() + 4000) / 1000)
576+
577+
const mockFetch = global.fetch as MockedFunction<typeof fetch>
578+
mockFetch
579+
.mockResolvedValueOnce({
580+
ok: false,
581+
status: 429,
582+
headers: {
583+
get: (name: string) => (name === "x-ratelimit-reset" ? resetTimestamp.toString() : null),
584+
},
585+
text: async () => "Rate limited",
586+
} as any)
587+
.mockResolvedValueOnce({
588+
ok: true,
589+
status: 200,
590+
json: async () => ({
591+
data: [{ embedding: [0.1, 0.2, 0.3] }],
592+
usage: { prompt_tokens: 10, total_tokens: 15 },
593+
}),
594+
} as any)
595+
596+
const resultPromise = embedder.createEmbeddings(testTexts)
597+
598+
// First attempt fails
599+
await vitest.advanceTimersByTimeAsync(100)
600+
601+
// Should wait approximately 4 seconds (plus buffer)
602+
await vitest.advanceTimersByTimeAsync(5000)
603+
604+
const result = await resultPromise
605+
606+
expect(mockFetch).toHaveBeenCalledTimes(2)
607+
expect(result.embeddings).toEqual([[0.1, 0.2, 0.3]])
608+
})
609+
610+
it("should handle Gemini-style structured retry info in error body", async () => {
611+
const testTexts = ["Hello world"]
612+
const fullUrl = "https://generativelanguage.googleapis.com/v1beta/openai/embeddings"
613+
const embedder = new OpenAICompatibleEmbedder(fullUrl, testApiKey, testModelId)
614+
615+
const errorBody = {
616+
error: {
617+
code: 429,
618+
message: "Resource exhausted",
619+
details: [
620+
{
621+
metadata: {
622+
retry_delay: "10s",
623+
},
624+
},
625+
],
626+
},
627+
}
628+
629+
const mockFetch = global.fetch as MockedFunction<typeof fetch>
630+
mockFetch
631+
.mockResolvedValueOnce({
632+
ok: false,
633+
status: 429,
634+
headers: {
635+
get: () => null,
636+
},
637+
text: async () => JSON.stringify(errorBody),
638+
} as any)
639+
.mockResolvedValueOnce({
640+
ok: true,
641+
status: 200,
642+
json: async () => ({
643+
data: [{ embedding: [0.1, 0.2, 0.3] }],
644+
usage: { prompt_tokens: 10, total_tokens: 15 },
645+
}),
646+
} as any)
647+
648+
const resultPromise = embedder.createEmbeddings(testTexts)
649+
650+
// First attempt fails
651+
await vitest.advanceTimersByTimeAsync(100)
652+
653+
// Should wait 10 seconds (plus buffer)
654+
await vitest.advanceTimersByTimeAsync(11000)
655+
656+
const result = await resultPromise
657+
658+
expect(mockFetch).toHaveBeenCalledTimes(2)
659+
expect(result.embeddings).toEqual([[0.1, 0.2, 0.3]])
660+
})
661+
662+
it("should parse duration strings correctly", async () => {
663+
const embedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, testModelId)
664+
665+
// Access private method for testing
666+
const parseDurationString = (embedder as any).parseDurationString.bind(embedder)
667+
668+
expect(parseDurationString("10s")).toBe(10000)
669+
expect(parseDurationString("2m")).toBe(120000)
670+
expect(parseDurationString("1h")).toBe(3600000)
671+
expect(parseDurationString("invalid")).toBeUndefined()
672+
expect(parseDurationString(null)).toBeUndefined()
673+
expect(parseDurationString("")).toBeUndefined()
674+
})
675+
676+
it("should fall back to exponential backoff when no Retry-After is provided", async () => {
677+
const testTexts = ["Hello world"]
678+
const rateLimitError = {
679+
status: 429,
680+
message: "Rate limit exceeded",
681+
// No headers or rateLimitInfo
682+
}
683+
684+
const testEmbedding = new Float32Array([0.25, 0.5, 0.75])
685+
const base64String = Buffer.from(testEmbedding.buffer).toString("base64")
686+
687+
mockEmbeddingsCreate.mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce({
688+
data: [{ embedding: base64String }],
689+
usage: { prompt_tokens: 10, total_tokens: 15 },
690+
})
691+
692+
const resultPromise = embedder.createEmbeddings(testTexts)
693+
694+
// First attempt fails
695+
await vitest.advanceTimersByTimeAsync(100)
696+
697+
// Should use exponential backoff (5s for first retry)
698+
await vitest.advanceTimersByTimeAsync(5000)
699+
700+
const result = await resultPromise
701+
702+
expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
703+
expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("(using exponential backoff)"))
704+
expect(result).toEqual({
705+
embeddings: [[0.25, 0.5, 0.75]],
706+
usage: { promptTokens: 10, totalTokens: 15 },
707+
})
708+
})
709+
451710
it("should not retry on non-rate-limit errors", async () => {
452711
const testTexts = ["Hello world"]
453712
const authError = new Error("Unauthorized")

0 commit comments

Comments
 (0)