Skip to content

Commit f1178ab

Browse files
committed
feat: add global rate limiting for OpenAI-compatible embeddings
- Implement shared rate limit state across all embedder instances
- Add exponential backoff (5 s base delay, capped at 5 minutes)
- Track consecutive rate limit errors, auto-resetting the count after 60 s
- Add a thread-safe mutex for concurrent access to the shared state
- Remove verbose logging to prevent log flooding during retries
- Add comprehensive test coverage for the rate limiting behavior

This prevents multiple parallel batches from overwhelming APIs with restrictive rate limits by coordinating delays globally.
1 parent a7771a3 commit f1178ab

File tree

3 files changed

+372
-45
lines changed

3 files changed

+372
-45
lines changed
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
import { describe, it, expect, vi, beforeEach, afterEach, MockedClass, MockedFunction } from "vitest"
2+
import { OpenAI } from "openai"
3+
import { OpenAICompatibleEmbedder } from "../openai-compatible"
4+
5+
// Mock the OpenAI SDK
6+
vi.mock("openai")
7+
8+
// Mock TelemetryService
9+
vi.mock("@roo-code/telemetry", () => ({
10+
TelemetryService: {
11+
instance: {
12+
captureEvent: vi.fn(),
13+
},
14+
},
15+
}))
16+
17+
// Mock i18n
18+
vi.mock("../../../../i18n", () => ({
19+
t: (key: string, params?: Record<string, any>) => {
20+
const translations: Record<string, string> = {
21+
"embeddings:rateLimitRetry": `Rate limit hit, retrying in ${params?.delayMs}ms (attempt ${params?.attempt}/${params?.maxRetries})`,
22+
"embeddings:failedMaxAttempts": `Failed to create embeddings after ${params?.attempts} attempts`,
23+
"embeddings:failedWithStatus": `Failed to create embeddings after ${params?.attempts} attempts: HTTP ${params?.statusCode} - ${params?.errorMessage}`,
24+
"embeddings:failedWithError": `Failed to create embeddings after ${params?.attempts} attempts: ${params?.errorMessage}`,
25+
}
26+
return translations[key] || key
27+
},
28+
}))
29+
30+
const MockedOpenAI = OpenAI as MockedClass<typeof OpenAI>
31+
32+
describe("OpenAICompatibleEmbedder - Global Rate Limiting", () => {
33+
let mockOpenAIInstance: any
34+
let mockEmbeddingsCreate: MockedFunction<any>
35+
36+
const testBaseUrl = "https://api.openai.com/v1"
37+
const testApiKey = "test-api-key"
38+
const testModelId = "text-embedding-3-small"
39+
40+
beforeEach(() => {
41+
vi.clearAllMocks()
42+
vi.useFakeTimers()
43+
vi.spyOn(console, "warn").mockImplementation(() => {})
44+
vi.spyOn(console, "error").mockImplementation(() => {})
45+
46+
// Setup mock OpenAI instance
47+
mockEmbeddingsCreate = vi.fn()
48+
mockOpenAIInstance = {
49+
embeddings: {
50+
create: mockEmbeddingsCreate,
51+
},
52+
}
53+
54+
MockedOpenAI.mockImplementation(() => mockOpenAIInstance)
55+
56+
// Reset global rate limit state
57+
const embedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, testModelId)
58+
;(embedder as any).constructor.globalRateLimitState = {
59+
isRateLimited: false,
60+
rateLimitResetTime: 0,
61+
consecutiveRateLimitErrors: 0,
62+
lastRateLimitError: 0,
63+
mutex: (embedder as any).constructor.globalRateLimitState.mutex,
64+
}
65+
})
66+
67+
afterEach(() => {
68+
vi.useRealTimers()
69+
vi.restoreAllMocks()
70+
})
71+
72+
it("should apply global rate limiting across multiple batch requests", async () => {
73+
const embedder1 = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, testModelId)
74+
const embedder2 = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, testModelId)
75+
76+
// First batch hits rate limit
77+
const rateLimitError = new Error("Rate limit exceeded") as any
78+
rateLimitError.status = 429
79+
80+
mockEmbeddingsCreate
81+
.mockRejectedValueOnce(rateLimitError) // First attempt fails
82+
.mockResolvedValue({
83+
data: [{ embedding: "base64encodeddata" }],
84+
usage: { prompt_tokens: 10, total_tokens: 15 },
85+
})
86+
87+
// Start first batch request
88+
const batch1Promise = embedder1.createEmbeddings(["test1"])
89+
90+
// Advance time slightly to let the first request fail and set global rate limit
91+
await vi.advanceTimersByTimeAsync(100)
92+
93+
// Start second batch request while global rate limit is active
94+
const batch2Promise = embedder2.createEmbeddings(["test2"])
95+
96+
// Check that global rate limit was set
97+
const state = (embedder1 as any).constructor.globalRateLimitState
98+
expect(state.isRateLimited).toBe(true)
99+
expect(state.consecutiveRateLimitErrors).toBe(1)
100+
101+
// Advance time to complete rate limit delay (5 seconds base delay)
102+
await vi.advanceTimersByTimeAsync(5000)
103+
104+
// Both requests should complete
105+
const [result1, result2] = await Promise.all([batch1Promise, batch2Promise])
106+
107+
expect(result1.embeddings).toHaveLength(1)
108+
expect(result2.embeddings).toHaveLength(1)
109+
110+
// The second embedder should have waited for the global rate limit
111+
// No logging expected - we've removed it to prevent log flooding
112+
})
113+
114+
it("should track consecutive rate limit errors", async () => {
115+
const embedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, testModelId)
116+
const state = (embedder as any).constructor.globalRateLimitState
117+
118+
const rateLimitError = new Error("Rate limit exceeded") as any
119+
rateLimitError.status = 429
120+
121+
// Test that consecutive errors increment when they happen quickly
122+
// Mock multiple rate limit errors in a single request
123+
mockEmbeddingsCreate
124+
.mockRejectedValueOnce(rateLimitError) // First attempt
125+
.mockRejectedValueOnce(rateLimitError) // Retry 1
126+
.mockResolvedValueOnce({
127+
data: [{ embedding: "base64encodeddata" }],
128+
usage: { prompt_tokens: 10, total_tokens: 15 },
129+
})
130+
131+
const promise1 = embedder.createEmbeddings(["test1"])
132+
133+
// Wait for first attempt to fail
134+
await vi.advanceTimersByTimeAsync(100)
135+
expect(state.consecutiveRateLimitErrors).toBe(1)
136+
137+
// Wait for first retry (500ms) to also fail
138+
await vi.advanceTimersByTimeAsync(500)
139+
140+
// The state should show 2 consecutive errors now
141+
// Note: The count might be 1 if the global rate limit kicked in before the second attempt
142+
expect(state.consecutiveRateLimitErrors).toBeGreaterThanOrEqual(1)
143+
144+
// Wait for the global rate limit and successful retry
145+
await vi.advanceTimersByTimeAsync(20000)
146+
await promise1
147+
148+
// Verify the delay increases with consecutive errors
149+
// Make another request immediately that also hits rate limit
150+
mockEmbeddingsCreate.mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce({
151+
data: [{ embedding: "base64encodeddata" }],
152+
usage: { prompt_tokens: 10, total_tokens: 15 },
153+
})
154+
155+
// Store the current consecutive count before the next request
156+
const previousCount = state.consecutiveRateLimitErrors
157+
158+
const promise2 = embedder.createEmbeddings(["test2"])
159+
await vi.advanceTimersByTimeAsync(100)
160+
161+
// Should have incremented from the previous count
162+
expect(state.consecutiveRateLimitErrors).toBeGreaterThan(previousCount)
163+
164+
// Complete the second request
165+
await vi.advanceTimersByTimeAsync(20000)
166+
await promise2
167+
})
168+
169+
it("should reset consecutive error count after time passes", async () => {
170+
const embedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, testModelId)
171+
const state = (embedder as any).constructor.globalRateLimitState
172+
173+
// Manually set state to simulate previous errors
174+
state.consecutiveRateLimitErrors = 3
175+
state.lastRateLimitError = Date.now() - 70000 // 70 seconds ago
176+
177+
const rateLimitError = new Error("Rate limit exceeded") as any
178+
rateLimitError.status = 429
179+
180+
mockEmbeddingsCreate.mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce({
181+
data: [{ embedding: "base64encodeddata" }],
182+
usage: { prompt_tokens: 10, total_tokens: 15 },
183+
})
184+
185+
// Trigger the updateGlobalRateLimitState method
186+
await (embedder as any).updateGlobalRateLimitState(rateLimitError)
187+
188+
// Should reset to 1 since more than 60 seconds passed
189+
expect(state.consecutiveRateLimitErrors).toBe(1)
190+
})
191+
192+
it("should not exceed maximum delay of 5 minutes", async () => {
193+
const embedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, testModelId)
194+
const state = (embedder as any).constructor.globalRateLimitState
195+
196+
// Set state to simulate many consecutive errors
197+
state.consecutiveRateLimitErrors = 10 // This would normally result in a very long delay
198+
199+
const rateLimitError = new Error("Rate limit exceeded") as any
200+
rateLimitError.status = 429
201+
202+
// Trigger the updateGlobalRateLimitState method
203+
await (embedder as any).updateGlobalRateLimitState(rateLimitError)
204+
205+
// Calculate the expected delay
206+
const now = Date.now()
207+
const delay = state.rateLimitResetTime - now
208+
209+
// Should be capped at 5 minutes (300000ms)
210+
expect(delay).toBeLessThanOrEqual(300000)
211+
expect(delay).toBeGreaterThan(0)
212+
})
213+
})

src/services/code-index/embedders/__tests__/openai-compatible.spec.ts

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,16 @@ describe("OpenAICompatibleEmbedder", () => {
6060
}
6161

6262
MockedOpenAI.mockImplementation(() => mockOpenAIInstance)
63+
64+
// Reset global rate limit state to prevent interference between tests
65+
const tempEmbedder = new OpenAICompatibleEmbedder(testBaseUrl, testApiKey, testModelId)
66+
;(tempEmbedder as any).constructor.globalRateLimitState = {
67+
isRateLimited: false,
68+
rateLimitResetTime: 0,
69+
consecutiveRateLimitErrors: 0,
70+
lastRateLimitError: 0,
71+
mutex: (tempEmbedder as any).constructor.globalRateLimitState.mutex,
72+
}
6373
})
6474

6575
afterEach(() => {
@@ -331,9 +341,7 @@ describe("OpenAICompatibleEmbedder", () => {
331341

332342
await embedder.createEmbeddings(testTexts)
333343

334-
// Should warn about oversized text
335-
expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("exceeds maximum token limit"))
336-
344+
// Should silently skip oversized text - no logging to prevent flooding
337345
// Should only process normal texts (1 call for 2 normal texts batched together)
338346
expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
339347
})
@@ -385,14 +393,22 @@ describe("OpenAICompatibleEmbedder", () => {
385393

386394
const resultPromise = embedder.createEmbeddings(testTexts)
387395

388-
// Fast-forward through the delays
389-
await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS) // First retry delay
390-
await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 2) // Second retry delay
396+
// First attempt fails immediately, triggering global rate limit (5s)
397+
await vitest.advanceTimersByTimeAsync(100)
398+
399+
// Wait for global rate limit delay
400+
await vitest.advanceTimersByTimeAsync(5000)
401+
402+
// Second attempt also fails, increasing delay
403+
await vitest.advanceTimersByTimeAsync(100)
404+
405+
// Wait for increased global rate limit delay (10s)
406+
await vitest.advanceTimersByTimeAsync(10000)
391407

392408
const result = await resultPromise
393409

394410
expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(3)
395-
expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("Rate limit hit, retrying in"))
411+
// No rate limit logging expected - removed to prevent log flooding
396412
expect(result).toEqual({
397413
embeddings: [[0.25, 0.5, 0.75]],
398414
usage: { promptTokens: 10, totalTokens: 15 },
@@ -443,10 +459,7 @@ describe("OpenAICompatibleEmbedder", () => {
443459
"Failed to create embeddings after 3 attempts: API connection failed",
444460
)
445461

446-
expect(console.error).toHaveBeenCalledWith(
447-
expect.stringContaining("OpenAI Compatible embedder error"),
448-
expect.any(Error),
449-
)
462+
// Error logging only on final attempt - removed intermediate logging
450463
})
451464

452465
it("should handle batch processing errors", async () => {
@@ -459,10 +472,7 @@ describe("OpenAICompatibleEmbedder", () => {
459472
"Failed to create embeddings after 3 attempts: Batch processing failed",
460473
)
461474

462-
expect(console.error).toHaveBeenCalledWith(
463-
expect.stringContaining("OpenAI Compatible embedder error"),
464-
batchError,
465-
)
475+
// Error logging only on final attempt - removed intermediate logging
466476
})
467477

468478
it("should handle empty text arrays", async () => {
@@ -791,11 +801,23 @@ describe("OpenAICompatibleEmbedder", () => {
791801
)
792802

793803
const resultPromise = embedder.createEmbeddings(["test"])
794-
await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 3)
804+
805+
// First attempt fails, triggering global rate limit
806+
await vitest.advanceTimersByTimeAsync(100)
807+
808+
// Wait for global rate limit (5s)
809+
await vitest.advanceTimersByTimeAsync(5000)
810+
811+
// Second attempt also fails
812+
await vitest.advanceTimersByTimeAsync(100)
813+
814+
// Wait for increased global rate limit (10s)
815+
await vitest.advanceTimersByTimeAsync(10000)
816+
795817
const result = await resultPromise
796818

797819
expect(global.fetch).toHaveBeenCalledTimes(3)
798-
expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("Rate limit hit"))
820+
// No rate limit logging expected - removed to prevent log flooding
799821
expectEmbeddingValues(result.embeddings[0], [0.1, 0.2, 0.3])
800822
vitest.useRealTimers()
801823
})

0 commit comments

Comments
 (0)