diff --git a/src/services/code-index/embedders/__tests__/openai.spec.ts b/src/services/code-index/embedders/__tests__/openai.spec.ts
index c8e4706f39d..e4c51eb7f7e 100644
--- a/src/services/code-index/embedders/__tests__/openai.spec.ts
+++ b/src/services/code-index/embedders/__tests__/openai.spec.ts
@@ -3,6 +3,7 @@ import type { MockedClass, MockedFunction } from "vitest"
 import { OpenAI } from "openai"
 import { OpenAiEmbedder } from "../openai"
 import { MAX_BATCH_TOKENS, MAX_ITEM_TOKENS, MAX_BATCH_RETRIES, INITIAL_RETRY_DELAY_MS } from "../../constants"
+import { Mutex } from "async-mutex"
 
 // Mock the OpenAI SDK
 vitest.mock("openai")
@@ -48,6 +49,14 @@ describe("OpenAiEmbedder", () => {
 		consoleMocks.error.mockClear()
 		consoleMocks.warn.mockClear()
 
+		// Reset global rate limit state
+		;(OpenAiEmbedder as any).globalRateLimitState = {
+			isRateLimited: false,
+			rateLimitResetTime: 0,
+			rateLimitHeaders: {},
+			mutex: new Mutex(),
+		}
+
 		MockedOpenAI = OpenAI as MockedClass<typeof OpenAI>
 		mockEmbeddingsCreate = vitest.fn()
@@ -94,10 +103,18 @@ describe("OpenAiEmbedder", () => {
 		it("should create embeddings for a single text", async () => {
 			const testTexts = ["Hello world"]
 			const mockResponse = {
-				data: [{ embedding: [0.1, 0.2, 0.3] }],
-				usage: { prompt_tokens: 10, total_tokens: 15 },
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
-			mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -114,10 +131,18 @@ describe("OpenAiEmbedder", () => {
 		it("should create embeddings for multiple texts", async () => {
 			const testTexts = ["Hello world", "Another text"]
 			const mockResponse = {
-				data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
-				usage: { prompt_tokens: 20, total_tokens: 30 },
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
+					usage: { prompt_tokens: 20, total_tokens: 30 },
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
-			mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -138,10 +163,18 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const customModel = "text-embedding-ada-002"
 			const mockResponse = {
-				data: [{ embedding: [0.1, 0.2, 0.3] }],
-				usage: { prompt_tokens: 10, total_tokens: 15 },
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
-			mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await embedder.createEmbeddings(testTexts, customModel)
@@ -154,10 +187,18 @@ describe("OpenAiEmbedder", () => {
 		it("should handle missing usage data gracefully", async () => {
 			const testTexts = ["Hello world"]
 			const mockResponse = {
-				data: [{ embedding: [0.1, 0.2, 0.3] }],
-				usage: undefined,
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: undefined,
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
-			mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -175,10 +216,19 @@ describe("OpenAiEmbedder", () => {
 			// Use normal sized texts that won't be skipped
 			const testTexts = ["text1", "text2", "text3"]
 
-			mockEmbeddingsCreate.mockResolvedValue({
-				data: testTexts.map((_, i) => ({ embedding: [i, i + 0.1, i + 0.2] })),
-				usage: { prompt_tokens: 30, total_tokens: 45 },
-			})
+			const mockResponse = {
+				data: {
+					data: testTexts.map((_, i) => ({ embedding: [i, i + 0.1, i + 0.2] })),
+					usage: { prompt_tokens: 30, total_tokens: 45 },
+				},
+				response: {
+					headers: new Headers(),
+				},
+			}
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -193,10 +243,19 @@ describe("OpenAiEmbedder", () => {
 			const normalText = "normal text"
 			const testTexts = [normalText, oversizedText, "another normal"]
 
-			mockEmbeddingsCreate.mockResolvedValue({
-				data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
-				usage: { prompt_tokens: 20, total_tokens: 30 },
-			})
+			const mockResponse = {
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
+					usage: { prompt_tokens: 20, total_tokens: 30 },
+				},
+				response: {
+					headers: new Headers(),
+				},
+			}
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -222,21 +281,38 @@ describe("OpenAiEmbedder", () => {
 			// Mock responses for each batch
 			// First batch will have 12 texts (96000 tokens), second batch will have 3 texts (24000 tokens)
-			mockEmbeddingsCreate
-				.mockResolvedValueOnce({
+			const mockResponse1 = {
+				data: {
 					data: Array(12)
 						.fill(null)
 						.map((_, i) => ({ embedding: [i * 0.1, i * 0.1 + 0.1, i * 0.1 + 0.2] })),
 					usage: { prompt_tokens: 96000, total_tokens: 96000 },
-				})
-				.mockResolvedValueOnce({
+				},
+				response: {
+					headers: new Headers(),
+				},
+			}
+
+			const mockResponse2 = {
+				data: {
 					data: Array(3)
 						.fill(null)
 						.map((_, i) => ({
 							embedding: [(12 + i) * 0.1, (12 + i) * 0.1 + 0.1, (12 + i) * 0.1 + 0.2],
 						})),
 					usage: { prompt_tokens: 24000, total_tokens: 24000 },
-				})
+				},
+				response: {
+					headers: new Headers(),
+				},
+			}
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest
+				.fn()
+				.mockResolvedValueOnce(mockResponse1)
+				.mockResolvedValueOnce(mockResponse2)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -273,61 +349,206 @@ describe("OpenAiEmbedder", () => {
 			vitest.useRealTimers()
 		})
 
-		it("should retry on rate limit errors with exponential backoff", async () => {
+		it("should retry rate limit errors indefinitely", async () => {
 			const testTexts = ["Hello world"]
 			const rateLimitError = { status: 429, message: "Rate limit exceeded" }
 
-			mockEmbeddingsCreate
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn()
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
+
+			// Simulate multiple rate limit errors before success
+			mockWithResponse
+				.mockRejectedValueOnce(rateLimitError)
+				.mockRejectedValueOnce(rateLimitError)
 				.mockRejectedValueOnce(rateLimitError)
 				.mockRejectedValueOnce(rateLimitError)
 				.mockResolvedValueOnce({
-					data: [{ embedding: [0.1, 0.2, 0.3] }],
-					usage: { prompt_tokens: 10, total_tokens: 15 },
+					data: {
+						data: [{ embedding: [0.1, 0.2, 0.3] }],
+						usage: { prompt_tokens: 10, total_tokens: 15 },
+					},
+					response: {
+						headers: new Headers(),
+					},
 				})
 
 			const resultPromise = embedder.createEmbeddings(testTexts)
 
-			// Fast-forward through the delays
+			// Fast-forward through the delays (4 retries)
 			await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS) // First retry delay
 			await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 2) // Second retry delay
+			await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 4) // Third retry delay
+			await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 8) // Fourth retry delay
 
 			const result = await resultPromise
 
-			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(3)
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(5) // 1 initial + 4 retries
+			// Should only log once (on first retry) to avoid flooding logs
 			expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("Rate limit hit, retrying in"))
+			expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("attempt 1/∞"))
 			expect(result).toEqual({
 				embeddings: [[0.1, 0.2, 0.3]],
 				usage: { promptTokens: 10, totalTokens: 15 },
 			})
 		})
 
-		it("should not retry on non-rate-limit errors", async () => {
+		it("should use smart backoff based on rate limit headers", async () => {
+			const testTexts = ["Hello world"]
+			const rateLimitError = {
+				status: 429,
+				message: "Rate limit exceeded",
+				response: {
+					headers: new Headers({
+						"x-ratelimit-limit-requests": "60",
+						"x-ratelimit-limit-tokens": "150000",
+						"x-ratelimit-remaining-requests": "0",
+						"x-ratelimit-remaining-tokens": "0",
+						"x-ratelimit-reset-requests": "2s",
+						"x-ratelimit-reset-tokens": "30s",
+					}),
+				},
+			}
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn()
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
+
+			mockWithResponse.mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce({
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				},
+				response: {
+					headers: new Headers(),
+				},
+			})
+
+			const resultPromise = embedder.createEmbeddings(testTexts)
+
+			// The smart backoff should use 30s (max of 2s and 30s) + 10% buffer = 33s
+			await vitest.advanceTimersByTimeAsync(33000)
+
+			const result = await resultPromise
+
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
+			// Should only log once (on first retry) to avoid flooding logs
+			expect(console.warn).toHaveBeenCalledTimes(2) // Once for rate limit message, once for rate limit details
+			expect(console.warn).toHaveBeenCalledWith(
+				expect.stringContaining("Rate limit hit, retrying in 33000ms"),
+			)
+			expect(console.warn).toHaveBeenCalledWith(
+				expect.stringContaining("Rate limits - Requests: 0/60, Tokens: 0/150000"),
+			)
+			expect(result).toEqual({
+				embeddings: [[0.1, 0.2, 0.3]],
+				usage: { promptTokens: 10, totalTokens: 15 },
+			})
+		})
+
+		it("should parse various reset time formats correctly", () => {
+			// Test the parseResetTime method directly
+			const testCases = [
+				{ resetTime: "1s", expectedMs: 1000 },
+				{ resetTime: "30s", expectedMs: 30000 },
+				{ resetTime: "6m0s", expectedMs: 360000 },
+				{ resetTime: "1h30m", expectedMs: 5400000 },
+				{ resetTime: "2h", expectedMs: 7200000 },
+				{ resetTime: "5m", expectedMs: 300000 },
+			]
+
+			// Access the private method for testing
+			const embedderAny = embedder as any
+
+			for (const { resetTime, expectedMs } of testCases) {
+				const result = embedderAny.parseResetTime(resetTime)
+				expect(result).toBe(expectedMs)
+			}
+		})
+
+		it("should calculate smart backoff correctly", () => {
+			// Test the calculateSmartBackoff method directly
+			const embedderAny = embedder as any
+
+			// Test with reset headers
+			const headers1 = {
+				resetRequests: "2s",
+				resetTokens: "30s",
+			}
+			// Should use max (30s) + 10% buffer = 33000ms
+			expect(embedderAny.calculateSmartBackoff(headers1, 0)).toBe(33000)
+
+			// Test with only request reset
+			const headers2 = {
+				resetRequests: "5s",
+			}
+			// Should use 5s + 10% buffer = 5500ms
+			expect(embedderAny.calculateSmartBackoff(headers2, 0)).toBe(5500)
+
+			// Test with no headers (fallback to exponential)
+			const headers3 = {}
+			// Should use exponential backoff
+			expect(embedderAny.calculateSmartBackoff(headers3, 0)).toBe(INITIAL_RETRY_DELAY_MS)
+			expect(embedderAny.calculateSmartBackoff(headers3, 1)).toBe(INITIAL_RETRY_DELAY_MS * 2)
+			expect(embedderAny.calculateSmartBackoff(headers3, 2)).toBe(INITIAL_RETRY_DELAY_MS * 4)
+		})
+
+		it("should not retry on non-rate-limit errors beyond MAX_RETRIES", async () => {
 			const testTexts = ["Hello world"]
 			const authError = new Error("Unauthorized")
 			;(authError as any).status = 401
 
-			mockEmbeddingsCreate.mockRejectedValue(authError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn()
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
+
+			// Always reject with auth error
+			mockWithResponse.mockRejectedValue(authError)
+
+			// Use real timers for this test to avoid unhandled promise rejection issues
+			vitest.useRealTimers()
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings: Authentication failed. Please check your OpenAI API key.",
 			)
 
-			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(MAX_BATCH_RETRIES)
 			expect(console.warn).not.toHaveBeenCalledWith(expect.stringContaining("Rate limit hit"))
+
+			// Re-enable fake timers for other tests
+			vitest.useFakeTimers()
 		})
 
-		it("should throw error immediately on non-retryable errors", async () => {
+		it("should retry non-rate-limit errors up to MAX_RETRIES", async () => {
 			const testTexts = ["Hello world"]
 			const serverError = new Error("Internal server error")
 			;(serverError as any).status = 500
 
-			mockEmbeddingsCreate.mockRejectedValue(serverError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn()
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
+
+			// Fail MAX_BATCH_RETRIES times
+			for (let i = 0; i < MAX_BATCH_RETRIES; i++) {
+				mockWithResponse.mockRejectedValueOnce(serverError)
+			}
+
+			// Use real timers for this test to avoid unhandled promise rejection issues
+			vitest.useRealTimers()
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: HTTP 500 - Internal server error",
 			)
 
-			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(MAX_BATCH_RETRIES)
+			// Check for the specific error message format
+			expect(console.warn).toHaveBeenCalledWith(
+				expect.stringContaining("OpenAI embedder error"),
+				expect.stringContaining("Internal server error"),
+			)
+
+			// Re-enable fake timers for other tests
+			vitest.useFakeTimers()
 		})
 	})
@@ -339,7 +560,9 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const apiError = new Error("API connection failed")
 
-			mockEmbeddingsCreate.mockRejectedValue(apiError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(apiError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: API connection failed",
@@ -366,11 +589,18 @@ describe("OpenAiEmbedder", () => {
 		it("should handle malformed API responses", async () => {
 			const testTexts = ["Hello world"]
 			const malformedResponse = {
-				data: null,
-				usage: { prompt_tokens: 10, total_tokens: 15 },
+				data: {
+					data: null,
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
 
-			mockEmbeddingsCreate.mockResolvedValue(malformedResponse)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(malformedResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow()
 		})
@@ -380,7 +610,9 @@ describe("OpenAiEmbedder", () => {
 			const authError = new Error("Invalid API key")
 			;(authError as any).status = 401
 
-			mockEmbeddingsCreate.mockRejectedValue(authError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(authError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings: Authentication failed. Please check your OpenAI API key.",
@@ -392,7 +624,9 @@ describe("OpenAiEmbedder", () => {
 			const httpError = new Error("Bad request")
 			;(httpError as any).status = 400
 
-			mockEmbeddingsCreate.mockRejectedValue(httpError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(httpError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: HTTP 400 - Bad request",
@@ -403,7 +637,9 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const networkError = new Error("Network timeout")
 
-			mockEmbeddingsCreate.mockRejectedValue(networkError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(networkError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: Network timeout",
@@ -414,7 +650,9 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const weirdError = { toString: () => "Custom error object" }
 
-			mockEmbeddingsCreate.mockRejectedValue(weirdError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(weirdError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: Custom error object",
@@ -425,7 +663,9 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const unknownError = null
 
-			mockEmbeddingsCreate.mockRejectedValue(unknownError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(unknownError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: Unknown error",
@@ -436,7 +676,9 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const stringError = "Something went wrong"
 
-			mockEmbeddingsCreate.mockRejectedValue(stringError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(stringError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: Something went wrong",
@@ -454,7 +696,9 @@ describe("OpenAiEmbedder", () => {
 				},
 			}
 
-			mockEmbeddingsCreate.mockRejectedValue(errorWithFailingToString)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(errorWithFailingToString)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			// The test framework itself throws "toString failed" when trying to
 			// display the error, so we need to expect that specific error
@@ -468,7 +712,9 @@ describe("OpenAiEmbedder", () => {
 				response: { status: 403 },
 			}
 
-			mockEmbeddingsCreate.mockRejectedValue(errorWithResponseStatus)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(errorWithResponseStatus)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: HTTP 403 - Request failed",
			)
@@ -480,10 +726,18 @@ describe("OpenAiEmbedder", () => {
 	describe("validateConfiguration", () => {
 		it("should validate successfully with valid configuration", async () => {
 			const mockResponse = {
-				data: [{ embedding: [0.1, 0.2, 0.3] }],
-				usage: { prompt_tokens: 2, total_tokens: 2 },
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: { prompt_tokens: 2, total_tokens: 2 },
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
-			mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.validateConfiguration()
@@ -498,7 +752,10 @@ describe("OpenAiEmbedder", () => {
 		it("should fail validation with authentication error", async () => {
 			const authError = new Error("Invalid API key")
 			;(authError as any).status = 401
-			mockEmbeddingsCreate.mockRejectedValue(authError)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(authError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.validateConfiguration()
@@ -509,7 +766,10 @@ describe("OpenAiEmbedder", () => {
 		it("should fail validation with rate limit error", async () => {
 			const rateLimitError = new Error("Rate limit exceeded")
 			;(rateLimitError as any).status = 429
-			mockEmbeddingsCreate.mockRejectedValue(rateLimitError)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(rateLimitError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.validateConfiguration()
@@ -519,7 +779,10 @@ describe("OpenAiEmbedder", () => {
 		it("should fail validation with connection error", async () => {
 			const connectionError = new Error("ECONNREFUSED")
-			mockEmbeddingsCreate.mockRejectedValue(connectionError)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(connectionError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.validateConfiguration()
@@ -530,7 +793,10 @@ describe("OpenAiEmbedder", () => {
 		it("should fail validation with generic error", async () => {
 			const genericError = new Error("Unknown error")
 			;(genericError as any).status = 500
-			mockEmbeddingsCreate.mockRejectedValue(genericError)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(genericError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.validateConfiguration()
diff --git a/src/services/code-index/embedders/openai.ts b/src/services/code-index/embedders/openai.ts
index 471c3fd090d..0b2730b6d8d 100644
--- a/src/services/code-index/embedders/openai.ts
+++ b/src/services/code-index/embedders/openai.ts
@@ -13,6 +13,19 @@ import { t } from "../../../i18n"
 import { withValidationErrorHandling, formatEmbeddingError, HttpError } from "../shared/validation-helpers"
 import { TelemetryEventName } from "@roo-code/types"
 import { TelemetryService } from "@roo-code/telemetry"
+import { Mutex } from "async-mutex"
+
+/**
+ * Rate limit headers returned by OpenAI API
+ */
+interface RateLimitHeaders {
+	limitRequests?: number
+	limitTokens?: number
+	remainingRequests?: number
+	remainingTokens?: number
+	resetRequests?: string
+	resetTokens?: string
+}
 
 /**
  * OpenAI implementation of the embedder interface with batching and rate limiting
@@ -21,6 +34,15 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 	private embeddingsClient: OpenAI
 	private readonly defaultModelId: string
 
+	// Global rate limiting state shared across all instances
+	private static globalRateLimitState = {
+		isRateLimited: false,
+		rateLimitResetTime: 0,
+		rateLimitHeaders: {} as RateLimitHeaders,
+		// Mutex to ensure thread-safe access to rate limit state
+		mutex: new Mutex(),
+	}
+
 	/**
 	 * Creates a new OpenAI embedder
 	 * @param options API handler options
@@ -116,6 +138,95 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 		return { embeddings: allEmbeddings, usage }
 	}
 
+	/**
+	 * Extracts rate limit headers from the response
+	 * @param headers Response headers
+	 * @returns Parsed rate limit headers
+	 */
+	private extractRateLimitHeaders(headers: Headers): RateLimitHeaders {
+		return {
+			limitRequests: headers.get("x-ratelimit-limit-requests")
+				? parseInt(headers.get("x-ratelimit-limit-requests")!)
+				: undefined,
+			limitTokens: headers.get("x-ratelimit-limit-tokens")
+				? parseInt(headers.get("x-ratelimit-limit-tokens")!)
+				: undefined,
+			remainingRequests: headers.get("x-ratelimit-remaining-requests")
+				? parseInt(headers.get("x-ratelimit-remaining-requests")!)
+				: undefined,
+			remainingTokens: headers.get("x-ratelimit-remaining-tokens")
+				? parseInt(headers.get("x-ratelimit-remaining-tokens")!)
+				: undefined,
+			resetRequests: headers.get("x-ratelimit-reset-requests") || undefined,
+			resetTokens: headers.get("x-ratelimit-reset-tokens") || undefined,
+		}
+	}
+
+	/**
+	 * Calculates the optimal delay based on rate limit headers
+	 * @param headers Rate limit headers
+	 * @param attempt Current attempt number
+	 * @returns Delay in milliseconds
+	 */
+	private calculateSmartBackoff(headers: RateLimitHeaders, attempt: number): number {
+		// If we have reset times, use them to calculate optimal delay
+		if (headers.resetRequests || headers.resetTokens) {
+			const delays: number[] = []
+
+			// Parse reset times (format: "1s", "6m0s", etc.)
+			if (headers.resetRequests) {
+				const requestResetMs = this.parseResetTime(headers.resetRequests)
+				if (requestResetMs > 0) delays.push(requestResetMs)
+			}
+
+			if (headers.resetTokens) {
+				const tokenResetMs = this.parseResetTime(headers.resetTokens)
+				if (tokenResetMs > 0) delays.push(tokenResetMs)
+			}
+
+			// Use the maximum delay to ensure both limits are respected
+			if (delays.length > 0) {
+				const maxDelay = Math.max(...delays)
+				// Add a small buffer (10%) to account for clock differences
+				return Math.ceil(maxDelay * 1.1)
+			}
+		}
+
+		// Fall back to exponential backoff if no headers available
+		return INITIAL_DELAY_MS * Math.pow(2, attempt)
+	}
+
+	/**
+	 * Parses reset time string to milliseconds
+	 * @param resetTime Reset time string (e.g., "1s", "6m0s")
+	 * @returns Time in milliseconds
+	 */
+	private parseResetTime(resetTime: string): number {
+		let totalMs = 0
+
+		// Match patterns like "6m", "30s", "6m0s"
+		const matches = resetTime.matchAll(/(\d+)([hms])/g)
+
+		for (const match of matches) {
+			const value = parseInt(match[1])
+			const unit = match[2]
+
+			switch (unit) {
+				case "h":
+					totalMs += value * 60 * 60 * 1000
+					break
+				case "m":
+					totalMs += value * 60 * 1000
+					break
+				case "s":
+					totalMs += value * 1000
+					break
+			}
+		}
+
+		return totalMs
+	}
+
 	/**
 	 * Helper method to handle batch embedding with retries and exponential backoff
 	 * @param batchTexts Array of texts to embed in this batch
@@ -126,12 +237,21 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 		batchTexts: string[],
 		model: string,
 	): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
-		for (let attempts = 0; attempts < MAX_RETRIES; attempts++) {
+		let lastRateLimitHeaders: RateLimitHeaders = {}
+		let attempts = 0
+
+		while (true) {
+			// Check global rate limit before attempting request
+			await this.waitForGlobalRateLimit()
+
 			try {
-				const response = await this.embeddingsClient.embeddings.create({
-					input: batchTexts,
-					model: model,
-				})
+				// Use withResponse() to get both data and response headers
+				const { data: response, response: httpResponse } = await this.embeddingsClient.embeddings
+					.create({
+						input: batchTexts,
+						model: model,
+					})
+					.withResponse()
 
 				return {
 					embeddings: response.data.map((item) => item.embedding),
@@ -141,40 +261,75 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 					},
 				}
 			} catch (error: any) {
-				const hasMoreAttempts = attempts < MAX_RETRIES - 1
+				attempts++
 
-				// Check if it's a rate limit error
+				// Try to extract headers from the error response if available
+				if (error?.response?.headers) {
+					lastRateLimitHeaders = this.extractRateLimitHeaders(error.response.headers)
+				}
+
+				// Check if it's a rate limit error - retry indefinitely for 429
 				const httpError = error as HttpError
-				if (httpError?.status === 429 && hasMoreAttempts) {
-					const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts)
-					console.warn(
-						t("embeddings:rateLimitRetry", {
-							delayMs,
-							attempt: attempts + 1,
-							maxRetries: MAX_RETRIES,
-						}),
-					)
+				if (httpError?.status === 429) {
+					// Update global rate limit state
+					await this.updateGlobalRateLimitState(lastRateLimitHeaders)
+
+					const delayMs = this.calculateSmartBackoff(lastRateLimitHeaders, attempts - 1)
+
+					// Only log on first retry to avoid flooding logs
+					if (attempts === 1) {
+						console.warn(
+							t("embeddings:rateLimitRetry", {
+								delayMs,
+								attempt: attempts,
+								maxRetries: "∞", // Infinite retries for rate limits
+							}),
+						)
+
+						if (
+							lastRateLimitHeaders.remainingRequests !== undefined ||
+							lastRateLimitHeaders.remainingTokens !== undefined
+						) {
+							console.warn(
+								`Rate limits - Requests: ${lastRateLimitHeaders.remainingRequests ?? "N/A"}/${lastRateLimitHeaders.limitRequests ?? "N/A"}, ` +
+									`Tokens: ${lastRateLimitHeaders.remainingTokens ?? "N/A"}/${lastRateLimitHeaders.limitTokens ?? "N/A"}`,
+							)
+						}
+					}
+
+					await new Promise((resolve) => setTimeout(resolve, delayMs))
 					continue
 				}
 
-				// Capture telemetry before reformatting the error
-				TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
-					error: error instanceof Error ? error.message : String(error),
-					stack: error instanceof Error ? error.stack : undefined,
-					location: "OpenAiEmbedder:_embedBatchWithRetries",
-					attempt: attempts + 1,
-				})
+				// For non-rate-limit errors, apply the retry limit
+				if (attempts >= MAX_RETRIES) {
+					// Capture telemetry before reformatting the error
+					TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
+						error: error instanceof Error ? error.message : String(error),
+						stack: error instanceof Error ? error.stack : undefined,
+						location: "OpenAiEmbedder:_embedBatchWithRetries",
+						attempt: attempts,
+						rateLimitHeaders: lastRateLimitHeaders,
+					})
+
+					// Log the error for debugging
+					console.error(`OpenAI embedder error (attempt ${attempts}/${MAX_RETRIES}):`, error)
+
+					// Format and throw the error
+					throw formatEmbeddingError(error, MAX_RETRIES)
+				}
+
+				// For other errors, retry with exponential backoff up to MAX_RETRIES
+				const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts - 1)
 
-				// Log the error for debugging
-				console.error(`OpenAI embedder error (attempt ${attempts + 1}/${MAX_RETRIES}):`, error)
+				console.warn(
+					`OpenAI embedder error (attempt ${attempts}/${MAX_RETRIES}), retrying in ${delayMs}ms:`,
+					error instanceof Error ? error.message : String(error),
+				)
 
-				// Format and throw the error
-				throw formatEmbeddingError(error, MAX_RETRIES)
+				await new Promise((resolve) => setTimeout(resolve, delayMs))
 			}
 		}
-
-		throw new Error(t("embeddings:failedMaxAttempts", { attempts: MAX_RETRIES }))
 	}
 
 	/**
@@ -184,11 +339,13 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 	async validateConfiguration(): Promise<{ valid: boolean; error?: string }> {
 		return withValidationErrorHandling(async () => {
 			try {
-				// Test with a minimal embedding request
-				const response = await this.embeddingsClient.embeddings.create({
-					input: ["test"],
-					model: this.defaultModelId,
-				})
+				// Test with a minimal embedding request using withResponse to check headers
+				const { data: response } = await this.embeddingsClient.embeddings
+					.create({
+						input: ["test"],
+						model: this.defaultModelId,
+					})
+					.withResponse()
 
 				// Check if we got a valid response
 				if (!response.data || response.data.length === 0) {
@@ -216,4 +373,55 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 			name: "openai",
 		}
 	}
+
+	/**
+	 * Waits if there's an active global rate limit
+	 */
+	private async waitForGlobalRateLimit(): Promise<void> {
+		const release = await OpenAiEmbedder.globalRateLimitState.mutex.acquire()
+		try {
+			const state = OpenAiEmbedder.globalRateLimitState
+
+			if (state.isRateLimited && state.rateLimitResetTime > Date.now()) {
+				const waitTime = state.rateLimitResetTime - Date.now()
+				// Silent wait - no logging to prevent flooding
+				release() // Release mutex before waiting
+				await new Promise((resolve) => setTimeout(resolve, waitTime))
+				return
+			}
+
+			// Reset rate limit if time has passed
+			if (state.isRateLimited && state.rateLimitResetTime <= Date.now()) {
+				state.isRateLimited = false
+				state.rateLimitHeaders = {}
+			}
+		} finally {
+			// Only release if we haven't already
+			try {
+				release()
+			} catch {
+				// Already released
+			}
+		}
+	}
+
+	/**
+	 * Updates global rate limit state when a 429 error occurs
+	 */
+	private async updateGlobalRateLimitState(headers: RateLimitHeaders): Promise<void> {
+		const release = await OpenAiEmbedder.globalRateLimitState.mutex.acquire()
+		try {
+			const state = OpenAiEmbedder.globalRateLimitState
+
+			// Calculate delay based on headers
+			const delayMs = this.calculateSmartBackoff(headers, 0)
+
+			// Set global rate limit
+			state.isRateLimited = true
+			state.rateLimitResetTime = Date.now() + delayMs
+			state.rateLimitHeaders = headers
+		} finally {
+			release()
+		}
+	}
 }
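
Reviewer note, not part of the patch: a quick, self-contained sanity check of the backoff math above. This TypeScript sketch mirrors the parseResetTime/calculateSmartBackoff logic from the diff so the expected values in the tests (33000ms, 5500ms) can be verified by hand. INITIAL_DELAY_MS = 500 is an assumed illustrative value here, not necessarily the repo's actual constant.

// Standalone sketch of the smart-backoff rules, assuming INITIAL_DELAY_MS = 500 for illustration only.
const INITIAL_DELAY_MS = 500

// Parse duration strings like "2s", "30s", "6m0s", "1h30m" into milliseconds.
function parseResetTime(resetTime: string): number {
	let totalMs = 0
	for (const [, value, unit] of resetTime.matchAll(/(\d+)([hms])/g)) {
		const n = parseInt(value, 10)
		if (unit === "h") totalMs += n * 3_600_000
		else if (unit === "m") totalMs += n * 60_000
		else totalMs += n * 1_000
	}
	return totalMs
}

// Take the larger of the two reset windows and add a 10% buffer;
// fall back to exponential backoff when no headers are present.
function calculateSmartBackoff(
	reset: { resetRequests?: string; resetTokens?: string },
	attempt: number,
): number {
	const delays = [reset.resetRequests, reset.resetTokens]
		.filter((r): r is string => !!r)
		.map(parseResetTime)
		.filter((ms) => ms > 0)
	if (delays.length > 0) return Math.ceil(Math.max(...delays) * 1.1)
	return INITIAL_DELAY_MS * Math.pow(2, attempt)
}

console.log(calculateSmartBackoff({ resetRequests: "2s", resetTokens: "30s" }, 0)) // 33000, as the test expects
console.log(calculateSmartBackoff({ resetRequests: "5s" }, 0)) // 5500
console.log(calculateSmartBackoff({}, 2)) // 2000 = INITIAL_DELAY_MS * 2^2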