diff --git a/src/services/code-index/embedders/__tests__/openai.spec.ts b/src/services/code-index/embedders/__tests__/openai.spec.ts
index c8e4706f39d..e4c51eb7f7e 100644
--- a/src/services/code-index/embedders/__tests__/openai.spec.ts
+++ b/src/services/code-index/embedders/__tests__/openai.spec.ts
@@ -3,6 +3,7 @@ import type { MockedClass, MockedFunction } from "vitest"
 import { OpenAI } from "openai"
 import { OpenAiEmbedder } from "../openai"
 import { MAX_BATCH_TOKENS, MAX_ITEM_TOKENS, MAX_BATCH_RETRIES, INITIAL_RETRY_DELAY_MS } from "../../constants"
+import { Mutex } from "async-mutex"
 
 // Mock the OpenAI SDK
 vitest.mock("openai")
@@ -48,6 +49,14 @@ describe("OpenAiEmbedder", () => {
 		consoleMocks.error.mockClear()
 		consoleMocks.warn.mockClear()
 
+		// Reset global rate limit state
+		;(OpenAiEmbedder as any).globalRateLimitState = {
+			isRateLimited: false,
+			rateLimitResetTime: 0,
+			rateLimitHeaders: {},
+			mutex: new Mutex(),
+		}
+
 		MockedOpenAI = OpenAI as MockedClass<typeof OpenAI>
 		mockEmbeddingsCreate = vitest.fn()
@@ -94,10 +103,18 @@ describe("OpenAiEmbedder", () => {
 		it("should create embeddings for a single text", async () => {
 			const testTexts = ["Hello world"]
 			const mockResponse = {
-				data: [{ embedding: [0.1, 0.2, 0.3] }],
-				usage: { prompt_tokens: 10, total_tokens: 15 },
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
-			mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -114,10 +131,18 @@ describe("OpenAiEmbedder", () => {
 		it("should create embeddings for multiple texts", async () => {
 			const testTexts = ["Hello world", "Another text"]
 			const mockResponse = {
-				data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
-				usage: { prompt_tokens: 20, total_tokens: 30 },
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
+					usage: { prompt_tokens: 20, total_tokens: 30 },
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
-			mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -138,10 +163,18 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const customModel = "text-embedding-ada-002"
 			const mockResponse = {
-				data: [{ embedding: [0.1, 0.2, 0.3] }],
-				usage: { prompt_tokens: 10, total_tokens: 15 },
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
-			mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await embedder.createEmbeddings(testTexts, customModel)
@@ -154,10 +187,18 @@ describe("OpenAiEmbedder", () => {
 		it("should handle missing usage data gracefully", async () => {
 			const testTexts = ["Hello world"]
 			const mockResponse = {
-				data: [{ embedding: [0.1, 0.2, 0.3] }],
-				usage: undefined,
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: undefined,
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
-			mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -175,10 +216,19 @@ describe("OpenAiEmbedder", () => {
 			// Use normal sized texts that won't be skipped
 			const testTexts = ["text1", "text2", "text3"]
 
-			mockEmbeddingsCreate.mockResolvedValue({
-				data: testTexts.map((_, i) => ({ embedding: [i, i + 0.1, i + 0.2] })),
-				usage: { prompt_tokens: 30, total_tokens: 45 },
-			})
+			const mockResponse = {
+				data: {
+					data: testTexts.map((_, i) => ({ embedding: [i, i + 0.1, i + 0.2] })),
+					usage: { prompt_tokens: 30, total_tokens: 45 },
+				},
+				response: {
+					headers: new Headers(),
+				},
+			}
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -193,10 +243,19 @@ describe("OpenAiEmbedder", () => {
 			const normalText = "normal text"
 			const testTexts = [normalText, oversizedText, "another normal"]
 
-			mockEmbeddingsCreate.mockResolvedValue({
-				data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
-				usage: { prompt_tokens: 20, total_tokens: 30 },
-			})
+			const mockResponse = {
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }, { embedding: [0.4, 0.5, 0.6] }],
+					usage: { prompt_tokens: 20, total_tokens: 30 },
+				},
+				response: {
+					headers: new Headers(),
+				},
+			}
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -222,21 +281,38 @@ describe("OpenAiEmbedder", () => {
 			// Mock responses for each batch
 			// First batch will have 12 texts (96000 tokens), second batch will have 3 texts (24000 tokens)
-			mockEmbeddingsCreate
-				.mockResolvedValueOnce({
+			const mockResponse1 = {
+				data: {
 					data: Array(12)
 						.fill(null)
 						.map((_, i) => ({ embedding: [i * 0.1, i * 0.1 + 0.1, i * 0.1 + 0.2] })),
 					usage: { prompt_tokens: 96000, total_tokens: 96000 },
-				})
-				.mockResolvedValueOnce({
+				},
+				response: {
+					headers: new Headers(),
+				},
+			}
+
+			const mockResponse2 = {
+				data: {
 					data: Array(3)
 						.fill(null)
 						.map((_, i) => ({
 							embedding: [(12 + i) * 0.1, (12 + i) * 0.1 + 0.1, (12 + i) * 0.1 + 0.2],
 						})),
 					usage: { prompt_tokens: 24000, total_tokens: 24000 },
-				})
+				},
+				response: {
+					headers: new Headers(),
+				},
+			}
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest
+				.fn()
+				.mockResolvedValueOnce(mockResponse1)
+				.mockResolvedValueOnce(mockResponse2)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.createEmbeddings(testTexts)
@@ -273,61 +349,206 @@ describe("OpenAiEmbedder", () => {
 			vitest.useRealTimers()
 		})
 
-		it("should retry on rate limit errors with exponential backoff", async () => {
+		it("should retry rate limit errors indefinitely", async () => {
 			const testTexts = ["Hello world"]
 			const rateLimitError = { status: 429, message: "Rate limit exceeded" }
 
-			mockEmbeddingsCreate
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn()
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
+
+			// Simulate multiple rate limit errors before success
+			mockWithResponse
+				.mockRejectedValueOnce(rateLimitError)
+				.mockRejectedValueOnce(rateLimitError)
 				.mockRejectedValueOnce(rateLimitError)
 				.mockRejectedValueOnce(rateLimitError)
 				.mockResolvedValueOnce({
-					data: [{ embedding: [0.1, 0.2, 0.3] }],
-					usage: { prompt_tokens: 10, total_tokens: 15 },
+					data: {
+						data: [{ embedding: [0.1, 0.2, 0.3] }],
+						usage: { prompt_tokens: 10, total_tokens: 15 },
+					},
+					response: {
+						headers: new Headers(),
+					},
 				})
 
 			const resultPromise = embedder.createEmbeddings(testTexts)
 
-			// Fast-forward through the delays
+			// Fast-forward through the delays (4 retries)
 			await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS) // First retry delay
 			await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 2) // Second retry delay
+			await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 4) // Third retry delay
+			await vitest.advanceTimersByTimeAsync(INITIAL_RETRY_DELAY_MS * 8) // Fourth retry delay
 
 			const result = await resultPromise
 
-			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(3)
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(5) // 1 initial + 4 retries
+			// Should only log once (on first retry) to avoid flooding logs
 			expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("Rate limit hit, retrying in"))
+			expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("attempt 1/∞"))
 			expect(result).toEqual({
 				embeddings: [[0.1, 0.2, 0.3]],
 				usage: { promptTokens: 10, totalTokens: 15 },
 			})
 		})
 
-		it("should not retry on non-rate-limit errors", async () => {
+		it("should use smart backoff based on rate limit headers", async () => {
+			const testTexts = ["Hello world"]
+			const rateLimitError = {
+				status: 429,
+				message: "Rate limit exceeded",
+				response: {
+					headers: new Headers({
+						"x-ratelimit-limit-requests": "60",
+						"x-ratelimit-limit-tokens": "150000",
+						"x-ratelimit-remaining-requests": "0",
+						"x-ratelimit-remaining-tokens": "0",
+						"x-ratelimit-reset-requests": "2s",
+						"x-ratelimit-reset-tokens": "30s",
+					}),
+				},
+			}
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn()
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
+
+			mockWithResponse.mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce({
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				},
+				response: {
+					headers: new Headers(),
+				},
+			})
+
+			const resultPromise = embedder.createEmbeddings(testTexts)
+
+			// The smart backoff should use 30s (max of 2s and 30s) + 10% buffer = 33s
+			await vitest.advanceTimersByTimeAsync(33000)
+
+			const result = await resultPromise
+
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(2)
+			// Should only log once (on first retry) to avoid flooding logs
+			expect(console.warn).toHaveBeenCalledTimes(2) // Once for rate limit message, once for rate limit details
+			expect(console.warn).toHaveBeenCalledWith(
+				expect.stringContaining("Rate limit hit, retrying in 33000ms"),
+			)
+			expect(console.warn).toHaveBeenCalledWith(
+				expect.stringContaining("Rate limits - Requests: 0/60, Tokens: 0/150000"),
+			)
+			expect(result).toEqual({
+				embeddings: [[0.1, 0.2, 0.3]],
+				usage: { promptTokens: 10, totalTokens: 15 },
+			})
+		})
+
+		it("should parse various reset time formats correctly", () => {
+			// Test the parseResetTime method directly
+			const testCases = [
+				{ resetTime: "1s", expectedMs: 1000 },
+				{ resetTime: "30s", expectedMs: 30000 },
+				{ resetTime: "6m0s", expectedMs: 360000 },
+				{ resetTime: "1h30m", expectedMs: 5400000 },
+				{ resetTime: "2h", expectedMs: 7200000 },
+				{ resetTime: "5m", expectedMs: 300000 },
+			]
+
+			// Access the private method for testing
+			const embedderAny = embedder as any
+
+			for (const { resetTime, expectedMs } of testCases) {
+				const result = embedderAny.parseResetTime(resetTime)
+				expect(result).toBe(expectedMs)
+			}
+		})
+
+		it("should calculate smart backoff correctly", () => {
+			// Test the calculateSmartBackoff method directly
+			const embedderAny = embedder as any
+
+			// Test with reset headers
+			const headers1 = {
+				resetRequests: "2s",
+				resetTokens: "30s",
+			}
+			// Should use max (30s) + 10% buffer = 33000ms
+			expect(embedderAny.calculateSmartBackoff(headers1, 0)).toBe(33000)
+
+			// Test with only request reset
+			const headers2 = {
+				resetRequests: "5s",
+			}
+			// Should use 5s + 10% buffer = 5500ms
+			expect(embedderAny.calculateSmartBackoff(headers2, 0)).toBe(5500)
+
+			// Test with no headers (fallback to exponential)
+			const headers3 = {}
+			// Should use exponential backoff
+			expect(embedderAny.calculateSmartBackoff(headers3, 0)).toBe(INITIAL_RETRY_DELAY_MS)
+			expect(embedderAny.calculateSmartBackoff(headers3, 1)).toBe(INITIAL_RETRY_DELAY_MS * 2)
+			expect(embedderAny.calculateSmartBackoff(headers3, 2)).toBe(INITIAL_RETRY_DELAY_MS * 4)
+		})
+
+		it("should not retry on non-rate-limit errors beyond MAX_RETRIES", async () => {
 			const testTexts = ["Hello world"]
 			const authError = new Error("Unauthorized")
 			;(authError as any).status = 401
 
-			mockEmbeddingsCreate.mockRejectedValue(authError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn()
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
+
+			// Always reject with auth error
+			mockWithResponse.mockRejectedValue(authError)
+
+			// Use real timers for this test to avoid unhandled promise rejection issues
+			vitest.useRealTimers()
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings: Authentication failed. Please check your OpenAI API key.",
 			)
 
-			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(MAX_BATCH_RETRIES)
 			expect(console.warn).not.toHaveBeenCalledWith(expect.stringContaining("Rate limit hit"))
+
+			// Re-enable fake timers for other tests
+			vitest.useFakeTimers()
 		})
 
-		it("should throw error immediately on non-retryable errors", async () => {
+		it("should retry non-rate-limit errors up to MAX_RETRIES", async () => {
 			const testTexts = ["Hello world"]
 			const serverError = new Error("Internal server error")
 			;(serverError as any).status = 500
 
-			mockEmbeddingsCreate.mockRejectedValue(serverError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn()
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
+
+			// Fail MAX_BATCH_RETRIES times
+			for (let i = 0; i < MAX_BATCH_RETRIES; i++) {
+				mockWithResponse.mockRejectedValueOnce(serverError)
+			}
+
+			// Use real timers for this test to avoid unhandled promise rejection issues
+			vitest.useRealTimers()
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: HTTP 500 - Internal server error",
 			)
 
-			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(1)
+			expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(MAX_BATCH_RETRIES)
+			// Check for the specific error message format
+			expect(console.warn).toHaveBeenCalledWith(
+				expect.stringContaining("OpenAI embedder error"),
+				expect.stringContaining("Internal server error"),
+			)
+
+			// Re-enable fake timers for other tests
+			vitest.useFakeTimers()
 		})
 	})
@@ -339,7 +560,9 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const apiError = new Error("API connection failed")
 
-			mockEmbeddingsCreate.mockRejectedValue(apiError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(apiError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: API connection failed",
@@ -366,11 +589,18 @@ describe("OpenAiEmbedder", () => {
 		it("should handle malformed API responses", async () => {
 			const testTexts = ["Hello world"]
 			const malformedResponse = {
-				data: null,
-				usage: { prompt_tokens: 10, total_tokens: 15 },
+				data: {
+					data: null,
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
 
-			mockEmbeddingsCreate.mockResolvedValue(malformedResponse)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(malformedResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow()
 		})
@@ -380,7 +610,9 @@ describe("OpenAiEmbedder", () => {
 			const authError = new Error("Invalid API key")
 			;(authError as any).status = 401
 
-			mockEmbeddingsCreate.mockRejectedValue(authError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(authError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings: Authentication failed. Please check your OpenAI API key.",
@@ -392,7 +624,9 @@ describe("OpenAiEmbedder", () => {
 			const httpError = new Error("Bad request")
 			;(httpError as any).status = 400
 
-			mockEmbeddingsCreate.mockRejectedValue(httpError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(httpError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: HTTP 400 - Bad request",
@@ -403,7 +637,9 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const networkError = new Error("Network timeout")
 
-			mockEmbeddingsCreate.mockRejectedValue(networkError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(networkError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: Network timeout",
@@ -414,7 +650,9 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const weirdError = { toString: () => "Custom error object" }
 
-			mockEmbeddingsCreate.mockRejectedValue(weirdError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(weirdError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: Custom error object",
@@ -425,7 +663,9 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const unknownError = null
 
-			mockEmbeddingsCreate.mockRejectedValue(unknownError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(unknownError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: Unknown error",
@@ -436,7 +676,9 @@ describe("OpenAiEmbedder", () => {
 			const testTexts = ["Hello world"]
 			const stringError = "Something went wrong"
 
-			mockEmbeddingsCreate.mockRejectedValue(stringError)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(stringError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: Something went wrong",
@@ -454,7 +696,9 @@ describe("OpenAiEmbedder", () => {
 				},
 			}
 
-			mockEmbeddingsCreate.mockRejectedValue(errorWithFailingToString)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(errorWithFailingToString)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			// The test framework itself throws "toString failed" when trying to
 			// display the error, so we need to expect that specific error
@@ -468,7 +712,9 @@ describe("OpenAiEmbedder", () => {
 				response: { status: 403 },
 			}
 
-			mockEmbeddingsCreate.mockRejectedValue(errorWithResponseStatus)
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(errorWithResponseStatus)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow(
 				"Failed to create embeddings after 3 attempts: HTTP 403 - Request failed",
			)
@@ -480,10 +726,18 @@ describe("OpenAiEmbedder", () => {
 	describe("validateConfiguration", () => {
 		it("should validate successfully with valid configuration", async () => {
 			const mockResponse = {
-				data: [{ embedding: [0.1, 0.2, 0.3] }],
-				usage: { prompt_tokens: 2, total_tokens: 2 },
+				data: {
+					data: [{ embedding: [0.1, 0.2, 0.3] }],
+					usage: { prompt_tokens: 2, total_tokens: 2 },
+				},
+				response: {
+					headers: new Headers(),
+				},
 			}
-			mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockResolvedValue(mockResponse)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.validateConfiguration()
@@ -498,7 +752,10 @@ describe("OpenAiEmbedder", () => {
 		it("should fail validation with authentication error", async () => {
 			const authError = new Error("Invalid API key")
 			;(authError as any).status = 401
-			mockEmbeddingsCreate.mockRejectedValue(authError)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(authError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.validateConfiguration()
@@ -509,7 +766,10 @@ describe("OpenAiEmbedder", () => {
 		it("should fail validation with rate limit error", async () => {
 			const rateLimitError = new Error("Rate limit exceeded")
 			;(rateLimitError as any).status = 429
-			mockEmbeddingsCreate.mockRejectedValue(rateLimitError)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(rateLimitError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.validateConfiguration()
@@ -519,7 +779,10 @@ describe("OpenAiEmbedder", () => {
 		it("should fail validation with connection error", async () => {
 			const connectionError = new Error("ECONNREFUSED")
-			mockEmbeddingsCreate.mockRejectedValue(connectionError)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(connectionError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.validateConfiguration()
@@ -530,7 +793,10 @@ describe("OpenAiEmbedder", () => {
 		it("should fail validation with generic error", async () => {
 			const genericError = new Error("Unknown error")
 			;(genericError as any).status = 500
-			mockEmbeddingsCreate.mockRejectedValue(genericError)
+
+			// Mock withResponse() to return the expected structure
+			const mockWithResponse = vitest.fn().mockRejectedValue(genericError)
+			mockEmbeddingsCreate.mockReturnValue({ withResponse: mockWithResponse })
 
 			const result = await embedder.validateConfiguration()
diff --git a/src/services/code-index/embedders/openai.ts b/src/services/code-index/embedders/openai.ts
index 471c3fd090d..0b2730b6d8d 100644
--- a/src/services/code-index/embedders/openai.ts
+++ b/src/services/code-index/embedders/openai.ts
@@ -13,6 +13,19 @@ import { t } from "../../../i18n"
 import { withValidationErrorHandling, formatEmbeddingError, HttpError } from "../shared/validation-helpers"
 import { TelemetryEventName } from "@roo-code/types"
 import { TelemetryService } from "@roo-code/telemetry"
+import { Mutex } from "async-mutex"
+
+/**
+ * Rate limit headers returned by OpenAI API
+ */
+interface RateLimitHeaders {
+	limitRequests?: number
+	limitTokens?: number
+	remainingRequests?: number
+	remainingTokens?: number
+	resetRequests?: string
+	resetTokens?: string
+}
 
 /**
  * OpenAI implementation of the embedder interface with batching and rate limiting
@@ -21,6 +34,15 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 	private embeddingsClient: OpenAI
 	private readonly defaultModelId: string
 
+	// Global rate limiting state shared across all instances
+	private static globalRateLimitState = {
+		isRateLimited: false,
+		rateLimitResetTime: 0,
+		rateLimitHeaders: {} as RateLimitHeaders,
+		// Mutex to ensure thread-safe access to rate limit state
+		mutex: new Mutex(),
+	}
+
 	/**
 	 * Creates a new OpenAI embedder
 	 * @param options API handler options
@@ -116,6 +138,95 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 		return { embeddings: allEmbeddings, usage }
 	}
 
+	/**
+	 * Extracts rate limit headers from the response
+	 * @param headers Response headers
+	 * @returns Parsed rate limit headers
+	 */
+	private extractRateLimitHeaders(headers: Headers): RateLimitHeaders {
+		return {
+			limitRequests: headers.get("x-ratelimit-limit-requests")
+				? parseInt(headers.get("x-ratelimit-limit-requests")!)
+				: undefined,
+			limitTokens: headers.get("x-ratelimit-limit-tokens")
+				? parseInt(headers.get("x-ratelimit-limit-tokens")!)
+				: undefined,
+			remainingRequests: headers.get("x-ratelimit-remaining-requests")
+				? parseInt(headers.get("x-ratelimit-remaining-requests")!)
+				: undefined,
+			remainingTokens: headers.get("x-ratelimit-remaining-tokens")
+				? parseInt(headers.get("x-ratelimit-remaining-tokens")!)
+				: undefined,
+			resetRequests: headers.get("x-ratelimit-reset-requests") || undefined,
+			resetTokens: headers.get("x-ratelimit-reset-tokens") || undefined,
+		}
+	}
+
+	/**
+	 * Calculates the optimal delay based on rate limit headers
+	 * @param headers Rate limit headers
+	 * @param attempt Current attempt number
+	 * @returns Delay in milliseconds
+	 */
+	private calculateSmartBackoff(headers: RateLimitHeaders, attempt: number): number {
+		// If we have reset times, use them to calculate optimal delay
+		if (headers.resetRequests || headers.resetTokens) {
+			const delays: number[] = []
+
+			// Parse reset times (format: "1s", "6m0s", etc.)
+			if (headers.resetRequests) {
+				const requestResetMs = this.parseResetTime(headers.resetRequests)
+				if (requestResetMs > 0) delays.push(requestResetMs)
+			}
+
+			if (headers.resetTokens) {
+				const tokenResetMs = this.parseResetTime(headers.resetTokens)
+				if (tokenResetMs > 0) delays.push(tokenResetMs)
+			}
+
+			// Use the maximum delay to ensure both limits are respected
+			if (delays.length > 0) {
+				const maxDelay = Math.max(...delays)
+				// Add a small buffer (10%) to account for clock differences
+				return Math.ceil(maxDelay * 1.1)
+			}
+		}
+
+		// Fall back to exponential backoff if no headers available
+		return INITIAL_DELAY_MS * Math.pow(2, attempt)
+	}
+
+	/**
+	 * Parses reset time string to milliseconds
+	 * @param resetTime Reset time string (e.g., "1s", "6m0s")
+	 * @returns Time in milliseconds
+	 */
+	private parseResetTime(resetTime: string): number {
+		let totalMs = 0
+
+		// Match patterns like "6m", "30s", "6m0s"
+		const matches = resetTime.matchAll(/(\d+)([hms])/g)
+
+		for (const match of matches) {
+			const value = parseInt(match[1])
+			const unit = match[2]
+
+			switch (unit) {
+				case "h":
+					totalMs += value * 60 * 60 * 1000
+					break
+				case "m":
+					totalMs += value * 60 * 1000
+					break
+				case "s":
+					totalMs += value * 1000
+					break
+			}
+		}
+
+		return totalMs
+	}
+
 	/**
 	 * Helper method to handle batch embedding with retries and exponential backoff
 	 * @param batchTexts Array of texts to embed in this batch
@@ -126,12 +237,21 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 		batchTexts: string[],
 		model: string,
 	): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
-		for (let attempts = 0; attempts < MAX_RETRIES; attempts++) {
+		let lastRateLimitHeaders: RateLimitHeaders = {}
+		let attempts = 0
+
+		while (true) {
+			// Check global rate limit before attempting request
+			await this.waitForGlobalRateLimit()
+
 			try {
-				const response = await this.embeddingsClient.embeddings.create({
-					input: batchTexts,
-					model: model,
-				})
+				// Use withResponse() to get both data and response headers
+				const { data: response, response: httpResponse } = await this.embeddingsClient.embeddings
+					.create({
+						input: batchTexts,
+						model: model,
+					})
+					.withResponse()
 
 				return {
 					embeddings: response.data.map((item) => item.embedding),
@@ -141,40 +261,75 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 					},
 				}
 			} catch (error: any) {
-				const hasMoreAttempts = attempts < MAX_RETRIES - 1
+				attempts++
 
-				// Check if it's a rate limit error
+				// Try to extract headers from the error response if available
+				if (error?.response?.headers) {
+					lastRateLimitHeaders = this.extractRateLimitHeaders(error.response.headers)
+				}
+
+				// Check if it's a rate limit error - retry indefinitely for 429
 				const httpError = error as HttpError
-				if (httpError?.status === 429 && hasMoreAttempts) {
-					const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts)
-					console.warn(
-						t("embeddings:rateLimitRetry", {
-							delayMs,
-							attempt: attempts + 1,
-							maxRetries: MAX_RETRIES,
-						}),
-					)
+				if (httpError?.status === 429) {
+					// Update global rate limit state
+					await this.updateGlobalRateLimitState(lastRateLimitHeaders)
+
+					const delayMs = this.calculateSmartBackoff(lastRateLimitHeaders, attempts - 1)
+
+					// Only log on first retry to avoid flooding logs
+					if (attempts === 1) {
+						console.warn(
+							t("embeddings:rateLimitRetry", {
+								delayMs,
+								attempt: attempts,
+								maxRetries: "∞", // Infinite retries for rate limits
+							}),
+						)
+
+						if (
+							lastRateLimitHeaders.remainingRequests !== undefined ||
+							lastRateLimitHeaders.remainingTokens !== undefined
+						) {
+							console.warn(
+								`Rate limits - Requests: ${lastRateLimitHeaders.remainingRequests ?? "N/A"}/${lastRateLimitHeaders.limitRequests ?? "N/A"}, ` +
+									`Tokens: ${lastRateLimitHeaders.remainingTokens ?? "N/A"}/${lastRateLimitHeaders.limitTokens ?? "N/A"}`,
+							)
+						}
+					}
+
+					await new Promise((resolve) => setTimeout(resolve, delayMs))
 					continue
 				}
 
-				// Capture telemetry before reformatting the error
-				TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
-					error: error instanceof Error ? error.message : String(error),
-					stack: error instanceof Error ? error.stack : undefined,
-					location: "OpenAiEmbedder:_embedBatchWithRetries",
-					attempt: attempts + 1,
-				})
+				// For non-rate-limit errors, apply the retry limit
+				if (attempts >= MAX_RETRIES) {
+					// Capture telemetry before reformatting the error
+					TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
+						error: error instanceof Error ? error.message : String(error),
+						stack: error instanceof Error ? error.stack : undefined,
+						location: "OpenAiEmbedder:_embedBatchWithRetries",
+						attempt: attempts,
+						rateLimitHeaders: lastRateLimitHeaders,
+					})
+
+					// Log the error for debugging
+					console.error(`OpenAI embedder error (attempt ${attempts}/${MAX_RETRIES}):`, error)
+
+					// Format and throw the error
+					throw formatEmbeddingError(error, MAX_RETRIES)
+				}
+
+				// For other errors, retry with exponential backoff up to MAX_RETRIES
+				const delayMs = INITIAL_DELAY_MS * Math.pow(2, attempts - 1)
 
-				// Log the error for debugging
-				console.error(`OpenAI embedder error (attempt ${attempts + 1}/${MAX_RETRIES}):`, error)
+				console.warn(
+					`OpenAI embedder error (attempt ${attempts}/${MAX_RETRIES}), retrying in ${delayMs}ms:`,
+					error instanceof Error ? error.message : String(error),
+				)
 
-				// Format and throw the error
-				throw formatEmbeddingError(error, MAX_RETRIES)
+				await new Promise((resolve) => setTimeout(resolve, delayMs))
 			}
 		}
-
-		throw new Error(t("embeddings:failedMaxAttempts", { attempts: MAX_RETRIES }))
 	}
 
 	/**
@@ -184,11 +339,13 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 	async validateConfiguration(): Promise<{ valid: boolean; error?: string }> {
 		return withValidationErrorHandling(async () => {
 			try {
-				// Test with a minimal embedding request
-				const response = await this.embeddingsClient.embeddings.create({
-					input: ["test"],
-					model: this.defaultModelId,
-				})
+				// Test with a minimal embedding request using withResponse to check headers
+				const { data: response } = await this.embeddingsClient.embeddings
+					.create({
+						input: ["test"],
+						model: this.defaultModelId,
+					})
+					.withResponse()
 
 				// Check if we got a valid response
 				if (!response.data || response.data.length === 0) {
@@ -216,4 +373,55 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
 			name: "openai",
 		}
 	}
+
+	/**
+	 * Waits if there's an active global rate limit
+	 */
+	private async waitForGlobalRateLimit(): Promise<void> {
+		const release = await OpenAiEmbedder.globalRateLimitState.mutex.acquire()
+		try {
+			const state = OpenAiEmbedder.globalRateLimitState
+
+			if (state.isRateLimited && state.rateLimitResetTime > Date.now()) {
+				const waitTime = state.rateLimitResetTime - Date.now()
+				// Silent wait - no logging to prevent flooding
+				release() // Release mutex before waiting
+				await new Promise((resolve) => setTimeout(resolve, waitTime))
+				return
+			}
+
+			// Reset rate limit if time has passed
+			if (state.isRateLimited && state.rateLimitResetTime <= Date.now()) {
+				state.isRateLimited = false
+				state.rateLimitHeaders = {}
+			}
+		} finally {
+			// Only release if we haven't already
+			try {
+				release()
+			} catch {
+				// Already released
+			}
+		}
+	}
+
+	/**
+	 * Updates global rate limit state when a 429 error occurs
+	 */
+	private async updateGlobalRateLimitState(headers: RateLimitHeaders): Promise<void> {
+		const release = await OpenAiEmbedder.globalRateLimitState.mutex.acquire()
+		try {
+			const state = OpenAiEmbedder.globalRateLimitState
+
+			// Calculate delay based on headers
+			const delayMs = this.calculateSmartBackoff(headers, 0)
+
+			// Set global rate limit
+			state.isRateLimited = true
+			state.rateLimitResetTime = Date.now() + delayMs
+			state.rateLimitHeaders = headers
+		} finally {
+			release()
+		}
+	}
 }
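
Reviewer note, not part of the patch: a quick, self-contained sanity check of the backoff math above. This TypeScript sketch mirrors the parseResetTime/calculateSmartBackoff logic from the diff so the expected values in the tests (33000ms, 5500ms) can be verified by hand. INITIAL_DELAY_MS = 500 is an assumed illustrative value here, not necessarily the repo's actual constant.

// Standalone sketch of the smart-backoff rules, assuming INITIAL_DELAY_MS = 500 for illustration only.
const INITIAL_DELAY_MS = 500

// Parse duration strings like "2s", "30s", "6m0s", "1h30m" into milliseconds.
function parseResetTime(resetTime: string): number {
	let totalMs = 0
	for (const [, value, unit] of resetTime.matchAll(/(\d+)([hms])/g)) {
		const n = parseInt(value, 10)
		if (unit === "h") totalMs += n * 3_600_000
		else if (unit === "m") totalMs += n * 60_000
		else totalMs += n * 1_000
	}
	return totalMs
}

// Take the larger of the two reset windows and add a 10% buffer;
// fall back to exponential backoff when no headers are present.
function calculateSmartBackoff(
	reset: { resetRequests?: string; resetTokens?: string },
	attempt: number,
): number {
	const delays = [reset.resetRequests, reset.resetTokens]
		.filter((r): r is string => !!r)
		.map(parseResetTime)
		.filter((ms) => ms > 0)
	if (delays.length > 0) return Math.ceil(Math.max(...delays) * 1.1)
	return INITIAL_DELAY_MS * Math.pow(2, attempt)
}

console.log(calculateSmartBackoff({ resetRequests: "2s", resetTokens: "30s" }, 0)) // 33000, as the test expects
console.log(calculateSmartBackoff({ resetRequests: "5s" }, 0)) // 5500
console.log(calculateSmartBackoff({}, 2)) // 2000 = INITIAL_DELAY_MS * 2^2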