Commit 76e5a72

daniel-lxs and Luis Daniel Riccio Silva authored
feat: add native OpenAI provider support for Codex Mini model (#5386) (#6931)
Co-authored-by: Luis Daniel Riccio Silva <[email protected]>
1 parent f53fd39 commit 76e5a72

3 files changed: +275 −26 lines changed


packages/types/src/providers/openai.ts

Lines changed: 11 additions & 0 deletions

@@ -220,6 +220,17 @@ export const openAiNativeModels = {
 		outputPrice: 0.6,
 		cacheReadsPrice: 0.075,
 	},
+	"codex-mini-latest": {
+		maxTokens: 16_384,
+		contextWindow: 200_000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 1.5,
+		outputPrice: 6,
+		cacheReadsPrice: 0,
+		description:
+			"Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks. Trained with reinforcement learning to generate human-style code, adhere to instructions, and iteratively run tests.",
+	},
 } as const satisfies Record<string, ModelInfo>
 
 export const openAiModelInfoSaneDefaults: ModelInfo = {
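
The pricing fields above are USD per million tokens. As a rough sketch of the cost math the tests below verify (the helper name is hypothetical, not part of this commit):

// Sketch only: inputPrice ($1.5/M) and outputPrice ($6/M) come from the entry above.
function codexMiniCost(inputTokens: number, outputTokens: number): number {
	return (inputTokens / 1_000_000) * 1.5 + (outputTokens / 1_000_000) * 6
}

// e.g. 50 input + 10 output tokens => 0.000075 + 0.00006 = $0.000135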

src/api/providers/__tests__/openai-native.spec.ts

Lines changed: 239 additions & 0 deletions

@@ -1514,4 +1514,243 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 		// @ts-ignore
 		delete global.fetch
 	})
+
+	describe("Codex Mini Model", () => {
+		let handler: OpenAiNativeHandler
+		const mockOptions: ApiHandlerOptions = {
+			openAiNativeApiKey: "test-api-key",
+			apiModelId: "codex-mini-latest",
+		}
+
+		it("should handle codex-mini-latest streaming response", async () => {
+			// Mock fetch for Codex Mini responses API
+			const mockFetch = vitest.fn().mockResolvedValue({
+				ok: true,
+				body: new ReadableStream({
+					start(controller) {
+						// Codex Mini uses the same responses API format
+						controller.enqueue(
+							new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":"Hello"}\n\n'),
+						)
+						controller.enqueue(
+							new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":" from"}\n\n'),
+						)
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.output_text.delta","delta":" Codex"}\n\n',
+							),
+						)
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.output_text.delta","delta":" Mini!"}\n\n',
+							),
+						)
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.done","response":{"usage":{"prompt_tokens":50,"completion_tokens":10}}}\n\n',
+							),
+						)
+						controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
+						controller.close()
+					},
+				}),
+			})
+			global.fetch = mockFetch as any
+
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				apiModelId: "codex-mini-latest",
+			})
+
+			const systemPrompt = "You are a helpful coding assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "Write a hello world function" },
+			]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify text chunks
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks).toHaveLength(4)
+			expect(textChunks.map((c) => c.text).join("")).toBe("Hello from Codex Mini!")
+
+			// Verify usage data from API
+			const usageChunks = chunks.filter((c) => c.type === "usage")
+			expect(usageChunks).toHaveLength(1)
+			expect(usageChunks[0]).toMatchObject({
+				type: "usage",
+				inputTokens: 50,
+				outputTokens: 10,
+				totalCost: expect.any(Number), // Codex Mini has pricing: $1.5/M input, $6/M output
+			})
+
+			// Verify cost is calculated correctly based on API usage data
+			const expectedCost = (50 / 1_000_000) * 1.5 + (10 / 1_000_000) * 6
+			expect(usageChunks[0].totalCost).toBeCloseTo(expectedCost, 10)
+
+			// Verify the request was made with correct parameters
+			expect(mockFetch).toHaveBeenCalledWith(
+				"https://api.openai.com/v1/responses",
+				expect.objectContaining({
+					method: "POST",
+					headers: expect.objectContaining({
+						"Content-Type": "application/json",
+						Authorization: "Bearer test-api-key",
+						Accept: "text/event-stream",
+					}),
+					body: expect.any(String),
+				}),
+			)
+
+			const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body)
+			expect(requestBody).toMatchObject({
+				model: "codex-mini-latest",
+				input: "Developer: You are a helpful coding assistant.\n\nUser: Write a hello world function",
+				stream: true,
+			})
+
+			// Clean up
+			delete (global as any).fetch
+		})
+
+		it("should handle codex-mini-latest non-streaming completion", async () => {
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				apiModelId: "codex-mini-latest",
+			})
+
+			// Codex Mini now uses the same Responses API as GPT-5, which doesn't support non-streaming
+			await expect(handler.completePrompt("Write a hello world function in Python")).rejects.toThrow(
+				"completePrompt is not supported for codex-mini-latest. Use createMessage (Responses API) instead.",
+			)
+		})
+
+		it("should handle codex-mini-latest API errors", async () => {
+			// Mock fetch with error response
+			const mockFetch = vitest.fn().mockResolvedValue({
+				ok: false,
+				status: 429,
+				statusText: "Too Many Requests",
+				text: async () => "Rate limit exceeded",
+			})
+			global.fetch = mockFetch as any
+
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				apiModelId: "codex-mini-latest",
+			})
+
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+
+			// Should throw an error (using the same error format as GPT-5)
+			await expect(async () => {
+				for await (const chunk of stream) {
+					// consume stream
+				}
+			}).rejects.toThrow("Rate limit exceeded")
+
+			// Clean up
+			delete (global as any).fetch
+		})
+
+		it("should handle codex-mini-latest with multiple user messages", async () => {
+			// Mock fetch for streaming response
+			const mockFetch = vitest.fn().mockResolvedValue({
+				ok: true,
+				body: new ReadableStream({
+					start(controller) {
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.output_text.delta","delta":"Combined response"}\n\n',
+							),
+						)
+						controller.enqueue(new TextEncoder().encode('data: {"type":"response.completed"}\n\n'))
+						controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
+						controller.close()
+					},
+				}),
+			})
+			global.fetch = mockFetch as any
+
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				apiModelId: "codex-mini-latest",
+			})
+
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "First question" },
+				{ role: "assistant", content: "First answer" },
+				{ role: "user", content: "Second question" },
+			]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify the request body includes full conversation like GPT-5
+			const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body)
+			expect(requestBody.input).toContain("Developer: You are a helpful assistant")
+			expect(requestBody.input).toContain("User: First question")
+			expect(requestBody.input).toContain("Assistant: First answer")
+			expect(requestBody.input).toContain("User: Second question")
+
+			// Clean up
+			delete (global as any).fetch
+		})
+
+		it("should handle codex-mini-latest stream error events", async () => {
+			// Mock fetch with error event in stream
+			const mockFetch = vitest.fn().mockResolvedValue({
+				ok: true,
+				body: new ReadableStream({
+					start(controller) {
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.output_text.delta","delta":"Partial"}\n\n',
+							),
+						)
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.error","error":{"message":"Model overloaded"}}\n\n',
+							),
+						)
+						// The error handler will throw, but we still need to close the stream
+						controller.close()
+					},
+				}),
+			})
+			global.fetch = mockFetch as any
+
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				apiModelId: "codex-mini-latest",
+			})
+
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+
+			// Should throw an error when encountering error event
+			await expect(async () => {
+				const chunks = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+			}).rejects.toThrow("Responses API error: Model overloaded")
+
+			// Clean up
+			delete (global as any).fetch
+		})
+	})
 })
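
Outside the test harness, consuming the handler follows the same shapes these tests exercise. A minimal sketch, assuming the API key comes from the environment and imports are wired up for your build:

// Sketch derived from the test expectations above; not part of this commit.
const handler = new OpenAiNativeHandler({
	openAiNativeApiKey: process.env.OPENAI_API_KEY!, // assumption: key from env
	apiModelId: "codex-mini-latest",
})

for await (const chunk of handler.createMessage("You are a helpful coding assistant.", [
	{ role: "user", content: "Write a hello world function" },
])) {
	if (chunk.type === "text") process.stdout.write(chunk.text)
	if (chunk.type === "usage") console.log(`\ncost: $${chunk.totalCost}`)
}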

src/api/providers/openai-native.ts

Lines changed: 25 additions & 26 deletions

@@ -117,8 +117,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			yield* this.handleReasonerMessage(model, id, systemPrompt, messages)
 		} else if (model.id.startsWith("o1")) {
 			yield* this.handleO1FamilyMessage(model, systemPrompt, messages)
-		} else if (this.isGpt5Model(model.id)) {
-			yield* this.handleGpt5Message(model, systemPrompt, messages, metadata)
+		} else if (this.isResponsesApiModel(model.id)) {
+			// Both GPT-5 and Codex Mini use the v1/responses endpoint
+			yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata)
 		} else {
 			yield* this.handleDefaultModelMessage(model, systemPrompt, messages)
 		}
@@ -212,7 +213,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		)
 	}
 
-	private async *handleGpt5Message(
+	private async *handleResponsesApiMessage(
 		model: OpenAiNativeModel,
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
@@ -221,6 +222,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed.
 		const { verbosity } = this.getModel()
 
+		// Both GPT-5 and Codex Mini use the same v1/responses endpoint format
+
 		// Resolve reasoning effort (supports "minimal" for GPT‑5)
 		const reasoningEffort = this.getGpt5ReasoningEffort(model)
 
@@ -886,7 +889,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 				// Error event from the API
 				if (parsed.error || parsed.message) {
 					throw new Error(
-						`GPT-5 API error: ${parsed.error?.message || parsed.message || "Unknown error"}`,
+						`Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`,
 					)
 				}
 			}
@@ -993,7 +996,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					}
 				}
 			} catch (e) {
-				// Silently ignore parsing errors for non-critical SSE data
+				// Only ignore JSON parsing errors, re-throw actual API errors
+				if (!(e instanceof SyntaxError)) {
+					throw e
+				}
 			}
 		}
 		// Also try to parse non-SSE formatted lines
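
The catch-block change is the subtle fix here: previously every exception inside the SSE parsing loop was swallowed, which also hid the "Responses API error" thrown just above for error events. Condensed into a standalone sketch (the function wrapper and `data` parameter are for illustration, assuming `data` holds one SSE payload):

// Condensed restatement of the pattern above, for illustration only.
function handleSsePayload(data: string): void {
	try {
		const parsed = JSON.parse(data)
		if (parsed.error || parsed.message) {
			// Real API error: must propagate to the caller
			throw new Error(`Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`)
		}
	} catch (e) {
		// Swallow only malformed-JSON noise; re-throw everything else
		if (!(e instanceof SyntaxError)) {
			throw e
		}
	}
}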
@@ -1131,6 +1137,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return modelId.startsWith("gpt-5")
 	}
 
+	private isResponsesApiModel(modelId: string): boolean {
+		// Both GPT-5 and Codex Mini use the v1/responses endpoint
+		return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest"
+	}
+
 	private async *handleStreamResponse(
 		stream: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
 		model: OpenAiNativeModel,
@@ -1197,8 +1208,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			defaultTemperature: this.isGpt5Model(id) ? GPT5_DEFAULT_TEMPERATURE : OPENAI_NATIVE_DEFAULT_TEMPERATURE,
 		})
 
-		// For GPT-5 models, ensure we support minimal reasoning effort
-		if (this.isGpt5Model(id)) {
+		// For models using the Responses API (GPT-5 and Codex Mini), ensure we support reasoning effort
+		if (this.isResponsesApiModel(id)) {
 			const effort =
 				(this.options.reasoningEffort as ReasoningEffortWithMinimal | undefined) ??
 				(info.reasoningEffort as ReasoningEffortWithMinimal | undefined)
@@ -1234,13 +1245,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const { id, temperature, reasoning, verbosity } = this.getModel()
-			const isGpt5 = this.isGpt5Model(id)
+			const isResponsesApi = this.isResponsesApiModel(id)
 
-			if (isGpt5) {
-				// GPT-5 uses the Responses API, not Chat Completions. Avoid undefined behavior here.
-				throw new Error(
-					"completePrompt is not supported for GPT-5 models. Use createMessage (Responses API) instead.",
-				)
+			if (isResponsesApi) {
+				// Models that use the Responses API (GPT-5 and Codex Mini) don't support non-streaming completion
+				throw new Error(`completePrompt is not supported for ${id}. Use createMessage (Responses API) instead.`)
 			}
 
 			const params: any = {
@@ -1253,19 +1262,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 				params.temperature = temperature
 			}
 
-			// For GPT-5 models, add reasoning_effort and verbosity as top-level parameters
-			if (isGpt5) {
-				if (reasoning && "reasoning_effort" in reasoning) {
-					params.reasoning_effort = reasoning.reasoning_effort
-				}
-				if (verbosity) {
-					params.verbosity = verbosity
-				}
-			} else {
-				// For non-GPT-5 models, add reasoning as is
-				if (reasoning) {
-					Object.assign(params, reasoning)
-				}
+			// Add reasoning parameters for models that support them
+			if (reasoning) {
+				Object.assign(params, reasoning)
 			}
 
 			const response = await this.client.chat.completions.create(params)
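
The net effect of the routing change is that a single predicate now gates the Responses API path for both model families. Restated as a standalone function for illustration (the real code is the private method in the hunk above):

// Standalone restatement of isResponsesApiModel, for illustration only.
function isResponsesApiModel(modelId: string): boolean {
	return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest"
}

console.log(isResponsesApiModel("gpt-5")) // true
console.log(isResponsesApiModel("codex-mini-latest")) // true
console.log(isResponsesApiModel("gpt-4o")) // false: falls through to the Chat Completions path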

0 commit comments
