
Commit ee9d7c6

refactor: remove special Codex Mini handling - use same GPT-5 infrastructure
- Removed handleCodexMiniWithResponsesApi method as Codex Mini should behave exactly like GPT-5
- Both models now use the same v1/responses endpoint format
- Updated tests to reflect unified behavior
- Addresses review feedback about including all messages in conversation context (sketched below)
1 parent 737d70d commit ee9d7c6
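
For orientation, here is a minimal sketch of the flattening this unification implies, based only on what the updated tests assert (a "Developer:" turn for the system prompt, then role-labelled "User:"/"Assistant:" turns joined with blank lines). The helper name formatFullConversation is hypothetical, not the actual method in openai-native.ts:

import Anthropic from "@anthropic-ai/sdk"

// Hypothetical sketch: flatten the system prompt and ALL messages (not just
// the user turns, as the removed handler did) into the single `input` string
// sent to the v1/responses endpoint.
function formatFullConversation(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string {
	const turns = messages.map((msg) => {
		const label = msg.role === "user" ? "User" : "Assistant"
		const text =
			typeof msg.content === "string"
				? msg.content
				: msg.content
						.filter((part) => part.type === "text")
						.map((part) => (part as any).text)
						.join("\n")
		return `${label}: ${text}`
	})
	return [`Developer: ${systemPrompt}`, ...turns].join("\n\n")
}

With systemPrompt "You are a helpful coding assistant." and a single user message "Write a hello world function", this produces exactly the input string the first updated test expects.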

File tree: 2 files changed (+12, -112 lines)


src/api/providers/__tests__/openai-native.spec.ts

Lines changed: 9 additions & 40 deletions
@@ -1609,8 +1609,7 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 		const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body)
 		expect(requestBody).toMatchObject({
 			model: "codex-mini-latest",
-			instructions: systemPrompt,
-			input: "Write a hello world function",
+			input: "Developer: You are a helpful coding assistant.\n\nUser: Write a hello world function",
 			stream: true,
 		})
 

@@ -1619,47 +1618,15 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 	})
 
 	it("should handle codex-mini-latest non-streaming completion", async () => {
-		// Mock fetch for non-streaming response
-		const mockFetch = vitest.fn().mockResolvedValue({
-			ok: true,
-			json: async () => ({
-				output_text: "def hello_world():\n print('Hello, World!')",
-			}),
-		})
-		global.fetch = mockFetch as any
-
 		handler = new OpenAiNativeHandler({
 			...mockOptions,
 			apiModelId: "codex-mini-latest",
 		})
 
-		const result = await handler.completePrompt("Write a hello world function in Python")
-
-		expect(result).toBe("def hello_world():\n print('Hello, World!')")
-
-		// Verify the request
-		expect(mockFetch).toHaveBeenCalledWith(
-			"https://api.openai.com/v1/responses",
-			expect.objectContaining({
-				method: "POST",
-				headers: expect.objectContaining({
-					"Content-Type": "application/json",
-					Authorization: "Bearer test-api-key",
-				}),
-				body: expect.any(String),
-			}),
+		// Codex Mini now uses the same Responses API as GPT-5, which doesn't support non-streaming
+		await expect(handler.completePrompt("Write a hello world function in Python")).rejects.toThrow(
+			"completePrompt is not supported for codex-mini-latest. Use createMessage (Responses API) instead.",
 		)
-
-		const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body)
-		expect(requestBody).toMatchObject({
-			model: "codex-mini-latest",
-			instructions: "Complete the following prompt:",
-			input: "Write a hello world function in Python",
-			stream: false,
-		})
-
-		// Clean up
-		delete (global as any).fetch
 	})
 
 	it("should handle codex-mini-latest API errors", async () => {
@@ -1730,10 +1697,12 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 			chunks.push(chunk)
 		}
 
-		// Verify the request body only includes user messages
+		// Verify the request body includes full conversation like GPT-5
 		const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body)
-		expect(requestBody.input).toBe("First question\n\nSecond question")
-		expect(requestBody.input).not.toContain("First answer")
+		expect(requestBody.input).toContain("Developer: You are a helpful assistant")
+		expect(requestBody.input).toContain("User: First question")
+		expect(requestBody.input).toContain("Assistant: First answer")
+		expect(requestBody.input).toContain("User: Second question")
 
 		// Clean up
 		delete (global as any).fetch

src/api/providers/openai-native.ts

Lines changed: 3 additions & 72 deletions
@@ -222,11 +222,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed.
 		const { verbosity } = this.getModel()
 
-		// For Codex Mini, we use a simpler request format
-		if (model.id === "codex-mini-latest") {
-			yield* this.handleCodexMiniWithResponsesApi(model, systemPrompt, messages)
-			return
-		}
+		// Both GPT-5 and Codex Mini use the same v1/responses endpoint format
 
 		// Resolve reasoning effort (supports "minimal" for GPT‑5)
 		const reasoningEffort = this.getGpt5ReasoningEffort(model)
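
With the codex-mini-latest branch gone, both model families fall through to the shared GPT-5 request builder. A rough sketch of the streaming body this implies, hedging on everything the diff does not show (only model, input, and stream are confirmed by the removed handler and the tests; the reasoning field is a guess at how GPT-5 options might attach):

// Sketch under assumptions: the unified streaming request for v1/responses.
function buildResponsesRequest(modelId: string, input: string, reasoningEffort?: string) {
	return {
		model: modelId, // same path for "gpt-5*" and "codex-mini-latest"
		input, // the full role-labelled conversation, not just user turns
		stream: true, // this path is streaming-only; completePrompt now throws
		// Guessed shape: GPT-5-specific options presumably attach conditionally.
		...(reasoningEffort ? { reasoning: { effort: reasoningEffort } } : {}),
	}
}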
@@ -1146,38 +1142,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest"
 	}
 
-	private async *handleCodexMiniWithResponsesApi(
-		model: OpenAiNativeModel,
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
-	): ApiStream {
-		const input = messages
-			.filter((msg) => msg.role === "user")
-			.map((msg) => {
-				if (typeof msg.content === "string") {
-					return msg.content
-				} else if (Array.isArray(msg.content)) {
-					return msg.content
-						.filter((part) => part.type === "text")
-						.map((part) => (part as any).text)
-						.join("\n")
-				}
-				return ""
-			})
-			.filter((content) => content)
-			.join("\n\n")
-
-		// Build request body for Codex Mini
-		const requestBody = {
-			model: model.id,
-			instructions: systemPrompt,
-			input: input,
-			stream: true,
-		}
-
-		yield* this.makeGpt5ResponsesAPIRequest(requestBody, model)
-	}
-
 	private async *handleStreamResponse(
 		stream: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
 		model: OpenAiNativeModel,
@@ -1284,41 +1248,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		const isResponsesApi = this.isResponsesApiModel(id)
 
 		if (isResponsesApi) {
-			// Handle models that use the Responses API
-			if (id === "codex-mini-latest") {
-				// Codex Mini can use the responses API for non-streaming completion
-				const apiKey = this.options.openAiNativeApiKey ?? "not-provided"
-				const baseURL = this.options.openAiNativeBaseUrl ?? "https://api.openai.com/v1"
-
-				const response = await fetch(`${baseURL}/responses`, {
-					method: "POST",
-					headers: {
-						"Content-Type": "application/json",
-						Authorization: `Bearer ${apiKey}`,
-					},
-					body: JSON.stringify({
-						model: id,
-						instructions: "Complete the following prompt:",
-						input: prompt,
-						stream: false,
-					}),
-				})
-
-				if (!response.ok) {
-					const errorText = await response.text()
-					throw new Error(
-						`Codex Mini API error: ${response.status} ${response.statusText} - ${errorText}`,
-					)
-				}
-
-				const data = await response.json()
-				return data.output_text || ""
-			} else {
-				// GPT-5 models don't support non-streaming completion
-				throw new Error(
-					"completePrompt is not supported for GPT-5 models. Use createMessage (Responses API) instead.",
-				)
-			}
+			// Models that use the Responses API (GPT-5 and Codex Mini) don't support non-streaming completion
+			throw new Error(`completePrompt is not supported for ${id}. Use createMessage (Responses API) instead.`)
 		}
 
 		const params: any = {
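
The flip side of the simplification is that completePrompt now throws for every Responses API model, including Codex Mini, which previously had a working non-streaming path. A hedged sketch of what a caller-side replacement could look like, with the handler and stream-chunk types simplified for illustration:

// Hypothetical caller: collect streamed text via createMessage instead of
// calling completePrompt, which now rejects for all Responses API models.
type TextChunk = { type: string; text?: string }

async function completeViaStream(
	handler: { createMessage(systemPrompt: string, messages: unknown[]): AsyncIterable<TextChunk> },
	systemPrompt: string,
	prompt: string,
): Promise<string> {
	let text = ""
	for await (const chunk of handler.createMessage(systemPrompt, [{ role: "user", content: prompt }])) {
		if (chunk.type === "text" && chunk.text) text += chunk.text
	}
	return text
}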
