Merged
11 changes: 11 additions & 0 deletions packages/types/src/providers/openai.ts
@@ -220,6 +220,17 @@ export const openAiNativeModels = {
outputPrice: 0.6,
cacheReadsPrice: 0.075,
},
"codex-mini-latest": {
maxTokens: 16_384,
contextWindow: 200_000,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 1.5,
Contributor

The pricing values here (1.5 and 6) appear to be correct based on issue #5386, but the PR description mentions '$1.5/M' and '$6/M', which could be confusing. Could you clarify whether these are the correct values? The issue mentions these are the actual numeric values without dollar signs. (A worked cost example follows this file's diff.)

outputPrice: 6,
cacheReadsPrice: 0,
description:
"Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks. Trained with reinforcement learning to generate human-style code, adhere to instructions, and iteratively run tests.",
},
} as const satisfies Record<string, ModelInfo>

export const openAiModelInfoSaneDefaults: ModelInfo = {
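
To make the per-million pricing above concrete, here is a small worked example. This is an illustrative sketch only: the constant and function names are not from the codebase, and it simply applies the codex-mini-latest prices ($1.5 per million input tokens, $6 per million output tokens) that the new test below also asserts.

// Illustrative sketch only; names are not part of the codebase.
const CODEX_MINI_INPUT_PRICE_PER_MILLION = 1.5
const CODEX_MINI_OUTPUT_PRICE_PER_MILLION = 6

function estimateCodexMiniCostUSD(inputTokens: number, outputTokens: number): number {
	return (
		(inputTokens / 1_000_000) * CODEX_MINI_INPUT_PRICE_PER_MILLION +
		(outputTokens / 1_000_000) * CODEX_MINI_OUTPUT_PRICE_PER_MILLION
	)
}

// 50 input + 10 output tokens => 0.000075 + 0.00006 = 0.000135 USD,
// which is the value the new openai-native.spec.ts test asserts below.
estimateCodexMiniCostUSD(50, 10)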
239 changes: 239 additions & 0 deletions src/api/providers/__tests__/openai-native.spec.ts
@@ -1514,4 +1514,243 @@ describe("GPT-5 streaming event coverage (additional)", () => {
// @ts-ignore
delete global.fetch
})

describe("Codex Mini Model", () => {
Contributor

Great test coverage! However, it would be valuable to add integration tests that mock at a higher level (e.g., testing the full flow from API handler to response). This would catch issues with the integration between different components. (A hedged sketch of such a test follows this file's diff.)

let handler: OpenAiNativeHandler
const mockOptions: ApiHandlerOptions = {
openAiNativeApiKey: "test-api-key",
apiModelId: "codex-mini-latest",
}

it("should handle codex-mini-latest streaming response", async () => {
// Mock fetch for Codex Mini responses API
const mockFetch = vitest.fn().mockResolvedValue({
ok: true,
body: new ReadableStream({
start(controller) {
// Codex Mini uses the same responses API format
controller.enqueue(
new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":"Hello"}\n\n'),
)
controller.enqueue(
new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":" from"}\n\n'),
)
controller.enqueue(
new TextEncoder().encode(
'data: {"type":"response.output_text.delta","delta":" Codex"}\n\n',
),
)
controller.enqueue(
new TextEncoder().encode(
'data: {"type":"response.output_text.delta","delta":" Mini!"}\n\n',
),
)
controller.enqueue(
new TextEncoder().encode(
'data: {"type":"response.done","response":{"usage":{"prompt_tokens":50,"completion_tokens":10}}}\n\n',
),
)
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
controller.close()
},
}),
})
global.fetch = mockFetch as any

handler = new OpenAiNativeHandler({
...mockOptions,
apiModelId: "codex-mini-latest",
})

const systemPrompt = "You are a helpful coding assistant."
const messages: Anthropic.Messages.MessageParam[] = [
{ role: "user", content: "Write a hello world function" },
]

const stream = handler.createMessage(systemPrompt, messages)
const chunks: any[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}

// Verify text chunks
const textChunks = chunks.filter((c) => c.type === "text")
expect(textChunks).toHaveLength(4)
expect(textChunks.map((c) => c.text).join("")).toBe("Hello from Codex Mini!")

// Verify usage data from API
const usageChunks = chunks.filter((c) => c.type === "usage")
expect(usageChunks).toHaveLength(1)
expect(usageChunks[0]).toMatchObject({
type: "usage",
inputTokens: 50,
outputTokens: 10,
totalCost: expect.any(Number), // Codex Mini has pricing: $1.5/M input, $6/M output
})

// Verify cost is calculated correctly based on API usage data
const expectedCost = (50 / 1_000_000) * 1.5 + (10 / 1_000_000) * 6
expect(usageChunks[0].totalCost).toBeCloseTo(expectedCost, 10)

// Verify the request was made with correct parameters
expect(mockFetch).toHaveBeenCalledWith(
"https://api.openai.com/v1/responses",
expect.objectContaining({
method: "POST",
headers: expect.objectContaining({
"Content-Type": "application/json",
Authorization: "Bearer test-api-key",
Accept: "text/event-stream",
}),
body: expect.any(String),
}),
)

const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body)
expect(requestBody).toMatchObject({
model: "codex-mini-latest",
input: "Developer: You are a helpful coding assistant.\n\nUser: Write a hello world function",
stream: true,
})

// Clean up
delete (global as any).fetch
})

it("should handle codex-mini-latest non-streaming completion", async () => {
handler = new OpenAiNativeHandler({
...mockOptions,
apiModelId: "codex-mini-latest",
})

// Codex Mini now uses the same Responses API as GPT-5, which doesn't support non-streaming
await expect(handler.completePrompt("Write a hello world function in Python")).rejects.toThrow(
"completePrompt is not supported for codex-mini-latest. Use createMessage (Responses API) instead.",
)
})

it("should handle codex-mini-latest API errors", async () => {
// Mock fetch with error response
const mockFetch = vitest.fn().mockResolvedValue({
ok: false,
status: 429,
statusText: "Too Many Requests",
text: async () => "Rate limit exceeded",
})
global.fetch = mockFetch as any

handler = new OpenAiNativeHandler({
...mockOptions,
apiModelId: "codex-mini-latest",
})

const systemPrompt = "You are a helpful assistant."
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

const stream = handler.createMessage(systemPrompt, messages)

// Should throw an error (using the same error format as GPT-5)
await expect(async () => {
for await (const chunk of stream) {
// consume stream
}
}).rejects.toThrow("Rate limit exceeded")

// Clean up
delete (global as any).fetch
})

it("should handle codex-mini-latest with multiple user messages", async () => {
// Mock fetch for streaming response
const mockFetch = vitest.fn().mockResolvedValue({
ok: true,
body: new ReadableStream({
start(controller) {
controller.enqueue(
new TextEncoder().encode(
'data: {"type":"response.output_text.delta","delta":"Combined response"}\n\n',
),
)
controller.enqueue(new TextEncoder().encode('data: {"type":"response.completed"}\n\n'))
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
controller.close()
},
}),
})
global.fetch = mockFetch as any

handler = new OpenAiNativeHandler({
...mockOptions,
apiModelId: "codex-mini-latest",
})

const systemPrompt = "You are a helpful assistant."
const messages: Anthropic.Messages.MessageParam[] = [
{ role: "user", content: "First question" },
{ role: "assistant", content: "First answer" },
{ role: "user", content: "Second question" },
]

const stream = handler.createMessage(systemPrompt, messages)
const chunks: any[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}

// Verify the request body includes full conversation like GPT-5
const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body)
expect(requestBody.input).toContain("Developer: You are a helpful assistant")
expect(requestBody.input).toContain("User: First question")
expect(requestBody.input).toContain("Assistant: First answer")
expect(requestBody.input).toContain("User: Second question")

// Clean up
delete (global as any).fetch
})

it("should handle codex-mini-latest stream error events", async () => {
// Mock fetch with error event in stream
const mockFetch = vitest.fn().mockResolvedValue({
ok: true,
body: new ReadableStream({
start(controller) {
controller.enqueue(
new TextEncoder().encode(
'data: {"type":"response.output_text.delta","delta":"Partial"}\n\n',
),
)
controller.enqueue(
new TextEncoder().encode(
'data: {"type":"response.error","error":{"message":"Model overloaded"}}\n\n',
),
)
// The error handler will throw, but we still need to close the stream
controller.close()
},
}),
})
global.fetch = mockFetch as any

handler = new OpenAiNativeHandler({
...mockOptions,
apiModelId: "codex-mini-latest",
})

const systemPrompt = "You are a helpful assistant."
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

const stream = handler.createMessage(systemPrompt, messages)

// Should throw an error when encountering error event
await expect(async () => {
const chunks = []
for await (const chunk of stream) {
chunks.push(chunk)
}
}).rejects.toThrow("Responses API error: Model overloaded")

// Clean up
delete (global as any).fetch
})
})
})
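
Following up on the reviewer's suggestion above about higher-level coverage, an integration-style test might look roughly like the sketch below. This is a hedged sketch rather than part of the PR: it assumes a provider factory named buildApiHandler exported from src/api and an options object accepting apiProvider; both are assumptions and should be adjusted to the real exports if they differ.

// Hedged sketch: drive the flow through an assumed provider factory rather
// than constructing OpenAiNativeHandler directly, so routing is covered too.
import { buildApiHandler } from "../../index" // assumed location of the factory

it("routes codex-mini-latest through the Responses API end to end", async () => {
	// Mock only at the network boundary (the Responses API SSE stream).
	global.fetch = vitest.fn().mockResolvedValue({
		ok: true,
		body: new ReadableStream({
			start(controller) {
				controller.enqueue(
					new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":"ok"}\n\n'),
				)
				controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
				controller.close()
			},
		}),
	}) as any

	const handler = buildApiHandler({
		apiProvider: "openai-native",
		apiModelId: "codex-mini-latest",
		openAiNativeApiKey: "test-api-key",
	} as any)

	const chunks: any[] = []
	for await (const chunk of handler.createMessage("You are a helpful assistant.", [
		{ role: "user", content: "Hello" },
	])) {
		chunks.push(chunk)
	}

	expect(chunks.some((c) => c.type === "text" && c.text === "ok")).toBe(true)

	delete (global as any).fetch
})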
51 changes: 25 additions & 26 deletions src/api/providers/openai-native.ts
@@ -117,8 +117,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
yield* this.handleReasonerMessage(model, id, systemPrompt, messages)
} else if (model.id.startsWith("o1")) {
yield* this.handleO1FamilyMessage(model, systemPrompt, messages)
} else if (this.isGpt5Model(model.id)) {
yield* this.handleGpt5Message(model, systemPrompt, messages, metadata)
} else if (this.isResponsesApiModel(model.id)) {
// Both GPT-5 and Codex Mini use the v1/responses endpoint
yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata)
} else {
yield* this.handleDefaultModelMessage(model, systemPrompt, messages)
}
@@ -212,7 +213,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
)
}

private async *handleGpt5Message(
private async *handleResponsesApiMessage(
model: OpenAiNativeModel,
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
@@ -221,6 +222,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
// Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed.
const { verbosity } = this.getModel()

// Both GPT-5 and Codex Mini use the same v1/responses endpoint format

// Resolve reasoning effort (supports "minimal" for GPT‑5)
const reasoningEffort = this.getGpt5ReasoningEffort(model)

@@ -886,7 +889,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
// Error event from the API
if (parsed.error || parsed.message) {
throw new Error(
`GPT-5 API error: ${parsed.error?.message || parsed.message || "Unknown error"}`,
`Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`,
)
}
}
@@ -993,7 +996,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
}
}
} catch (e) {
// Silently ignore parsing errors for non-critical SSE data
// Only ignore JSON parsing errors, re-throw actual API errors
if (!(e instanceof SyntaxError)) {
throw e
}
}
}
// Also try to parse non-SSE formatted lines
@@ -1131,6 +1137,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
return modelId.startsWith("gpt-5")
}

private isResponsesApiModel(modelId: string): boolean {
// Both GPT-5 and Codex Mini use the v1/responses endpoint
return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest"
}

private async *handleStreamResponse(
stream: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
model: OpenAiNativeModel,
@@ -1197,8 +1208,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
defaultTemperature: this.isGpt5Model(id) ? GPT5_DEFAULT_TEMPERATURE : OPENAI_NATIVE_DEFAULT_TEMPERATURE,
})

// For GPT-5 models, ensure we support minimal reasoning effort
if (this.isGpt5Model(id)) {
// For models using the Responses API (GPT-5 and Codex Mini), ensure we support reasoning effort
if (this.isResponsesApiModel(id)) {
const effort =
(this.options.reasoningEffort as ReasoningEffortWithMinimal | undefined) ??
(info.reasoningEffort as ReasoningEffortWithMinimal | undefined)
@@ -1234,13 +1245,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
async completePrompt(prompt: string): Promise<string> {
try {
const { id, temperature, reasoning, verbosity } = this.getModel()
const isGpt5 = this.isGpt5Model(id)
const isResponsesApi = this.isResponsesApiModel(id)

if (isGpt5) {
// GPT-5 uses the Responses API, not Chat Completions. Avoid undefined behavior here.
throw new Error(
"completePrompt is not supported for GPT-5 models. Use createMessage (Responses API) instead.",
)
if (isResponsesApi) {
// Models that use the Responses API (GPT-5 and Codex Mini) don't support non-streaming completion
throw new Error(`completePrompt is not supported for ${id}. Use createMessage (Responses API) instead.`)
}

const params: any = {
@@ -1253,19 +1262,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
params.temperature = temperature
}

// For GPT-5 models, add reasoning_effort and verbosity as top-level parameters
if (isGpt5) {
if (reasoning && "reasoning_effort" in reasoning) {
params.reasoning_effort = reasoning.reasoning_effort
}
if (verbosity) {
params.verbosity = verbosity
}
} else {
// For non-GPT-5 models, add reasoning as is
if (reasoning) {
Object.assign(params, reasoning)
}
// Add reasoning parameters for models that support them
if (reasoning) {
Object.assign(params, reasoning)
}

const response = await this.client.chat.completions.create(params)
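
To summarize the error-handling change in this file, the sketch below condenses the intended behavior for a single SSE line from the Responses API: malformed JSON is ignored, while an error event is surfaced with the new provider-neutral "Responses API error:" prefix. The function name is illustrative only and does not exist in the codebase.

// Illustrative sketch of the SSE handling described in the diff above.
function handleSseLine(line: string): string | undefined {
	if (!line.startsWith("data: ") || line === "data: [DONE]") return undefined
	try {
		const parsed = JSON.parse(line.slice("data: ".length))
		// Error event from the API: surface it instead of swallowing it.
		if (parsed.error || parsed.message) {
			throw new Error(`Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`)
		}
		if (parsed.type === "response.output_text.delta") return parsed.delta
	} catch (e) {
		// Only ignore JSON parsing errors; re-throw actual API errors.
		if (!(e instanceof SyntaxError)) throw e
	}
	return undefined
}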