Commit 76e5a72

daniel-lxs and Luis Daniel Riccio Silva authored
feat: add native OpenAI provider support for Codex Mini model (#5386) (#6931)
Co-authored-by: Luis Daniel Riccio Silva <[email protected]>
1 parent f53fd39 commit 76e5a72

3 files changed: +275 −26 lines changed


packages/types/src/providers/openai.ts

Lines changed: 11 additions & 0 deletions

@@ -220,6 +220,17 @@ export const openAiNativeModels = {
 		outputPrice: 0.6,
 		cacheReadsPrice: 0.075,
 	},
+	"codex-mini-latest": {
+		maxTokens: 16_384,
+		contextWindow: 200_000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 1.5,
+		outputPrice: 6,
+		cacheReadsPrice: 0,
+		description:
+			"Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks. Trained with reinforcement learning to generate human-style code, adhere to instructions, and iteratively run tests.",
+	},
 } as const satisfies Record<string, ModelInfo>
 
 export const openAiModelInfoSaneDefaults: ModelInfo = {
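
The pricing fields above are USD per million tokens. As a rough sketch of the cost math the tests below verify (the helper name is hypothetical, not part of this commit):

// Sketch only: inputPrice ($1.5/M) and outputPrice ($6/M) come from the entry above.
function codexMiniCost(inputTokens: number, outputTokens: number): number {
	return (inputTokens / 1_000_000) * 1.5 + (outputTokens / 1_000_000) * 6
}

// e.g. 50 input + 10 output tokens => 0.000075 + 0.00006 = $0.000135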

src/api/providers/__tests__/openai-native.spec.ts

Lines changed: 239 additions & 0 deletions

@@ -1514,4 +1514,243 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 		// @ts-ignore
 		delete global.fetch
 	})
+
+	describe("Codex Mini Model", () => {
+		let handler: OpenAiNativeHandler
+		const mockOptions: ApiHandlerOptions = {
+			openAiNativeApiKey: "test-api-key",
+			apiModelId: "codex-mini-latest",
+		}
+
+		it("should handle codex-mini-latest streaming response", async () => {
+			// Mock fetch for Codex Mini responses API
+			const mockFetch = vitest.fn().mockResolvedValue({
+				ok: true,
+				body: new ReadableStream({
+					start(controller) {
+						// Codex Mini uses the same responses API format
+						controller.enqueue(
+							new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":"Hello"}\n\n'),
+						)
+						controller.enqueue(
+							new TextEncoder().encode('data: {"type":"response.output_text.delta","delta":" from"}\n\n'),
+						)
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.output_text.delta","delta":" Codex"}\n\n',
+							),
+						)
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.output_text.delta","delta":" Mini!"}\n\n',
+							),
+						)
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.done","response":{"usage":{"prompt_tokens":50,"completion_tokens":10}}}\n\n',
+							),
+						)
+						controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
+						controller.close()
+					},
+				}),
+			})
+			global.fetch = mockFetch as any
+
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				apiModelId: "codex-mini-latest",
+			})
+
+			const systemPrompt = "You are a helpful coding assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "Write a hello world function" },
+			]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify text chunks
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks).toHaveLength(4)
+			expect(textChunks.map((c) => c.text).join("")).toBe("Hello from Codex Mini!")
+
+			// Verify usage data from API
+			const usageChunks = chunks.filter((c) => c.type === "usage")
+			expect(usageChunks).toHaveLength(1)
+			expect(usageChunks[0]).toMatchObject({
+				type: "usage",
+				inputTokens: 50,
+				outputTokens: 10,
+				totalCost: expect.any(Number), // Codex Mini has pricing: $1.5/M input, $6/M output
+			})
+
+			// Verify cost is calculated correctly based on API usage data
+			const expectedCost = (50 / 1_000_000) * 1.5 + (10 / 1_000_000) * 6
+			expect(usageChunks[0].totalCost).toBeCloseTo(expectedCost, 10)
+
+			// Verify the request was made with correct parameters
+			expect(mockFetch).toHaveBeenCalledWith(
+				"https://api.openai.com/v1/responses",
+				expect.objectContaining({
+					method: "POST",
+					headers: expect.objectContaining({
+						"Content-Type": "application/json",
+						Authorization: "Bearer test-api-key",
+						Accept: "text/event-stream",
+					}),
+					body: expect.any(String),
+				}),
+			)
+
+			const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body)
+			expect(requestBody).toMatchObject({
+				model: "codex-mini-latest",
+				input: "Developer: You are a helpful coding assistant.\n\nUser: Write a hello world function",
+				stream: true,
+			})
+
+			// Clean up
+			delete (global as any).fetch
+		})
+
+		it("should handle codex-mini-latest non-streaming completion", async () => {
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				apiModelId: "codex-mini-latest",
+			})
+
+			// Codex Mini now uses the same Responses API as GPT-5, which doesn't support non-streaming
+			await expect(handler.completePrompt("Write a hello world function in Python")).rejects.toThrow(
+				"completePrompt is not supported for codex-mini-latest. Use createMessage (Responses API) instead.",
+			)
+		})
+
+		it("should handle codex-mini-latest API errors", async () => {
+			// Mock fetch with error response
+			const mockFetch = vitest.fn().mockResolvedValue({
+				ok: false,
+				status: 429,
+				statusText: "Too Many Requests",
+				text: async () => "Rate limit exceeded",
+			})
+			global.fetch = mockFetch as any
+
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				apiModelId: "codex-mini-latest",
+			})
+
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+
+			// Should throw an error (using the same error format as GPT-5)
+			await expect(async () => {
+				for await (const chunk of stream) {
+					// consume stream
+				}
+			}).rejects.toThrow("Rate limit exceeded")
+
+			// Clean up
+			delete (global as any).fetch
+		})
+
+		it("should handle codex-mini-latest with multiple user messages", async () => {
+			// Mock fetch for streaming response
+			const mockFetch = vitest.fn().mockResolvedValue({
+				ok: true,
+				body: new ReadableStream({
+					start(controller) {
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.output_text.delta","delta":"Combined response"}\n\n',
+							),
+						)
+						controller.enqueue(new TextEncoder().encode('data: {"type":"response.completed"}\n\n'))
+						controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
+						controller.close()
+					},
+				}),
+			})
+			global.fetch = mockFetch as any
+
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				apiModelId: "codex-mini-latest",
+			})
+
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "First question" },
+				{ role: "assistant", content: "First answer" },
+				{ role: "user", content: "Second question" },
+			]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify the request body includes full conversation like GPT-5
+			const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body)
+			expect(requestBody.input).toContain("Developer: You are a helpful assistant")
+			expect(requestBody.input).toContain("User: First question")
+			expect(requestBody.input).toContain("Assistant: First answer")
+			expect(requestBody.input).toContain("User: Second question")
+
+			// Clean up
+			delete (global as any).fetch
+		})
+
+		it("should handle codex-mini-latest stream error events", async () => {
+			// Mock fetch with error event in stream
+			const mockFetch = vitest.fn().mockResolvedValue({
+				ok: true,
+				body: new ReadableStream({
+					start(controller) {
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.output_text.delta","delta":"Partial"}\n\n',
+							),
+						)
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.error","error":{"message":"Model overloaded"}}\n\n',
+							),
+						)
+						// The error handler will throw, but we still need to close the stream
+						controller.close()
+					},
+				}),
+			})
+			global.fetch = mockFetch as any
+
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				apiModelId: "codex-mini-latest",
+			})
+
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+
+			// Should throw an error when encountering error event
+			await expect(async () => {
+				const chunks = []
+				for await (const chunk of stream) {
+					chunks.push(chunk)
+				}
+			}).rejects.toThrow("Responses API error: Model overloaded")
+
+			// Clean up
+			delete (global as any).fetch
+		})
+	})
 })
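
Outside the test harness, consuming the handler follows the same shapes these tests exercise. A minimal sketch, assuming the API key comes from the environment and imports are wired up for your build:

// Sketch derived from the test expectations above; not part of this commit.
const handler = new OpenAiNativeHandler({
	openAiNativeApiKey: process.env.OPENAI_API_KEY!, // assumption: key from env
	apiModelId: "codex-mini-latest",
})

for await (const chunk of handler.createMessage("You are a helpful coding assistant.", [
	{ role: "user", content: "Write a hello world function" },
])) {
	if (chunk.type === "text") process.stdout.write(chunk.text)
	if (chunk.type === "usage") console.log(`\ncost: $${chunk.totalCost}`)
}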

src/api/providers/openai-native.ts

Lines changed: 25 additions & 26 deletions

@@ -117,8 +117,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			yield* this.handleReasonerMessage(model, id, systemPrompt, messages)
 		} else if (model.id.startsWith("o1")) {
 			yield* this.handleO1FamilyMessage(model, systemPrompt, messages)
-		} else if (this.isGpt5Model(model.id)) {
-			yield* this.handleGpt5Message(model, systemPrompt, messages, metadata)
+		} else if (this.isResponsesApiModel(model.id)) {
+			// Both GPT-5 and Codex Mini use the v1/responses endpoint
+			yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata)
 		} else {
 			yield* this.handleDefaultModelMessage(model, systemPrompt, messages)
 		}
@@ -212,7 +213,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		)
 	}
 
-	private async *handleGpt5Message(
+	private async *handleResponsesApiMessage(
 		model: OpenAiNativeModel,
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
@@ -221,6 +222,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed.
 		const { verbosity } = this.getModel()
 
+		// Both GPT-5 and Codex Mini use the same v1/responses endpoint format
+
 		// Resolve reasoning effort (supports "minimal" for GPT‑5)
 		const reasoningEffort = this.getGpt5ReasoningEffort(model)
 
@@ -886,7 +889,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 				// Error event from the API
 				if (parsed.error || parsed.message) {
 					throw new Error(
-						`GPT-5 API error: ${parsed.error?.message || parsed.message || "Unknown error"}`,
+						`Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`,
 					)
 				}
 			}
@@ -993,7 +996,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					}
 				}
 			} catch (e) {
-				// Silently ignore parsing errors for non-critical SSE data
+				// Only ignore JSON parsing errors, re-throw actual API errors
+				if (!(e instanceof SyntaxError)) {
+					throw e
+				}
 			}
 		}
 		// Also try to parse non-SSE formatted lines
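
The catch-block change is the subtle fix here: previously every exception inside the SSE parsing loop was swallowed, which also hid the "Responses API error" thrown just above for error events. Condensed into a standalone sketch (the function wrapper and `data` parameter are for illustration, assuming `data` holds one SSE payload):

// Condensed restatement of the pattern above, for illustration only.
function handleSsePayload(data: string): void {
	try {
		const parsed = JSON.parse(data)
		if (parsed.error || parsed.message) {
			// Real API error: must propagate to the caller
			throw new Error(`Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`)
		}
	} catch (e) {
		// Swallow only malformed-JSON noise; re-throw everything else
		if (!(e instanceof SyntaxError)) {
			throw e
		}
	}
}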
@@ -1131,6 +1137,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return modelId.startsWith("gpt-5")
 	}
 
+	private isResponsesApiModel(modelId: string): boolean {
+		// Both GPT-5 and Codex Mini use the v1/responses endpoint
+		return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest"
+	}
+
 	private async *handleStreamResponse(
 		stream: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
 		model: OpenAiNativeModel,
@@ -1197,8 +1208,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			defaultTemperature: this.isGpt5Model(id) ? GPT5_DEFAULT_TEMPERATURE : OPENAI_NATIVE_DEFAULT_TEMPERATURE,
 		})
 
-		// For GPT-5 models, ensure we support minimal reasoning effort
-		if (this.isGpt5Model(id)) {
+		// For models using the Responses API (GPT-5 and Codex Mini), ensure we support reasoning effort
+		if (this.isResponsesApiModel(id)) {
 			const effort =
 				(this.options.reasoningEffort as ReasoningEffortWithMinimal | undefined) ??
 				(info.reasoningEffort as ReasoningEffortWithMinimal | undefined)
@@ -1234,13 +1245,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const { id, temperature, reasoning, verbosity } = this.getModel()
-			const isGpt5 = this.isGpt5Model(id)
+			const isResponsesApi = this.isResponsesApiModel(id)
 
-			if (isGpt5) {
-				// GPT-5 uses the Responses API, not Chat Completions. Avoid undefined behavior here.
-				throw new Error(
-					"completePrompt is not supported for GPT-5 models. Use createMessage (Responses API) instead.",
-				)
+			if (isResponsesApi) {
+				// Models that use the Responses API (GPT-5 and Codex Mini) don't support non-streaming completion
+				throw new Error(`completePrompt is not supported for ${id}. Use createMessage (Responses API) instead.`)
 			}
 
 			const params: any = {
@@ -1253,19 +1262,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 				params.temperature = temperature
 			}
 
-			// For GPT-5 models, add reasoning_effort and verbosity as top-level parameters
-			if (isGpt5) {
-				if (reasoning && "reasoning_effort" in reasoning) {
-					params.reasoning_effort = reasoning.reasoning_effort
-				}
-				if (verbosity) {
-					params.verbosity = verbosity
-				}
-			} else {
-				// For non-GPT-5 models, add reasoning as is
-				if (reasoning) {
-					Object.assign(params, reasoning)
-				}
+			// Add reasoning parameters for models that support them
+			if (reasoning) {
+				Object.assign(params, reasoning)
 			}
 
 			const response = await this.client.chat.completions.create(params)
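
The net effect of the routing change is that a single predicate now gates the Responses API path for both model families. Restated as a standalone function for illustration (the real code is the private method in the hunk above):

// Standalone restatement of isResponsesApiModel, for illustration only.
function isResponsesApiModel(modelId: string): boolean {
	return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest"
}

console.log(isResponsesApiModel("gpt-5")) // true
console.log(isResponsesApiModel("codex-mini-latest")) // true
console.log(isResponsesApiModel("gpt-4o")) // false: falls through to the Chat Completions path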

0 commit comments
