Commit 737d70d
feat: add Codex Mini support using existing GPT-5 infrastructure
- Add codex-mini-latest model definition with pricing ($1.5/M input, $6/M output)
- Reuse existing v1/responses endpoint infrastructure (same as GPT-5)
- Add isResponsesApiModel() method to identify models using the responses endpoint
- Rename handleGpt5Message to handleResponsesApiMessage for clarity
- Add comprehensive test coverage for Codex Mini
- Fix error handling in the GPT-5 stream handler to properly re-throw API errors
- Use generic 'Responses API' error messages since both models share the endpoint

The implementation keeps the GPT-5 path completely unchanged while allowing Codex Mini to reuse the same infrastructure with minimal code changes.
1 parent 4e0509b commit 737d70d
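
The isResponsesApiModel() method named in the message is not itself visible in this diff. A minimal sketch of what such a routing check might look like, assuming it matches on model IDs (only codex-mini-latest and the GPT-5 reuse are confirmed by this commit):

	// Hypothetical sketch: the GPT-5 ID prefix is an assumption, not shown in the diff.
	function isResponsesApiModel(modelId: string): boolean {
		// GPT-5 and Codex Mini both stream through the v1/responses endpoint.
		return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest"
	}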

File tree

2 files changed: +21, -136 lines


src/api/providers/__tests__/openai-native.spec.ts

Lines changed: 15 additions & 9 deletions
@@ -1545,7 +1545,11 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 					'data: {"type":"response.output_text.delta","delta":" Mini!"}\n\n',
 				),
 			)
-			controller.enqueue(new TextEncoder().encode('data: {"type":"response.completed"}\n\n'))
+			controller.enqueue(
+				new TextEncoder().encode(
+					'data: {"type":"response.done","response":{"usage":{"prompt_tokens":50,"completion_tokens":10}}}\n\n',
+				),
+			)
 			controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
 			controller.close()
 		},
@@ -1574,18 +1578,19 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 		expect(textChunks).toHaveLength(4)
 		expect(textChunks.map((c) => c.text).join("")).toBe("Hello from Codex Mini!")

-		// Verify usage estimation (based on character count)
+		// Verify usage data from API
 		const usageChunks = chunks.filter((c) => c.type === "usage")
 		expect(usageChunks).toHaveLength(1)
 		expect(usageChunks[0]).toMatchObject({
 			type: "usage",
-			inputTokens: expect.any(Number),
-			outputTokens: expect.any(Number),
+			inputTokens: 50,
+			outputTokens: 10,
 			totalCost: expect.any(Number), // Codex Mini has pricing: $1.5/M input, $6/M output
 		})

-		// Verify cost is calculated correctly
-		expect(usageChunks[0].totalCost).toBeGreaterThan(0)
+		// Verify cost is calculated correctly based on API usage data
+		const expectedCost = (50 / 1_000_000) * 1.5 + (10 / 1_000_000) * 6
+		expect(usageChunks[0].totalCost).toBeCloseTo(expectedCost, 10)

 		// Verify the request was made with correct parameters
 		expect(mockFetch).toHaveBeenCalledWith(
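
For the mocked usage above, the expected cost works out to (50 / 1,000,000) × $1.50 + (10 / 1,000,000) × $6.00 = $0.000075 + $0.00006 = $0.000135; comparing with toBeCloseTo at 10 digits of precision avoids spurious floating-point failures an exact equality check could hit.
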
@@ -1677,12 +1682,12 @@ describe("GPT-5 streaming event coverage (additional)", () => {

 		const stream = handler.createMessage(systemPrompt, messages)

-		// Should throw an error
+		// Should throw an error (using the same error format as GPT-5)
 		await expect(async () => {
 			for await (const chunk of stream) {
 				// consume stream
 			}
-		}).rejects.toThrow("Codex Mini API request failed (429): Rate limit exceeded")
+		}).rejects.toThrow("Rate limit exceeded")

 		// Clean up
 		delete (global as any).fetch
@@ -1750,6 +1755,7 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 						'data: {"type":"response.error","error":{"message":"Model overloaded"}}\n\n',
 					),
 				)
+				// The error handler will throw, but we still need to close the stream
 				controller.close()
 			},
 		}),
@@ -1772,7 +1778,7 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 			for await (const chunk of stream) {
 				chunks.push(chunk)
 			}
-		}).rejects.toThrow("Codex Mini stream error: Model overloaded")
+		}).rejects.toThrow("Responses API error: Model overloaded")

 		// Clean up
 		delete (global as any).fetch

src/api/providers/openai-native.ts

Lines changed: 6 additions & 127 deletions
@@ -893,7 +893,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					// Error event from the API
 					if (parsed.error || parsed.message) {
 						throw new Error(
-							`GPT-5 API error: ${parsed.error?.message || parsed.message || "Unknown error"}`,
+							`Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`,
 						)
 					}
 				}
@@ -1000,7 +1000,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 						}
 					}
 				} catch (e) {
-					// Silently ignore parsing errors for non-critical SSE data
+					// Only ignore JSON parsing errors, re-throw actual API errors
+					if (!(e instanceof SyntaxError)) {
+						throw e
+					}
 				}
 			}
 			// Also try to parse non-SSE formatted lines
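
The catch change above matters because the surrounding try block both parses SSE JSON and deliberately throws API errors (see the "Responses API error" throw in the previous hunk); swallowing every exception would have eaten those too. A standalone sketch of the pattern, using a hypothetical helper rather than code from this file:

	// Hypothetical helper illustrating the catch behavior; not part of the handler.
	function tryParseSseData(data: string): unknown | undefined {
		try {
			const parsed = JSON.parse(data)
			if (parsed.error) {
				// A deliberate API error: this must reach the caller.
				throw new Error(`Responses API error: ${parsed.error.message}`)
			}
			return parsed
		} catch (e) {
			// Partial SSE chunks yield malformed JSON; only that is safe to skip.
			if (!(e instanceof SyntaxError)) {
				throw e
			}
			return undefined
		}
	}
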
@@ -1148,7 +1151,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
-		// Convert messages to a simple input format for Codex Mini
 		const input = messages
 			.filter((msg) => msg.role === "user")
 			.map((msg) => {
@@ -1173,130 +1175,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			stream: true,
 		}

-		// Use the existing responses API infrastructure
-		const apiKey = this.options.openAiNativeApiKey ?? "not-provided"
-		const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com"
-		const url = `${baseUrl}/v1/responses`
-
-		try {
-			const response = await fetch(url, {
-				method: "POST",
-				headers: {
-					"Content-Type": "application/json",
-					Authorization: `Bearer ${apiKey}`,
-					Accept: "text/event-stream",
-				},
-				body: JSON.stringify(requestBody),
-			})
-
-			if (!response.ok) {
-				const errorText = await response.text()
-				let errorMessage = `Codex Mini API request failed (${response.status})`
-
-				try {
-					const errorJson = JSON.parse(errorText)
-					if (errorJson.error?.message) {
-						errorMessage += `: ${errorJson.error.message}`
-					} else if (errorJson.message) {
-						errorMessage += `: ${errorJson.message}`
-					} else {
-						errorMessage += `: ${errorText}`
-					}
-				} catch {
-					errorMessage += `: ${errorText}`
-				}
-
-				throw new Error(errorMessage)
-			}
-
-			if (!response.body) {
-				throw new Error("Codex Mini Responses API error: No response body")
-			}
-
-			// Handle the streaming response for Codex Mini
-			yield* this.handleCodexMiniStreamResponse(response.body, model, systemPrompt, input)
-		} catch (error) {
-			if (error instanceof Error) {
-				throw error
-			}
-			throw new Error(`Unexpected error connecting to Codex Mini API`)
-		}
-	}
-
-	private async *handleCodexMiniStreamResponse(
-		body: ReadableStream<Uint8Array>,
-		model: OpenAiNativeModel,
-		systemPrompt: string,
-		userInput: string,
-	): ApiStream {
-		const reader = body.getReader()
-		const decoder = new TextDecoder()
-		let buffer = ""
-		let totalText = ""
-
-		try {
-			while (true) {
-				const { done, value } = await reader.read()
-				if (done) break
-
-				buffer += decoder.decode(value, { stream: true })
-				const lines = buffer.split("\n")
-				buffer = lines.pop() || ""
-
-				for (const line of lines) {
-					if (line.trim() === "") continue
-					if (line.startsWith("data: ")) {
-						const data = line.slice(6)
-						if (data === "[DONE]") continue
-
-						try {
-							const event = JSON.parse(data)
-
-							// Handle different event types from responses API
-							if (event.type === "response.output_text.delta") {
-								yield {
-									type: "text",
-									text: event.delta,
-								}
-								totalText += event.delta
-							} else if (event.type === "response.completed" || event.type === "response.done") {
-								// Calculate usage based on text length (approximate)
-								// Estimate tokens: ~1 token per 4 characters
-								const promptTokens = Math.ceil((systemPrompt.length + userInput.length) / 4)
-								const completionTokens = Math.ceil(totalText.length / 4)
-
-								const totalCost = calculateApiCostOpenAI(
-									model.info,
-									promptTokens,
-									completionTokens,
-									0,
-									0,
-								)
-
-								yield {
-									type: "usage",
-									inputTokens: promptTokens,
-									outputTokens: completionTokens,
-									cacheWriteTokens: 0,
-									cacheReadTokens: 0,
-									totalCost,
-								}
-							} else if (event.type === "response.error") {
-								throw new Error(`Codex Mini stream error: ${event.error?.message || "Unknown error"}`)
-							}
-						} catch (e) {
-							if (e instanceof SyntaxError) {
-								console.debug("Codex Mini: Failed to parse SSE data", data)
-							} else {
-								throw e
-							}
-						}
-					}
-				}
-			}
-		} finally {
-			reader.releaseLock()
-		}
+		yield* this.makeGpt5ResponsesAPIRequest(requestBody, model)
 	}

 	private async *handleStreamResponse(
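
Pieced together from the surviving context lines, the Codex Mini path now reduces to roughly the shape below. The message-flattening callback and the request-body fields other than stream: true are not fully visible in this diff, so treat them as placeholders:

	// Approximate post-commit shape; the flattening callback and body fields are assumed.
	private async *handleResponsesApiMessage(
		model: OpenAiNativeModel,
		systemPrompt: string,
		messages: Anthropic.Messages.MessageParam[],
	): ApiStream {
		const input = messages
			.filter((msg) => msg.role === "user")
			.map((msg) => (typeof msg.content === "string" ? msg.content : ""))
			.join("\n")

		const requestBody = { model: model.id, instructions: systemPrompt, input, stream: true }

		// Codex Mini and GPT-5 share a single streaming request path.
		yield* this.makeGpt5ResponsesAPIRequest(requestBody, model)
	}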
