@@ -22,6 +22,9 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
2222
2323export type OpenAiNativeModel = ReturnType < OpenAiNativeHandler [ "getModel" ] >
2424
25+ // Token estimation constants
26+ const CHARS_PER_TOKEN = 4 // Approximate ratio used across the codebase
27+
2528export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
2629 protected options : ApiHandlerOptions
2730 private client : OpenAI
@@ -39,18 +42,20 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
3942 metadata ?: ApiHandlerCreateMessageMetadata ,
4043 ) : ApiStream {
4144 const model = this . getModel ( )
42- let id : "o3-mini" | "o3" | "o4-mini" | undefined
43-
44- if ( model . id . startsWith ( "o3-mini" ) ) {
45- id = "o3-mini"
46- } else if ( model . id . startsWith ( "o3" ) ) {
47- id = "o3"
48- } else if ( model . id . startsWith ( "o4-mini" ) ) {
49- id = "o4-mini"
50- }
5145
52- if ( id ) {
53- yield * this . handleReasonerMessage ( model , id , systemPrompt , messages )
46+ // Handle o3, o3-mini, and o4-mini models, which support the reasoning_effort parameter.
47+ // These models use the "developer" role and require model ID normalization.
48+ if ( model . id . startsWith ( "o3-mini" ) || model . id . startsWith ( "o3" ) || model . id . startsWith ( "o4-mini" ) ) {
49+ // Normalize model ID for API calls (e.g., "o3-mini-high" -> "o3-mini")
50+ let normalizedId : "o3-mini" | "o3" | "o4-mini"
51+ if ( model . id . startsWith ( "o3-mini" ) ) {
52+ normalizedId = "o3-mini"
53+ } else if ( model . id . startsWith ( "o3" ) ) {
54+ normalizedId = "o3"
55+ } else {
56+ normalizedId = "o4-mini"
57+ }
58+ yield * this . handleO3O4MiniMessage ( model , normalizedId , systemPrompt , messages )
5459 } else if ( model . id . startsWith ( "o1" ) ) {
5560 yield * this . handleO1FamilyMessage ( model , systemPrompt , messages )
5661 } else if ( model . id === "codex-mini-latest" ) {
@@ -84,7 +89,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
8489 yield * this . handleStreamResponse ( response , model )
8590 }
8691
87- private async * handleReasonerMessage (
92+ private async * handleO3O4MiniMessage (
8893 model : OpenAiNativeModel ,
8994 family : "o3-mini" | "o3" | "o4-mini" ,
9095 systemPrompt : string ,
@@ -131,6 +136,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
131136 messages : Anthropic . Messages . MessageParam [ ] ,
132137 ) : ApiStream {
133138 // Convert messages to a single input string
139+ // The codex-mini-latest model uses the v1/responses endpoint which expects
140+ // a single input string rather than a messages array
134141 const input = this . convertMessagesToInput ( messages )
135142
136143 try {
@@ -152,6 +159,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
152159 }
153160
154161 private convertMessagesToInput ( messages : Anthropic . Messages . MessageParam [ ] ) : string {
162+ // Extract only user messages and convert to plain text
163+ // This is specific to the codex-mini-latest model's requirements
155164 return messages
156165 . map ( ( msg ) => {
157166 if ( msg . role === "user" ) {
@@ -189,9 +198,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
189198 totalText += event . delta
190199 } else if ( event . type === "response.completed" ) {
191200 // Calculate usage based on text length (approximate)
192- // Estimate tokens: ~1 token per 4 characters
193- const promptTokens = Math . ceil ( ( systemPrompt . length + userInput . length ) / 4 )
194- const completionTokens = Math . ceil ( totalText . length / 4 )
201+ // The v1/responses API doesn't provide token usage, so we estimate
202+ const promptTokens = Math . ceil ( ( systemPrompt . length + userInput . length ) / CHARS_PER_TOKEN )
203+ const completionTokens = Math . ceil ( totalText . length / CHARS_PER_TOKEN )
195204 yield * this . yieldUsage ( model . info , {
196205 prompt_tokens : promptTokens ,
197206 completion_tokens : completionTokens ,
@@ -267,9 +276,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
267276 defaultTemperature : OPENAI_NATIVE_DEFAULT_TEMPERATURE ,
268277 } )
269278
270- // The o3 models are named like "o3-mini-[reasoning-effort]", which are
271- // not valid model ids, so we need to strip the suffix.
272- return { id : id . startsWith ( "o3-mini" ) ? "o3-mini" : id , info, ...params }
279+ return { id, info, ...params }
273280 }
274281
275282 async completePrompt ( prompt : string ) : Promise < string > {
0 commit comments