@@ -97,22 +97,47 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// o1-preview and o1-mini only support user messages
 		const isOriginalO1 = model.id === "o1"
 		const { reasoning } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
+		if (streamingEnabled) {
+			const response = await this.client.chat.completions.create({
+				model: model.id,
+				messages: [
+					{
+						role: isOriginalO1 ? "developer" : "user",
+						content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			})
 
-		const response = await this.client.chat.completions.create({
-			model: model.id,
-			messages: [
-				{
-					role: isOriginalO1 ? "developer" : "user",
-					content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
-				},
-				...convertToOpenAiMessages(messages),
-			],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		})
+			yield* this.handleStreamResponse(response, model)
+		} else {
+			// Non-streaming request
+			const response = await this.client.chat.completions.create({
+				model: model.id,
+				messages: [
+					{
+						role: isOriginalO1 ? "developer" : "user",
+						content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				...(reasoning && reasoning),
+			})
 
-		yield* this.handleStreamResponse(response, model)
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}
 
 	private async *handleReasonerMessage(
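Note on the shape of this change: the streaming and non-streaming branches stay as two separate `create` calls rather than one call with a variable `stream` flag. The OpenAI SDK picks its return type from the literal value of `stream` (an async-iterable `Stream<ChatCompletionChunk>` for `stream: true`, a plain `ChatCompletion` otherwise), and `stream_options: { include_usage: true }` makes the final chunk carry token usage. A minimal self-contained sketch of the same toggle outside the handler; the function name and model id are placeholders, and the chunk shapes only approximate the handler's `ApiStream` events:

```ts
import OpenAI from "openai"

// Sketch of the toggle this diff applies to each handler: same messages,
// two differently-typed create() calls selected by one boolean.
async function* complete(client: OpenAI, prompt: string, streamingEnabled = true) {
	const messages = [{ role: "user" as const, content: prompt }]
	if (streamingEnabled) {
		const stream = await client.chat.completions.create({
			model: "gpt-4o", // placeholder model id
			messages,
			stream: true,
			stream_options: { include_usage: true }, // final chunk carries usage
		})
		for await (const chunk of stream) {
			const delta = chunk.choices[0]?.delta?.content
			if (delta) yield { type: "text" as const, text: delta }
			if (chunk.usage) yield { type: "usage" as const, usage: chunk.usage }
		}
	} else {
		const response = await client.chat.completions.create({ model: "gpt-4o", messages })
		yield { type: "text" as const, text: response.choices[0]?.message.content ?? "" }
		if (response.usage) yield { type: "usage" as const, usage: response.usage }
	}
}
```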
@@ -122,22 +147,47 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const { reasoning } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
+		if (streamingEnabled) {
+			const stream = await this.client.chat.completions.create({
+				model: family,
+				messages: [
+					{
+						role: "developer",
+						content: `Formatting re-enabled\n${systemPrompt}`,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			})
 
-		const stream = await this.client.chat.completions.create({
-			model: family,
-			messages: [
-				{
-					role: "developer",
-					content: `Formatting re-enabled\n${systemPrompt}`,
-				},
-				...convertToOpenAiMessages(messages),
-			],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		})
+			yield* this.handleStreamResponse(stream, model)
+		} else {
+			// Non-streaming request
+			const response = await this.client.chat.completions.create({
+				model: family,
+				messages: [
+					{
+						role: "developer",
+						content: `Formatting re-enabled\n${systemPrompt}`,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				...(reasoning && reasoning),
+			})
 
-		yield* this.handleStreamResponse(stream, model)
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}
 
 	private async *handleDefaultModelMessage(
@@ -146,41 +196,70 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const { reasoning, verbosity } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
+		if (streamingEnabled) {
+			// Prepare the request parameters for streaming
+			const params: any = {
+				model: model.id,
+				temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
+				messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			}
 
-		// Prepare the request parameters
-		const params: any = {
-			model: model.id,
-			temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
-			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		}
+			// Add verbosity if supported (for future GPT-5 models)
+			if (verbosity && model.id.startsWith("gpt-5")) {
+				params.verbosity = verbosity
+			}
 
-		// Add verbosity if supported (for future GPT-5 models)
-		if (verbosity && model.id.startsWith("gpt-5")) {
-			params.verbosity = verbosity
-		}
+			const stream = await this.client.chat.completions.create(params)
 
-		const stream = await this.client.chat.completions.create(params)
+			if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
+				throw new Error(
+					"OpenAI SDK did not return an AsyncIterable for streaming response. Please check SDK version and usage.",
+				)
+			}
 
-		if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
-			throw new Error(
-				"OpenAI SDK did not return an AsyncIterable for streaming response. Please check SDK version and usage.",
+			yield* this.handleStreamResponse(
+				stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
+				model,
 			)
-		}
+		} else {
+			// Non-streaming request
+			const params: any = {
+				model: model.id,
+				temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
+				messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+				...(reasoning && reasoning),
+			}
+
+			// Add verbosity if supported (for future GPT-5 models)
+			if (verbosity && model.id.startsWith("gpt-5")) {
+				params.verbosity = verbosity
+			}
 
-		yield* this.handleStreamResponse(
-			stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
-			model,
-		)
+			const response = await this.client.chat.completions.create(params)
+
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}
 
 	private async *handleGpt5Message(
 		model: OpenAiNativeModel,
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
 		// GPT-5 uses the Responses API, not Chat Completions
 		// We need to format the input as a single string combining system prompt and messages
 		const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages)
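A detail worth keeping in `handleDefaultModelMessage` above: the `Symbol.asyncIterator` probe runs before the stream is cast and handed to `handleStreamResponse`, so an SDK return that is not iterable fails fast with a clear error instead of failing inside the consumer. If the check is ever reused elsewhere, a type-guard form would keep the `as unknown as` cast out of call sites; this helper is illustrative and not part of the diff:

```ts
// Illustrative helper: narrows an unknown value to AsyncIterable<T> at runtime,
// mirroring the inline Symbol.asyncIterator check in the streaming branch above.
function isAsyncIterable<T>(value: unknown): value is AsyncIterable<T> {
	return value != null && typeof (value as AsyncIterable<T>)[Symbol.asyncIterator] === "function"
}
```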
@@ -207,7 +286,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 		// Since the OpenAI SDK doesn't yet support the Responses API,
 		// we'll make a direct HTTP request
-		const response = await this.makeGpt5ResponsesAPIRequest(params, model)
+		const response = await this.makeGpt5ResponsesAPIRequest(params, model, streamingEnabled)
 
 		yield* this.handleGpt5StreamResponse(response, model)
 	}
@@ -248,6 +327,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	private async makeGpt5ResponsesAPIRequest(
 		params: GPT5ResponsesAPIParams,
 		model: OpenAiNativeModel,
+		streamingEnabled: boolean = true,
 	): Promise<AsyncIterable<GPT5ResponseChunk>> {
 		// The OpenAI SDK doesn't have direct support for the Responses API yet,
 		// but we can access it through the underlying client request method if available.
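Defaulting the new parameter to `true` keeps any pre-existing two-argument call sites streaming, and the `Promise<AsyncIterable<GPT5ResponseChunk>>` return type is the same in both modes, so `handleGpt5StreamResponse` needs no edits. A hypothetical call site, using the chunk shapes this diff defines (`type: "text"` and `type: "usage"`):

```ts
// Illustrative only: the real caller is handleGpt5Message above. Both modes
// produce the same AsyncIterable<GPT5ResponseChunk>, so the loop is identical.
const chunks = await makeGpt5ResponsesAPIRequest(params, model, /* streamingEnabled */ false)
for await (const chunk of chunks) {
	if (chunk.type === "text") process.stdout.write(chunk.text)
	else if (chunk.type === "usage") console.log("total tokens:", chunk.usage.total_tokens)
}
```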
@@ -258,36 +338,87 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// GPT-5 models use "developer" role for system messages
 		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "developer", content: params.input }]
 
-		// Build the request parameters
-		const requestParams: any = {
-			model: params.model,
-			messages,
-			stream: true,
-			stream_options: { include_usage: true },
-		}
+		if (streamingEnabled) {
+			// Build the request parameters for streaming
+			const requestParams: any = {
+				model: params.model,
+				messages,
+				stream: true,
+				stream_options: { include_usage: true },
+			}
 
-		// Add reasoning effort if specified (supporting "minimal" for GPT-5)
-		if (params.reasoning?.effort) {
-			if (params.reasoning.effort === "minimal") {
-				// For minimal effort, we pass "minimal" as the reasoning_effort
-				requestParams.reasoning_effort = "minimal"
-			} else {
-				requestParams.reasoning_effort = params.reasoning.effort
+			// Add reasoning effort if specified (supporting "minimal" for GPT-5)
+			if (params.reasoning?.effort) {
+				if (params.reasoning.effort === "minimal") {
+					// For minimal effort, we pass "minimal" as the reasoning_effort
+					requestParams.reasoning_effort = "minimal"
+				} else {
+					requestParams.reasoning_effort = params.reasoning.effort
+				}
+			}
+
+			// Add verbosity control for GPT-5 models
+			// According to the docs, Chat Completions API also supports verbosity parameter
+			if (params.text?.verbosity) {
+				requestParams.verbosity = params.text.verbosity
 			}
-		}
 
-		// Add verbosity control for GPT-5 models
-		// According to the docs, Chat Completions API also supports verbosity parameter
-		if (params.text?.verbosity) {
-			requestParams.verbosity = params.text.verbosity
+			const stream = (await this.client.chat.completions.create(
+				requestParams,
+			)) as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>
+
+			// Convert the stream to GPT-5 response format
+			return this.convertChatStreamToGpt5Format(stream)
+		} else {
+			// Non-streaming request
+			const requestParams: any = {
+				model: params.model,
+				messages,
+			}
+
+			// Add reasoning effort if specified (supporting "minimal" for GPT-5)
+			if (params.reasoning?.effort) {
+				if (params.reasoning.effort === "minimal") {
+					requestParams.reasoning_effort = "minimal"
+				} else {
+					requestParams.reasoning_effort = params.reasoning.effort
+				}
+			}
+
+			// Add verbosity control for GPT-5 models
+			if (params.text?.verbosity) {
+				requestParams.verbosity = params.text.verbosity
+			}
+
+			const response = await this.client.chat.completions.create(requestParams)
+
+			// Convert non-streaming response to GPT-5 format
+			return this.convertChatResponseToGpt5Format(response)
 		}
+	}
 
-		const stream = (await this.client.chat.completions.create(
-			requestParams,
-		)) as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>
+	private async *convertChatResponseToGpt5Format(
+		response: OpenAI.Chat.Completions.ChatCompletion,
+	): AsyncIterable<GPT5ResponseChunk> {
+		// Yield text content
+		if (response.choices[0]?.message.content) {
+			yield {
+				type: "text",
+				text: response.choices[0].message.content,
+			}
+		}
 
-		// Convert the stream to GPT-5 response format
-		return this.convertChatStreamToGpt5Format(stream)
+		// Yield usage information
+		if (response.usage) {
+			yield {
+				type: "usage",
+				usage: {
+					input_tokens: response.usage.prompt_tokens || 0,
+					output_tokens: response.usage.completion_tokens || 0,
+					total_tokens: response.usage.total_tokens || 0,
+				},
+			}
+		}
 	}
 
 	private async *convertChatStreamToGpt5Format(
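The adapter that makes the non-streaming GPT-5 path work is `convertChatResponseToGpt5Format` above: any `async function*` is an `AsyncIterable`, so a finished `ChatCompletion` can be re-packaged as a short "stream" of at most one text chunk and one usage chunk, and downstream `for await` loops cannot tell the difference. A stripped-down, self-contained version of the same pattern; the `Chunk` type here is a simplified stand-in for the handler's `GPT5ResponseChunk`:

```ts
// Simplified stand-in for GPT5ResponseChunk.
type Chunk =
	| { type: "text"; text: string }
	| { type: "usage"; usage: { input_tokens: number; output_tokens: number; total_tokens: number } }

// Mirrors convertChatResponseToGpt5Format: a completed response becomes
// an AsyncIterable yielding at most one text chunk and one usage chunk.
async function* asChunkStream(
	content: string | null,
	usage?: Extract<Chunk, { type: "usage" }>,
): AsyncIterable<Chunk> {
	if (content) yield { type: "text", text: content }
	if (usage) yield usage
}

// A consuming loop sees the same shape as a real stream:
async function demo() {
	for await (const chunk of asChunkStream("Hello from a non-streaming response", {
		type: "usage",
		usage: { input_tokens: 12, output_tokens: 8, total_tokens: 20 },
	})) {
		console.log(chunk)
	}
}
```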