@@ -134,22 +134,47 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// o1-preview and o1-mini only support user messages
 		const isOriginalO1 = model.id === "o1"
 		const { reasoning } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
+		if (streamingEnabled) {
+			const response = await this.client.chat.completions.create({
+				model: model.id,
+				messages: [
+					{
+						role: isOriginalO1 ? "developer" : "user",
+						content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			})
 
-		const response = await this.client.chat.completions.create({
-			model: model.id,
-			messages: [
-				{
-					role: isOriginalO1 ? "developer" : "user",
-					content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
-				},
-				...convertToOpenAiMessages(messages),
-			],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		})
+			yield* this.handleStreamResponse(response, model)
+		} else {
+			// Non-streaming request
+			const response = await this.client.chat.completions.create({
+				model: model.id,
+				messages: [
+					{
+						role: isOriginalO1 ? "developer" : "user",
+						content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				...(reasoning && reasoning),
+			})
 
-		yield* this.handleStreamResponse(response, model)
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}
 
 	private async *handleReasonerMessage(
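The new `openAiNativeStreamingEnabled ?? true` guard makes streaming opt-out rather than opt-in: only an explicit `false` takes the non-streaming branch. A minimal standalone sketch of that defaulting behavior (the `Options` interface here is illustrative, not the real options type from this codebase):

```typescript
// Illustrative only: the real provider options type lives elsewhere in the repo.
interface Options {
	openAiNativeStreamingEnabled?: boolean
}

const unset: Options = {}
const disabled: Options = { openAiNativeStreamingEnabled: false }

console.log(unset.openAiNativeStreamingEnabled ?? true) // true  -> undefined falls back to streaming
console.log(disabled.openAiNativeStreamingEnabled ?? true) // false -> an explicit opt-out is respected
```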
@@ -159,22 +184,47 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const { reasoning } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
+		if (streamingEnabled) {
+			const stream = await this.client.chat.completions.create({
+				model: family,
+				messages: [
+					{
+						role: "developer",
+						content: `Formatting re-enabled\n${systemPrompt}`,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			})
 
-		const stream = await this.client.chat.completions.create({
-			model: family,
-			messages: [
-				{
-					role: "developer",
-					content: `Formatting re-enabled\n${systemPrompt}`,
-				},
-				...convertToOpenAiMessages(messages),
-			],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		})
+			yield* this.handleStreamResponse(stream, model)
+		} else {
+			// Non-streaming request
+			const response = await this.client.chat.completions.create({
+				model: family,
+				messages: [
+					{
+						role: "developer",
+						content: `Formatting re-enabled\n${systemPrompt}`,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				...(reasoning && reasoning),
+			})
+
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
 
-		yield* this.handleStreamResponse(stream, model)
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}
 
 	private async *handleDefaultModelMessage(
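`yieldUsage` is called from every non-streaming branch but is not part of this diff. A sketch of what it presumably does, assuming the `{ type: "usage", ... }` chunk shape used by `ApiStream` handlers in this codebase; the real method may also account for cached tokens and cost:

```typescript
// Sketch only: ModelInfo and ApiStream are assumed to come from this codebase.
private async *yieldUsage(info: ModelInfo, usage: OpenAI.CompletionUsage): ApiStream {
	yield {
		type: "usage",
		inputTokens: usage.prompt_tokens ?? 0,
		outputTokens: usage.completion_tokens ?? 0,
	}
}
```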
@@ -183,34 +233,61 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const { reasoning, verbosity } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
 
-		// Prepare the request parameters
-		const params: any = {
-			model: model.id,
-			temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
-			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		}
+		if (streamingEnabled) {
+			// Prepare the request parameters for streaming
+			const params: any = {
+				model: model.id,
+				temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
+				messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			}
 
-		// Add verbosity only if the model supports it
-		if (verbosity && model.info.supportsVerbosity) {
-			params.verbosity = verbosity
-		}
+			// Add verbosity only if the model supports it
+			if (verbosity && model.info.supportsVerbosity) {
+				params.verbosity = verbosity
+			}
 
-		const stream = await this.client.chat.completions.create(params)
+			const stream = await this.client.chat.completions.create(params)
 
-		if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
-			throw new Error(
-				"OpenAI SDK did not return an AsyncIterable for streaming response. Please check SDK version and usage.",
+			if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
+				throw new Error(
+					"OpenAI SDK did not return an AsyncIterable for streaming response. Please check SDK version and usage.",
+				)
+			}
+
+			yield* this.handleStreamResponse(
+				stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
+				model,
 			)
-		}
+		} else {
+			// Non-streaming request
+			const params: any = {
+				model: model.id,
+				temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
+				messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+				...(reasoning && reasoning),
+			}
 
-		yield* this.handleStreamResponse(
-			stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
-			model,
-		)
+			// Add verbosity only if the model supports it
+			if (verbosity && model.info.supportsVerbosity) {
+				params.verbosity = verbosity
+			}
+
+			const response = await this.client.chat.completions.create(params)
+
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}
 
 	private async *handleResponsesApiMessage(
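Both streaming branches delegate to `handleStreamResponse`, which is also outside this diff. A sketch of the expected shape, assuming it walks the Chat Completions chunk iterator, re-emits text deltas, and forwards the final usage payload that `stream_options: { include_usage: true }` appends:

```typescript
// Sketch only: the OpenAiNativeModel type name is an assumption; the diff shows
// only that `model` exposes `.id` and `.info`.
private async *handleStreamResponse(
	stream: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
	model: OpenAiNativeModel,
): ApiStream {
	for await (const chunk of stream) {
		const delta = chunk.choices[0]?.delta
		if (delta?.content) {
			yield { type: "text", text: delta.content }
		}
		// With include_usage, the final chunk carries the aggregate token counts.
		if (chunk.usage) {
			yield* this.yieldUsage(model.info, chunk.usage)
		}
	}
}
```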
@@ -221,6 +298,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	): ApiStream {
 		// Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed.
 		const { verbosity } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
 
 		// Both GPT-5 and Codex Mini use the same v1/responses endpoint format
 
@@ -296,8 +374,24 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
 		}
 
+		// Check if streaming is enabled
+		if (!streamingEnabled) {
+			// For non-streaming, we need to modify the request body
+			requestBody.stream = false
+
+			// Make non-streaming request using the makeGpt5ResponsesAPIRequest method
+			// Note: The method signature expects the requestBody, not params
+			const responseIterator = this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata)
+
+			// Process the non-streaming response
+			for await (const chunk of responseIterator) {
+				yield chunk
+			}
+			return
+		}
+
 		try {
-			// Use the official SDK
+			// Use the official SDK for streaming
 			const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable<any>
 
 			if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
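Because both paths yield the same `ApiStream` chunk types, callers never branch on the setting; a non-streaming run simply arrives as one large text chunk followed by usage. A hypothetical consumer (handler construction and the `createMessage` signature are assumptions based on the surrounding codebase, not shown in this diff):

```typescript
// Hypothetical usage: option plumbing and method names are assumptions.
const handler = new OpenAiNativeHandler({
	...baseOptions, // whatever provider options the host app already builds
	openAiNativeStreamingEnabled: false, // force single-shot completions
})

for await (const chunk of handler.createMessage(systemPrompt, messages)) {
	if (chunk.type === "text") {
		process.stdout.write(chunk.text)
	}
}
```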