Commit abb456f
feat: add streaming toggle for OpenAI Native provider

- Add openAiNativeStreamingEnabled field to provider settings schema
- Update OpenAI Native provider handler to check streaming option
- Add streaming toggle to OpenAI Native UI component
- Add translation for streaming description

Fixes #6868 - Allow users to disable streaming for GPT-5 and other models that require organization verification for streaming

1 parent ad0e33e · commit abb456f

File tree: 4 files changed, +217 -76 lines

packages/types/src/provider-settings.ts

Lines changed: 1 addition & 0 deletions

@@ -189,6 +189,7 @@ const geminiCliSchema = apiModelIdProviderModelSchema.extend({
 const openAiNativeSchema = apiModelIdProviderModelSchema.extend({
 	openAiNativeApiKey: z.string().optional(),
 	openAiNativeBaseUrl: z.string().optional(),
+	openAiNativeStreamingEnabled: z.boolean().optional(),
 })

 const mistralSchema = apiModelIdProviderModelSchema.extend({
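For context, a minimal sketch of how the new optional flag behaves at parse time. This is a standalone zod example: the `z.object(...)` base below stands in for `apiModelIdProviderModelSchema`, which is not part of this diff.

```ts
import { z } from "zod"

// Stand-in for apiModelIdProviderModelSchema.extend(...); assumed shape for illustration.
const openAiNativeSchema = z.object({
	openAiNativeApiKey: z.string().optional(),
	openAiNativeBaseUrl: z.string().optional(),
	openAiNativeStreamingEnabled: z.boolean().optional(),
})

// Settings saved before this commit (no streaming key) still validate;
// the default is applied at the read site with `?? true`, not in the schema.
const legacy = openAiNativeSchema.parse({ openAiNativeApiKey: "key" })
console.log(legacy.openAiNativeStreamingEnabled ?? true) // true: streaming stays on by default
```

Keeping the field optional rather than defaulted in the schema means stored configurations round-trip unchanged, and only the handlers decide what "unset" means.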

src/api/providers/openai-native.ts

Lines changed: 206 additions & 75 deletions
@@ -97,22 +97,47 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// o1-preview and o1-mini only support user messages
 		const isOriginalO1 = model.id === "o1"
 		const { reasoning } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
+		if (streamingEnabled) {
+			const response = await this.client.chat.completions.create({
+				model: model.id,
+				messages: [
+					{
+						role: isOriginalO1 ? "developer" : "user",
+						content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			})

-		const response = await this.client.chat.completions.create({
-			model: model.id,
-			messages: [
-				{
-					role: isOriginalO1 ? "developer" : "user",
-					content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
-				},
-				...convertToOpenAiMessages(messages),
-			],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		})
+			yield* this.handleStreamResponse(response, model)
+		} else {
+			// Non-streaming request
+			const response = await this.client.chat.completions.create({
+				model: model.id,
+				messages: [
+					{
+						role: isOriginalO1 ? "developer" : "user",
+						content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				...(reasoning && reasoning),
+			})

-		yield* this.handleStreamResponse(response, model)
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}

 	private async *handleReasonerMessage(
@@ -122,22 +147,47 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const { reasoning } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
+		if (streamingEnabled) {
+			const stream = await this.client.chat.completions.create({
+				model: family,
+				messages: [
+					{
+						role: "developer",
+						content: `Formatting re-enabled\n${systemPrompt}`,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			})

-		const stream = await this.client.chat.completions.create({
-			model: family,
-			messages: [
-				{
-					role: "developer",
-					content: `Formatting re-enabled\n${systemPrompt}`,
-				},
-				...convertToOpenAiMessages(messages),
-			],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		})
+			yield* this.handleStreamResponse(stream, model)
+		} else {
+			// Non-streaming request
+			const response = await this.client.chat.completions.create({
+				model: family,
+				messages: [
+					{
+						role: "developer",
+						content: `Formatting re-enabled\n${systemPrompt}`,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				...(reasoning && reasoning),
+			})

-		yield* this.handleStreamResponse(stream, model)
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}

 	private async *handleDefaultModelMessage(
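The two hunks above, and `handleDefaultModelMessage` just below, all branch the same way on the new flag. A minimal standalone sketch of that pattern, using only the official `openai` SDK (the helper name and the string-only yields are illustrative, not the handler's actual `ApiStream` contract):

```ts
import OpenAI from "openai"

// Hypothetical helper distilling the toggle: stream deltas when enabled,
// otherwise make one plain request and yield the whole reply as a single chunk.
async function* completeWithToggle(
	client: OpenAI,
	model: string,
	messages: OpenAI.Chat.ChatCompletionMessageParam[],
	streamingEnabled: boolean,
): AsyncGenerator<string> {
	if (streamingEnabled) {
		const stream = await client.chat.completions.create({
			model,
			messages,
			stream: true,
			stream_options: { include_usage: true },
		})
		for await (const chunk of stream) {
			const delta = chunk.choices[0]?.delta?.content
			if (delta) yield delta
		}
	} else {
		// Single round trip; per the commit message, this path can work for
		// accounts that are not verified for streaming.
		const response = await client.chat.completions.create({ model, messages })
		yield response.choices[0]?.message.content || ""
	}
}
```

Because both branches are expressed through the same generator, callers are unaffected by the toggle; the non-streaming path simply produces one large chunk instead of many small ones.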
@@ -146,41 +196,70 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const { reasoning, verbosity } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
+		if (streamingEnabled) {
+			// Prepare the request parameters for streaming
+			const params: any = {
+				model: model.id,
+				temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
+				messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			}

-		// Prepare the request parameters
-		const params: any = {
-			model: model.id,
-			temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
-			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		}
+			// Add verbosity if supported (for future GPT-5 models)
+			if (verbosity && model.id.startsWith("gpt-5")) {
+				params.verbosity = verbosity
+			}

-		// Add verbosity if supported (for future GPT-5 models)
-		if (verbosity && model.id.startsWith("gpt-5")) {
-			params.verbosity = verbosity
-		}
+			const stream = await this.client.chat.completions.create(params)

-		const stream = await this.client.chat.completions.create(params)
+			if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
+				throw new Error(
+					"OpenAI SDK did not return an AsyncIterable for streaming response. Please check SDK version and usage.",
+				)
+			}

-		if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
-			throw new Error(
-				"OpenAI SDK did not return an AsyncIterable for streaming response. Please check SDK version and usage.",
-			)
-		}
+			yield* this.handleStreamResponse(
+				stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
+				model,
+			)
+		} else {
+			// Non-streaming request
+			const params: any = {
+				model: model.id,
+				temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
+				messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+				...(reasoning && reasoning),
+			}
+
+			// Add verbosity if supported (for future GPT-5 models)
+			if (verbosity && model.id.startsWith("gpt-5")) {
+				params.verbosity = verbosity
+			}

-		yield* this.handleStreamResponse(
-			stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
-			model,
-		)
+			const response = await this.client.chat.completions.create(params)
+
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}

 	private async *handleGpt5Message(
 		model: OpenAiNativeModel,
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
 		// GPT-5 uses the Responses API, not Chat Completions
 		// We need to format the input as a single string combining system prompt and messages
 		const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages)
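One note on the runtime `Symbol.asyncIterator` check in the hunk above: the SDK selects its return type from the `stream` flag through overloads, and that resolution is lost once `params` is typed `any`, so the code verifies at runtime that it really received a stream. A short sketch of the typed behavior (model id is illustrative):

```ts
import OpenAI from "openai"

async function demo(): Promise<void> {
	const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

	// With a literal `stream: true`, the overload returns an async-iterable stream of chunks.
	const stream = await client.chat.completions.create({
		model: "gpt-4o-mini", // illustrative model id
		messages: [{ role: "user", content: "hi" }],
		stream: true,
	})
	for await (const chunk of stream) {
		process.stdout.write(chunk.choices[0]?.delta?.content ?? "")
	}

	// Without it, the same method returns a complete ChatCompletion object,
	// which is why the non-streaming branch can read response.choices[0]?.message.content.
	const completion = await client.chat.completions.create({
		model: "gpt-4o-mini",
		messages: [{ role: "user", content: "hi" }],
	})
	console.log(completion.choices[0]?.message.content)
}

demo().catch(console.error)
```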
@@ -207,7 +286,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio

 		// Since the OpenAI SDK doesn't yet support the Responses API,
 		// we'll make a direct HTTP request
-		const response = await this.makeGpt5ResponsesAPIRequest(params, model)
+		const response = await this.makeGpt5ResponsesAPIRequest(params, model, streamingEnabled)

 		yield* this.handleGpt5StreamResponse(response, model)
 	}
@@ -248,6 +327,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	private async makeGpt5ResponsesAPIRequest(
 		params: GPT5ResponsesAPIParams,
 		model: OpenAiNativeModel,
+		streamingEnabled: boolean = true,
 	): Promise<AsyncIterable<GPT5ResponseChunk>> {
 		// The OpenAI SDK doesn't have direct support for the Responses API yet,
 		// but we can access it through the underlying client request method if available.
@@ -258,36 +338,87 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// GPT-5 models use "developer" role for system messages
 		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "developer", content: params.input }]

-		// Build the request parameters
-		const requestParams: any = {
-			model: params.model,
-			messages,
-			stream: true,
-			stream_options: { include_usage: true },
-		}
+		if (streamingEnabled) {
+			// Build the request parameters for streaming
+			const requestParams: any = {
+				model: params.model,
+				messages,
+				stream: true,
+				stream_options: { include_usage: true },
+			}

-		// Add reasoning effort if specified (supporting "minimal" for GPT-5)
-		if (params.reasoning?.effort) {
-			if (params.reasoning.effort === "minimal") {
-				// For minimal effort, we pass "minimal" as the reasoning_effort
-				requestParams.reasoning_effort = "minimal"
-			} else {
-				requestParams.reasoning_effort = params.reasoning.effort
-			}
-		}
+			// Add reasoning effort if specified (supporting "minimal" for GPT-5)
+			if (params.reasoning?.effort) {
+				if (params.reasoning.effort === "minimal") {
+					// For minimal effort, we pass "minimal" as the reasoning_effort
+					requestParams.reasoning_effort = "minimal"
+				} else {
+					requestParams.reasoning_effort = params.reasoning.effort
+				}
+			}

-		// Add verbosity control for GPT-5 models
-		// According to the docs, Chat Completions API also supports verbosity parameter
-		if (params.text?.verbosity) {
-			requestParams.verbosity = params.text.verbosity
-		}
+			// Add verbosity control for GPT-5 models
+			// According to the docs, Chat Completions API also supports verbosity parameter
+			if (params.text?.verbosity) {
+				requestParams.verbosity = params.text.verbosity
+			}

-		const stream = (await this.client.chat.completions.create(
-			requestParams,
-		)) as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>
+			const stream = (await this.client.chat.completions.create(
+				requestParams,
+			)) as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>

-		// Convert the stream to GPT-5 response format
-		return this.convertChatStreamToGpt5Format(stream)
+			// Convert the stream to GPT-5 response format
+			return this.convertChatStreamToGpt5Format(stream)
+		} else {
+			// Non-streaming request
+			const requestParams: any = {
+				model: params.model,
+				messages,
+			}
+
+			// Add reasoning effort if specified (supporting "minimal" for GPT-5)
+			if (params.reasoning?.effort) {
+				if (params.reasoning.effort === "minimal") {
+					requestParams.reasoning_effort = "minimal"
+				} else {
+					requestParams.reasoning_effort = params.reasoning.effort
+				}
+			}
+
+			// Add verbosity control for GPT-5 models
+			if (params.text?.verbosity) {
+				requestParams.verbosity = params.text.verbosity
+			}
+
+			const response = await this.client.chat.completions.create(requestParams)
+
+			// Convert non-streaming response to GPT-5 format
+			return this.convertChatResponseToGpt5Format(response)
+		}
 	}

+	private async *convertChatResponseToGpt5Format(
+		response: OpenAI.Chat.Completions.ChatCompletion,
+	): AsyncIterable<GPT5ResponseChunk> {
+		// Yield text content
+		if (response.choices[0]?.message.content) {
+			yield {
+				type: "text",
+				text: response.choices[0].message.content,
+			}
+		}
+
+		// Yield usage information
+		if (response.usage) {
+			yield {
+				type: "usage",
+				usage: {
+					input_tokens: response.usage.prompt_tokens || 0,
+					output_tokens: response.usage.completion_tokens || 0,
+					total_tokens: response.usage.total_tokens || 0,
+				},
+			}
+		}
+	}

 	private async *convertChatStreamToGpt5Format(
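The new `convertChatResponseToGpt5Format` generator is what lets the non-streaming branch reuse the existing consumer: both branches of `makeGpt5ResponsesAPIRequest` return an `AsyncIterable<GPT5ResponseChunk>`, so `handleGpt5StreamResponse` never needs to know which path produced the chunks. A self-contained sketch of the same adapter idea (the local `Chunk` type is illustrative, not the real `GPT5ResponseChunk`):

```ts
type Chunk = { type: "text"; text: string } | { type: "usage"; inputTokens: number; outputTokens: number }

// Adapter: wrap one already-complete response in the same chunk interface a stream uses.
async function* fromSingleResponse(text: string, inputTokens: number, outputTokens: number): AsyncIterable<Chunk> {
	if (text) {
		yield { type: "text", text }
	}
	yield { type: "usage", inputTokens, outputTokens }
}

// The consumer iterates identically whether chunks trickle in or arrive all at once.
async function consume(chunks: AsyncIterable<Chunk>): Promise<void> {
	for await (const chunk of chunks) {
		if (chunk.type === "text") {
			process.stdout.write(chunk.text)
		} else {
			console.log(`\n[usage] in=${chunk.inputTokens} out=${chunk.outputTokens}`)
		}
	}
}

consume(fromSingleResponse("Hello!", 12, 3)).catch(console.error)
```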

webview-ui/src/components/settings/providers/OpenAI.tsx

Lines changed: 9 additions & 1 deletion
@@ -7,7 +7,7 @@ import type { ProviderSettings } from "@roo-code/types"
 import { useAppTranslation } from "@src/i18n/TranslationContext"
 import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink"

-import { inputEventTransform } from "../transforms"
+import { inputEventTransform, noTransform } from "../transforms"

 type OpenAIProps = {
 	apiConfiguration: ProviderSettings
@@ -72,6 +72,14 @@ export const OpenAI = ({ apiConfiguration, setApiConfigurationField }: OpenAIPro
 						{t("settings:providers.getOpenAiApiKey")}
 					</VSCodeButtonLink>
 				)}
+				<Checkbox
+					checked={apiConfiguration?.openAiNativeStreamingEnabled ?? true}
+					onChange={handleInputChange("openAiNativeStreamingEnabled", noTransform)}>
+					{t("settings:modelInfo.enableStreaming")}
+				</Checkbox>
+				<div className="text-sm text-vscode-descriptionForeground ml-6 -mt-2">
+					{t("settings:modelInfo.enableStreamingDescription")}
+				</div>
 		</>
 	)
 }
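The checkbox reads the setting with the same `?? true` fallback the provider handler uses, so the UI and the request path agree when the key is unset. `noTransform` is imported from `../transforms` but its body is not part of this diff; presumably it passes the checkbox's value through unchanged, roughly:

```ts
// Assumed shapes, for illustration only (../transforms is not shown in this diff):
// inputEventTransform extracts a value from a DOM input event, while noTransform
// forwards the callback argument as-is, which suits a Checkbox that already
// reports a plain boolean.
export const inputEventTransform = <E,>(event: E) => (event as { target: { value: string } })?.target?.value
export const noTransform = <T,>(value: T) => value
```

Toggling the box therefore writes `true` or `false` into `openAiNativeStreamingEnabled`, and deleting the setting falls back to streaming enabled.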

webview-ui/src/i18n/locales/en/settings.json

Lines changed: 1 addition & 0 deletions
@@ -726,6 +726,7 @@
 	"cacheReadsPrice": "Cache reads price",
 	"cacheWritesPrice": "Cache writes price",
 	"enableStreaming": "Enable streaming",
+	"enableStreamingDescription": "Disable streaming if you encounter organization verification errors with advanced models. Non-streaming requests may work without verification.",
 	"enableR1Format": "Enable R1 model parameters",
 	"enableR1FormatTips": "Must be enabled when using R1 models such as QWQ to prevent 400 errors",
 	"useAzure": "Use Azure",
