@@ -134,22 +134,47 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// o1-preview and o1-mini only support user messages
 		const isOriginalO1 = model.id === "o1"
 		const { reasoning } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
+		if (streamingEnabled) {
+			const response = await this.client.chat.completions.create({
+				model: model.id,
+				messages: [
+					{
+						role: isOriginalO1 ? "developer" : "user",
+						content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			})
 
-		const response = await this.client.chat.completions.create({
-			model: model.id,
-			messages: [
-				{
-					role: isOriginalO1 ? "developer" : "user",
-					content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
-				},
-				...convertToOpenAiMessages(messages),
-			],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		})
+			yield* this.handleStreamResponse(response, model)
+		} else {
+			// Non-streaming request
+			const response = await this.client.chat.completions.create({
+				model: model.id,
+				messages: [
+					{
+						role: isOriginalO1 ? "developer" : "user",
+						content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				...(reasoning && reasoning),
+			})
 
-		yield* this.handleStreamResponse(response, model)
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}
 
 	private async *handleReasonerMessage(
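The new `openAiNativeStreamingEnabled ?? true` guard makes streaming opt-out rather than opt-in: only an explicit `false` takes the non-streaming branch. A minimal standalone sketch of that defaulting behavior (the `Options` interface here is illustrative, not the real options type from this codebase):

```typescript
// Illustrative only: the real provider options type lives elsewhere in the repo.
interface Options {
	openAiNativeStreamingEnabled?: boolean
}

const unset: Options = {}
const disabled: Options = { openAiNativeStreamingEnabled: false }

console.log(unset.openAiNativeStreamingEnabled ?? true) // true  -> undefined falls back to streaming
console.log(disabled.openAiNativeStreamingEnabled ?? true) // false -> an explicit opt-out is respected
```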
@@ -159,22 +184,47 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const { reasoning } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
+
+		if (streamingEnabled) {
+			const stream = await this.client.chat.completions.create({
+				model: family,
+				messages: [
+					{
+						role: "developer",
+						content: `Formatting re-enabled\n${systemPrompt}`,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			})
 
-		const stream = await this.client.chat.completions.create({
-			model: family,
-			messages: [
-				{
-					role: "developer",
-					content: `Formatting re-enabled\n${systemPrompt}`,
-				},
-				...convertToOpenAiMessages(messages),
-			],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		})
+			yield* this.handleStreamResponse(stream, model)
+		} else {
+			// Non-streaming request
+			const response = await this.client.chat.completions.create({
+				model: family,
+				messages: [
+					{
+						role: "developer",
+						content: `Formatting re-enabled\n${systemPrompt}`,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				...(reasoning && reasoning),
+			})
+
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
 
-		yield* this.handleStreamResponse(stream, model)
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}
 
 	private async *handleDefaultModelMessage(
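`yieldUsage` is called from every non-streaming branch but is not part of this diff. A sketch of what it presumably does, assuming the `{ type: "usage", ... }` chunk shape used by `ApiStream` handlers in this codebase; the real method may also account for cached tokens and cost:

```typescript
// Sketch only: ModelInfo and ApiStream are assumed to come from this codebase.
private async *yieldUsage(info: ModelInfo, usage: OpenAI.CompletionUsage): ApiStream {
	yield {
		type: "usage",
		inputTokens: usage.prompt_tokens ?? 0,
		outputTokens: usage.completion_tokens ?? 0,
	}
}
```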
@@ -183,34 +233,61 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const { reasoning, verbosity } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
 
-		// Prepare the request parameters
-		const params: any = {
-			model: model.id,
-			temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
-			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
-			stream: true,
-			stream_options: { include_usage: true },
-			...(reasoning && reasoning),
-		}
+		if (streamingEnabled) {
+			// Prepare the request parameters for streaming
+			const params: any = {
+				model: model.id,
+				temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
+				messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+				stream: true,
+				stream_options: { include_usage: true },
+				...(reasoning && reasoning),
+			}
 
-		// Add verbosity only if the model supports it
-		if (verbosity && model.info.supportsVerbosity) {
-			params.verbosity = verbosity
-		}
+			// Add verbosity only if the model supports it
+			if (verbosity && model.info.supportsVerbosity) {
+				params.verbosity = verbosity
+			}
 
-		const stream = await this.client.chat.completions.create(params)
+			const stream = await this.client.chat.completions.create(params)
 
-		if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
-			throw new Error(
-				"OpenAI SDK did not return an AsyncIterable for streaming response. Please check SDK version and usage.",
+			if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
+				throw new Error(
+					"OpenAI SDK did not return an AsyncIterable for streaming response. Please check SDK version and usage.",
+				)
+			}
+
+			yield* this.handleStreamResponse(
+				stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
+				model,
 			)
-		}
+		} else {
+			// Non-streaming request
+			const params: any = {
+				model: model.id,
+				temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE,
+				messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+				...(reasoning && reasoning),
+			}
 
-		yield* this.handleStreamResponse(
-			stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
-			model,
-		)
+			// Add verbosity only if the model supports it
+			if (verbosity && model.info.supportsVerbosity) {
+				params.verbosity = verbosity
+			}
+
+			const response = await this.client.chat.completions.create(params)
+
+			yield {
+				type: "text",
+				text: response.choices[0]?.message.content || "",
+			}
+
+			if (response.usage) {
+				yield* this.yieldUsage(model.info, response.usage)
+			}
+		}
 	}
 
 	private async *handleResponsesApiMessage(
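Both streaming branches delegate to `handleStreamResponse`, which is also outside this diff. A sketch of the expected shape, assuming it walks the Chat Completions chunk iterator, re-emits text deltas, and forwards the final usage payload that `stream_options: { include_usage: true }` appends:

```typescript
// Sketch only: the OpenAiNativeModel type name is an assumption; the diff shows
// only that `model` exposes `.id` and `.info`.
private async *handleStreamResponse(
	stream: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>,
	model: OpenAiNativeModel,
): ApiStream {
	for await (const chunk of stream) {
		const delta = chunk.choices[0]?.delta
		if (delta?.content) {
			yield { type: "text", text: delta.content }
		}
		// With include_usage, the final chunk carries the aggregate token counts.
		if (chunk.usage) {
			yield* this.yieldUsage(model.info, chunk.usage)
		}
	}
}
```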
@@ -221,6 +298,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	): ApiStream {
 		// Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed.
 		const { verbosity } = this.getModel()
+		const streamingEnabled = this.options.openAiNativeStreamingEnabled ?? true
 
 		// Both GPT-5 and Codex Mini use the same v1/responses endpoint format
 
@@ -296,8 +374,24 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
 		}
 
+		// Check if streaming is enabled
+		if (!streamingEnabled) {
+			// For non-streaming, we need to modify the request body
+			requestBody.stream = false
+
+			// Make non-streaming request using the makeGpt5ResponsesAPIRequest method
+			// Note: The method signature expects the requestBody, not params
+			const responseIterator = this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata)
+
+			// Process the non-streaming response
+			for await (const chunk of responseIterator) {
+				yield chunk
+			}
+			return
+		}
+
 		try {
-			// Use the official SDK
+			// Use the official SDK for streaming
 			const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable<any>
 
 			if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
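Because both paths yield the same `ApiStream` chunk types, callers never branch on the setting; a non-streaming run simply arrives as one large text chunk followed by usage. A hypothetical consumer (handler construction and the `createMessage` signature are assumptions based on the surrounding codebase, not shown in this diff):

```typescript
// Hypothetical usage: option plumbing and method names are assumptions.
const handler = new OpenAiNativeHandler({
	...baseOptions, // whatever provider options the host app already builds
	openAiNativeStreamingEnabled: false, // force single-shot completions
})

for await (const chunk of handler.createMessage(systemPrompt, messages)) {
	if (chunk.type === "text") {
		process.stdout.write(chunk.text)
	}
}
```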