diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts
index fd327657b6..246d576b44 100644
--- a/packages/types/src/provider-settings.ts
+++ b/packages/types/src/provider-settings.ts
@@ -296,6 +296,10 @@ const openAiNativeSchema = apiModelIdProviderModelSchema.extend({
 	// OpenAI Responses API service tier for openai-native provider only.
 	// UI should only expose this when the selected model supports flex/priority.
 	openAiNativeServiceTier: serviceTierSchema.optional(),
+	// Disable response streaming
+	openAiNativeDisableStreaming: z.boolean().optional(),
+	// Disable reasoning summaries
+	openAiNativeDisableReasoningSummaries: z.boolean().optional(),
 })
 
 const mistralSchema = apiModelIdProviderModelSchema.extend({
diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts
index 618cdeac65..41f4dbf55c 100644
--- a/src/api/providers/__tests__/openai-native.spec.ts
+++ b/src/api/providers/__tests__/openai-native.spec.ts
@@ -125,6 +125,143 @@ describe("OpenAiNativeHandler", () => {
 				}
 			}).rejects.toThrow("OpenAI service error")
 		})
+
+		it("should handle non-streaming responses via SDK when stream=false", async () => {
+			// Reconfigure handler to force non-stream (buildRequestBody sets stream = !openAiNativeDisableStreaming)
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				openAiNativeDisableStreaming: true, // => stream: false
+			})
+
+			// Mock SDK non-streaming JSON response
+			mockResponsesCreate.mockResolvedValueOnce({
+				id: "resp_nonstream_1",
+				output: [
+					{
+						type: "message",
+						content: [{ type: "output_text", text: "Non-streamed reply" }],
+					},
+				],
+				usage: {
+					input_tokens: 12,
+					output_tokens: 7,
+					cache_read_input_tokens: 0,
+					cache_creation_input_tokens: 0,
+				},
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify yielded content and usage from non-streaming path
+			expect(chunks.length).toBeGreaterThan(0)
+			expect(chunks[0]).toEqual({ type: "text", text: "Non-streamed reply" })
+			const usage = chunks.find((c) => c.type === "usage")
+			expect(usage).toBeTruthy()
+			expect(usage.inputTokens).toBe(12)
+			expect(usage.outputTokens).toBe(7)
+
+			// Ensure SDK was called with stream=false and structured input
+			expect(mockResponsesCreate).toHaveBeenCalledTimes(1)
+			const body = mockResponsesCreate.mock.calls[0][0]
+			expect(body.stream).toBe(false)
+			expect(body.instructions).toBe(systemPrompt)
+			expect(body.input).toEqual([{ role: "user", content: [{ type: "input_text", text: "Hello!" }] }])
+		})
+
+		it("should retry non-streaming when previous_response_id is invalid (400) and then succeed", async () => {
+			// Reconfigure handler to force non-stream (stream=false)
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				openAiNativeDisableStreaming: true,
+			})
+
+			// First SDK call fails with 400 indicating previous_response_id not found
+			const err: any = new Error("Previous response not found")
+			err.status = 400
+			err.response = { status: 400 }
+			mockResponsesCreate.mockRejectedValueOnce(err).mockResolvedValueOnce({
+				id: "resp_after_retry",
+				output: [
+					{
+						type: "message",
+						content: [{ type: "output_text", text: "Reply after retry" }],
+					},
+				],
+				usage: {
+					input_tokens: 9,
+					output_tokens: 3,
+					cache_read_input_tokens: 0,
+					cache_creation_input_tokens: 0,
+				},
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "t-1",
+				previousResponseId: "resp_invalid",
+			})
+
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Two SDK calls (retry path)
+			expect(mockResponsesCreate).toHaveBeenCalledTimes(2)
+
+			// First call: includes previous_response_id and only latest user message
+			const firstBody = mockResponsesCreate.mock.calls[0][0]
+			expect(firstBody.stream).toBe(false)
+			expect(firstBody.previous_response_id).toBe("resp_invalid")
+			expect(firstBody.input).toEqual([{ role: "user", content: [{ type: "input_text", text: "Hello!" }] }])
+
+			// Second call (retry): no previous_response_id, includes full conversation (still single latest message in this test)
+			const secondBody = mockResponsesCreate.mock.calls[1][0]
+			expect(secondBody.stream).toBe(false)
+			expect(secondBody.previous_response_id).toBeUndefined()
+			expect(secondBody.instructions).toBe(systemPrompt)
+			// With only one message in this suite, the "full conversation" equals the single user message
+			expect(secondBody.input).toEqual([{ role: "user", content: [{ type: "input_text", text: "Hello!" }] }])
+
+			// Verify yielded chunks from retry
+			expect(chunks[0]).toEqual({ type: "text", text: "Reply after retry" })
+			const usage = chunks.find((c) => c.type === "usage")
+			expect(usage.inputTokens).toBe(9)
+			expect(usage.outputTokens).toBe(3)
+		})
+
+		it("should NOT fallback to SSE when streaming is disabled and non-stream SDK error occurs", async () => {
+			// Force non-stream path via disable streaming toggle
+			handler = new OpenAiNativeHandler({
+				...mockOptions,
+				openAiNativeDisableStreaming: true, // => stream: false
+			})
+
+			// Make SDK throw a non-previous_response error (e.g., 500)
+			const err: any = new Error("Some server error")
+			err.status = 500
+			err.response = { status: 500 }
+			mockResponsesCreate.mockRejectedValueOnce(err)
+
+			// Prepare a fetch mock to detect any unintended SSE fallback usage
+			const mockFetch = vitest.fn()
+			;(global as any).fetch = mockFetch as any
+
+			const stream = handler.createMessage(systemPrompt, messages)
+
+			// Expect iteration to reject and no SSE fallback to be attempted
+			await expect(async () => {
+				for await (const _ of stream) {
+					// consume
+				}
+			}).rejects.toThrow("Some server error")
+
+			// Ensure SSE fallback was NOT invoked
+			expect(mockFetch).not.toHaveBeenCalled()
+		})
 	})
 
 	describe("completePrompt", () => {
@@ -1734,3 +1871,136 @@ describe("GPT-5 streaming event coverage (additional)", () => {
 		})
 	})
 })
+
+describe("Unverified org gating behavior", () => {
+	beforeEach(() => {
+		// Ensure call counts don't accumulate from previous test suites
+		mockResponsesCreate.mockClear()
+		// Ensure no SSE fallback interference
+		if ((global as any).fetch) {
+			delete (global as any).fetch
+		}
+	})
+
+	afterEach(() => {
+		// Clean up any accidental fetch mocks
+		if ((global as any).fetch) {
+			delete (global as any).fetch
+		}
+	})
+
+	it("omits reasoning.summary in createMessage request when unverified org is true (GPT-5)", async () => {
+		// Arrange
+		const handler = new OpenAiNativeHandler({
+			apiModelId: "gpt-5-2025-08-07",
+			openAiNativeApiKey: "test-api-key",
+			openAiNativeDisableStreaming: true, // => stream=false
+			openAiNativeDisableReasoningSummaries: true, // => summary must be omitted
+		})
+
+		// SDK returns a minimal valid non-stream response
+		mockResponsesCreate.mockResolvedValueOnce({
+			id: "resp_nonstream_2",
+			output: [],
+			usage: { input_tokens: 1, output_tokens: 1 },
+		})
+
+		// Act
+		const systemPrompt = "You are a helpful assistant."
+		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" }]
+		const stream = handler.createMessage(systemPrompt, messages)
+		for await (const _ of stream) {
+			// drain
+		}
+
+		// Assert
+		expect(mockResponsesCreate).toHaveBeenCalledTimes(1)
+		const body = mockResponsesCreate.mock.calls[0][0]
+		expect(body.model).toBe("gpt-5-2025-08-07")
+		expect(body.stream).toBe(false)
+		// GPT-5 includes reasoning effort; summary must be omitted for unverified orgs
+		expect(body.reasoning?.effort).toBeDefined()
+		expect(body.reasoning?.summary).toBeUndefined()
+	})
+
+	it("omits reasoning.summary in completePrompt request when unverified org is true (GPT-5)", async () => {
+		// Arrange
+		const handler = new OpenAiNativeHandler({
+			apiModelId: "gpt-5-2025-08-07",
+			openAiNativeApiKey: "test-api-key",
+			openAiNativeDisableReasoningSummaries: true, // => summary must be omitted in completePrompt too
+		})
+
+		// SDK returns a non-stream completion
+		mockResponsesCreate.mockResolvedValueOnce({
+			output: [
+				{
+					type: "message",
+					content: [{ type: "output_text", text: "Completion" }],
+				},
+			],
+		})
+
+		// Act
+		const result = await handler.completePrompt("Prompt text")
+
+		// Assert
+		expect(result).toBe("Completion")
+		expect(mockResponsesCreate).toHaveBeenCalledTimes(1)
+		const body = mockResponsesCreate.mock.calls[0][0]
+		expect(body.model).toBe("gpt-5-2025-08-07")
+		expect(body.stream).toBe(false)
+		expect(body.store).toBe(false)
+		// Reasoning present, but summary must be omitted
+		expect(body.reasoning?.effort).toBeDefined()
+		expect(body.reasoning?.summary).toBeUndefined()
+	})
+
+	it("include reasoning.summary in createMessage request when unverified org is false", async () => {
+		// Arrange
+		const handler = new OpenAiNativeHandler({
+			apiModelId: "gpt-5-2025-08-07",
+			openAiNativeApiKey: "test-api-key",
+			openAiNativeDisableStreaming: false, // => stream=true
+			openAiNativeDisableReasoningSummaries: false, // => summary should be included
+		})
+
+		// Mock SDK to return a proper async iterable for streaming
+		const createMockStream = (chunks: any[]) => {
+			return {
+				async *[Symbol.asyncIterator]() {
+					for (const chunk of chunks) {
+						yield chunk
+					}
+				},
+			}
+		}
+
+		mockResponsesCreate.mockResolvedValueOnce(
+			createMockStream([
+				{ type: "response.text.delta", delta: "Test" },
+				{
+					type: "response.done",
+					response: { id: "resp_stream_1", usage: { prompt_tokens: 10, completion_tokens: 1 } },
+				},
+			]),
+		)
+
+		// Act
+		const systemPrompt = "You are a helpful assistant."
+		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" }]
+		const stream = handler.createMessage(systemPrompt, messages)
+		for await (const _ of stream) {
+			// drain
+		}
+
+		// Assert
+		expect(mockResponsesCreate).toHaveBeenCalledTimes(1)
+		const body = mockResponsesCreate.mock.calls[0][0]
+		expect(body.model).toBe("gpt-5-2025-08-07")
+		expect(body.stream).toBe(true)
+		// GPT-5 includes reasoning effort and summary should be "auto" when unverified org is false
+		expect(body.reasoning?.effort).toBeDefined()
+		expect(body.reasoning?.summary).toBe("auto")
+	})
+})
diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts
index 8a205a06b4..b69585189d 100644
--- a/src/api/providers/openai-native.ts
+++ b/src/api/providers/openai-native.ts
@@ -221,6 +221,14 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages)
 	}
 
+	private canOrganizationUseStreaming(): boolean {
+		return this.options.openAiNativeDisableStreaming ? false : true
+	}
+
+	private canOrganizationUseGpt5ReasoningSummary(): boolean {
+		return Boolean(this.options.enableGpt5ReasoningSummary) && !this.options.openAiNativeDisableReasoningSummaries
+	}
+
 	private buildRequestBody(
 		model: OpenAiNativeModel,
 		formattedInput: any,
@@ -251,10 +259,14 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
 		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
 
+		// Centralized gating for unverified organizations
+		const stream = this.canOrganizationUseStreaming()
+		const enableGpt5ReasoningSummary = this.canOrganizationUseGpt5ReasoningSummary()
+
 		const body: Gpt5RequestBody = {
 			model: model.id,
 			input: formattedInput,
-			stream: true,
+			stream: stream,
 			store: metadata?.store !== false, // Default to true unless explicitly set to false
 			// Always include instructions (system prompt) for Responses API.
 			// Unlike Chat Completions, system/developer roles in input have no special semantics here.
@@ -263,7 +275,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			...(reasoningEffort && {
 				reasoning: {
 					effort: reasoningEffort,
-					...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}),
+					...(enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}),
 				},
 			}),
 			// Only include temperature if the model supports it
@@ -300,77 +312,171 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		systemPrompt?: string,
 		messages?: Anthropic.Messages.MessageParam[],
 	): ApiStream {
-		try {
-			// Use the official SDK
-			const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable<any>
+		// Handle non-streaming responses
+		if (requestBody.stream === false) {
+			yield* this.withPreviousIdRetry({
+				requestBody,
+				context: { systemPrompt, messages },
+				attempt: async (body: any) => this.performNonStreamingSdkRequest(body, model),
+				fallback: async (_body: any, originalError: any) => {
+					throw this.formatSdkError(originalError)
+				},
+			})
+			return
+		}
 
-			if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
-				throw new Error(
-					"OpenAI SDK did not return an AsyncIterable for Responses API streaming. Falling back to SSE.",
-				)
-			}
+		// Streaming path with SSE fallback
+		yield* this.withPreviousIdRetry({
+			requestBody,
+			context: { systemPrompt, messages, metadata },
+			attempt: async (body: any) => this.performStreamingSdkRequest(body, model),
+			fallback: async (body: any, _err: any) =>
+				this.makeGpt5ResponsesAPIRequest(body, model, metadata, systemPrompt, messages),
+		})
+	}
 
-			for await (const event of stream) {
-				for await (const outChunk of this.processEvent(event, model)) {
-					yield outChunk
+	// Runs attempt(body); on a stale previous_response_id error retries once without it, and hands any other or repeated failure to fallback
+	private async *withPreviousIdRetry({
+		requestBody,
+		attempt,
+		fallback,
+		context: { systemPrompt, messages, metadata } = {} as any,
+	}: {
+		requestBody: any
+		attempt: (body: any) => ApiStream | Promise<ApiStream>
+		fallback: (body: any, originalError: any) => ApiStream | Promise<ApiStream> | void
+		context?: {
+			systemPrompt?: string
+			messages?: Anthropic.Messages.MessageParam[]
+			metadata?: ApiHandlerCreateMessageMetadata
+		}
+	}): ApiStream {
+		try {
+			yield* await attempt(requestBody)
+		} catch (err: any) {
+			const firstErrorDetail = this.extractErrorDetail(err)
+			if (this.shouldRetryWithoutPreviousId(err, requestBody)) {
+				// Build a retry body without previous_response_id (input is re-prepared when prompt and messages are available)
+				const retryRequestBody = this.prepareRetryRequestBody(requestBody, systemPrompt, messages)
+				try {
+					// Retry via the same attempt helper; if it throws again we use the fallback below
+					yield* await attempt(retryRequestBody)
+					return
+				} catch (retryErr) {
+					// If retry also fails, use fallback mechanism
+					// Attach the first error's detail (when there is one) so downstream formatting can fall back to it
+					if (firstErrorDetail) {
+						;(retryErr as any).previousErrorDetail = firstErrorDetail
+					}
+					const fb = await fallback(retryRequestBody, retryErr)
+					if (fb) {
+						yield* fb
+					}
+					return
 				}
 			}
-		} catch (sdkErr: any) {
-			// Check if this is a 400 error about previous_response_id not found
-			const errorMessage = sdkErr?.message || sdkErr?.error?.message || ""
-			const is400Error = sdkErr?.status === 400 || sdkErr?.response?.status === 400
-			const isPreviousResponseError =
-				errorMessage.includes("Previous response") || errorMessage.includes("not found")
-
-			if (is400Error && requestBody.previous_response_id && isPreviousResponseError) {
-				// Log the error and retry without the previous_response_id
+			const fb = await fallback(requestBody, err)
+			if (fb) {
+				yield* fb
+			}
+		}
+	}
 
-				// Clear the stored lastResponseId to prevent using it again
-				this.lastResponseId = undefined
+	private async *performNonStreamingSdkRequest(requestBody: any, model: OpenAiNativeModel): ApiStream {
+		// Non-streaming call: the awaited SDK result is handed to handleResponse as a single response object
+		const sdkClient = this.client as any
+		yield* this.handleResponse(await sdkClient.responses.create(requestBody), model)
+	}
 
-				// Re-prepare the full conversation without previous_response_id
-				let retryRequestBody = { ...requestBody }
-				delete retryRequestBody.previous_response_id
+	private async *performStreamingSdkRequest(requestBody: any, model: OpenAiNativeModel): ApiStream {
+		// Issue the SDK request and forward every event through processEvent; a non-iterable result throws so the caller's fallback can run
+		const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable<any>
 
-				// If we have the original messages, re-prepare the full conversation
-				if (systemPrompt && messages) {
-					const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined)
-					retryRequestBody.input = formattedInput
-				}
+		if (typeof (stream as any)[Symbol.asyncIterator] !== "function") {
+			throw new Error(
+				"OpenAI SDK did not return an AsyncIterable for Responses API streaming. Falling back to SSE.",
+			)
+		}
 
-				try {
-					// Retry with the SDK
-					const retryStream = (await (this.client as any).responses.create(
-						retryRequestBody,
-					)) as AsyncIterable<any>
-
-					if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") {
-						// If SDK fails, fall back to SSE
-						yield* this.makeGpt5ResponsesAPIRequest(
-							retryRequestBody,
-							model,
-							metadata,
-							systemPrompt,
-							messages,
-						)
-						return
-					}
+		for await (const event of stream) {
+			for await (const outChunk of this.processEvent(event, model)) {
+				yield outChunk
+			}
+		}
+	}
 
-					for await (const event of retryStream) {
-						for await (const outChunk of this.processEvent(event, model)) {
-							yield outChunk
+	private prepareRetryRequestBody(
+		requestBody: any,
+		systemPrompt: string | undefined,
+		messages: Anthropic.Messages.MessageParam[] | undefined,
+	) {
+		// The rejected id must not be reused: forget the cached value first, then
+		// resolve the response-id promise with undefined so waiting requests are unblocked.
+		this.lastResponseId = undefined
+		this.resolveResponseId(undefined)
+
+		// Work on a shallow copy that no longer carries previous_response_id
+		const bodyWithoutPreviousId: any = Object.assign({}, requestBody)
+		delete bodyWithoutPreviousId.previous_response_id
+
+		// Without both the system prompt and the messages, the copied input is left untouched
+		const canRebuildInput = Boolean(systemPrompt && messages)
+		if (canRebuildInput) {
+			bodyWithoutPreviousId.input = this.prepareStructuredInput(systemPrompt, messages, undefined).formattedInput
+		}
+		return bodyWithoutPreviousId
+	}
+
+	private shouldRetryWithoutPreviousId(sdkErr: any, requestBody: any): boolean {
+		// True only for a 400 about a missing previous response on a request that sent previous_response_id; coerced so callers get a real boolean
+		const errorMessage = String(sdkErr?.message || sdkErr?.error?.message || "")
+		const is400Error = sdkErr?.status === 400 || sdkErr?.response?.status === 400
+		const isPreviousResponseError = errorMessage.includes("Previous response") || errorMessage.includes("not found")
+
+		return Boolean(is400Error && requestBody.previous_response_id && isPreviousResponseError)
+	}
+
+	private async *handleResponse(response: any, model: OpenAiNativeModel): ApiStream {
+		if (response?.output && Array.isArray(response.output)) {
+			for (const outputItem of response.output) {
+				if (outputItem.type === "message" && outputItem.content) {
+					for (const content of outputItem.content) {
+						if (content.type === "output_text" && content.text) {
+							yield { type: "text", text: content.text }
+						}
+					}
+				}
+				// Handle reasoning summaries if present
+				if (outputItem.type === "reasoning" && Array.isArray(outputItem.summary)) {
+					for (const summary of outputItem.summary) {
+						if (summary?.type === "summary_text" && typeof summary.text === "string") {
+							yield { type: "reasoning", text: summary.text }
 						}
 					}
-					return
-				} catch (retryErr) {
-					// If retry also fails, fall back to SSE
-					yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata, systemPrompt, messages)
-					return
 				}
 			}
+		}
+
+		// Fallback: direct string text only; a Responses API payload's "text" field is a format-config object and must not be emitted as a chunk
+		if (typeof response?.text === "string" && response.text) {
+			yield { type: "text", text: response.text }
+		}
+
+		// Handle usage data
+		if (response?.usage) {
+			const usageData = this.normalizeUsage(response.usage, model)
+			if (usageData) {
+				yield usageData
+			}
+		}
 
-			// For other errors, fallback to manual SSE via fetch
-			yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages)
+		// Store response ID for conversation continuity
+		if (response?.id) {
+			this.resolveResponseId(response.id)
+		}
+		// Capture resolved service tier if present
+		if (response?.service_tier) {
+			this.lastServiceTier = response.service_tier as ServiceTier
 		}
 	}
 
@@ -545,32 +651,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					return
 				}
 
-				// Provide user-friendly error messages based on status code
-				switch (response.status) {
-					case 400:
-						errorMessage = "Invalid request to Responses API. Please check your input parameters."
-						break
-					case 401:
-						errorMessage = "Authentication failed. Please check your OpenAI API key."
-						break
-					case 403:
-						errorMessage = "Access denied. Your API key may not have access to this endpoint."
-						break
-					case 404:
-						errorMessage =
-							"Responses API endpoint not found. The endpoint may not be available yet or requires a different configuration."
-						break
-					case 429:
-						errorMessage = "Rate limit exceeded. Please try again later."
-						break
-					case 500:
-					case 502:
-					case 503:
-						errorMessage = "OpenAI service error. Please try again later."
-						break
-					default:
-						errorMessage = `Responses API error (${response.status})`
-				}
+				errorMessage = this.getErrorMessageByStatus(response.status)
 
 				// Append details if available
 				if (errorDetails) {
@@ -600,6 +681,55 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		}
 	}
 
+	/**
+	 * Translates an HTTP status into a user-friendly Responses API error message.
+	 *
+	 * @param status HTTP status code; a falsy value (missing or 0) yields the "No status code" message.
+	 * @returns The message for a known code, otherwise a generic message that includes the code.
+	 */
+	private getErrorMessageByStatus(status: number) {
+		if (!status) {
+			return "Responses API error: No status code"
+		}
+
+		switch (status) {
+			case 400:
+				return "Invalid request to Responses API. Please check your input parameters."
+			case 401:
+				return "Authentication failed. Please check your OpenAI API key."
+			case 403:
+				return "Access denied. Your API key may not have access to this endpoint."
+			case 404:
+				return (
+					"Responses API endpoint not found. The endpoint may not be available yet or requires a different configuration."
+				)
+			case 429:
+				return "Rate limit exceeded. Please try again later."
+			// These three server-side codes share one message
+			case 500:
+			case 502:
+			case 503:
+				return "OpenAI service error. Please try again later."
+			// Anything else: report the raw code
+			default:
+				return `Responses API error (${status})`
+		}
+	}
+
+	private formatSdkError(err: any): Error {
+		// Start from the status-based message; the status may sit on the error itself or on err.response
+		const baseMessage = this.getErrorMessageByStatus(err?.status || err?.response?.status)
+		const detail = this.extractErrorDetail(err)
+		if (!detail) {
+			return new Error(baseMessage)
+		}
+		return new Error(`${baseMessage} - ${detail}`)
+	}
+
+	private extractErrorDetail(err: any) {
+		return [err?.message, err?.error?.message, err?.previousErrorDetail].find(Boolean) || ""
+	}
+
 	/**
 	 * Prepares the input and conversation continuity parameters for a Responses API call.
 	 * Decides whether to send full conversation or just the latest message based on previousResponseId.
@@ -1300,7 +1430,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			if (reasoningEffort) {
 				requestBody.reasoning = {
 					effort: reasoningEffort,
-					...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}),
+					...(this.canOrganizationUseGpt5ReasoningSummary() ? { summary: "auto" as const } : {}),
 				}
 			}
 
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index 3b6536f75b..672096bfdc 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -1,7 +1,7 @@
 import React, { memo, useCallback, useEffect, useMemo, useState } from "react"
 import { convertHeadersToObject } from "./utils/headers"
 import { useDebounce } from "react-use"
-import { VSCodeLink, VSCodeButton } from "@vscode/webview-ui-toolkit/react"
+import { VSCodeLink, VSCodeButton, VSCodeCheckbox } from "@vscode/webview-ui-toolkit/react"
 import { ExternalLinkIcon } from "@radix-ui/react-icons"
 
 import {
@@ -824,6 +824,37 @@ const ApiOptions = ({
 									</div>
 								</div>
 							)}
+						{selectedProvider === "openai-native" && (
+							<div className="space-y-4">
+								<div>
+									<VSCodeCheckbox
+										checked={apiConfiguration?.openAiNativeDisableStreaming ?? false}
+										onChange={(e: any) =>
+											setApiConfigurationField("openAiNativeDisableStreaming", e.target.checked)
+										}>
+										{t("settings:providers.disableStreaming")}
+									</VSCodeCheckbox>
+									<div className="text-sm text-vscode-descriptionForeground mt-1">
+										{t("settings:providers.disableStreamingDescription")}
+									</div>
+								</div>
+								<div>
+									<VSCodeCheckbox
+										checked={apiConfiguration?.openAiNativeDisableReasoningSummaries ?? false}
+										onChange={(e: any) =>
+											setApiConfigurationField(
+												"openAiNativeDisableReasoningSummaries",
+												e.target.checked,
+											)
+										}>
+										{t("settings:providers.disableReasoningSummaries")}
+									</VSCodeCheckbox>
+									<div className="text-sm text-vscode-descriptionForeground mt-1">
+										{t("settings:providers.disableReasoningSummariesDescription")}
+									</div>
+								</div>
+							</div>
+						)}
 					</CollapsibleContent>
 				</Collapsible>
 			)}
diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json
index 4f3e4d9924..7eff7274ed 100644
--- a/webview-ui/src/i18n/locales/ca/settings.json
+++ b/webview-ui/src/i18n/locales/ca/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Restablir als valors per defecte"
 		},
+		"disableStreaming": "Desactivar streaming",
+		"disableStreamingDescription": "Desactivar el streaming de resposta en temps real per als models OpenAI.",
+		"disableReasoningSummaries": "Desactivar resums de raonament",
+		"disableReasoningSummariesDescription": "Desactivar els resums de raonament en models com GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Límit de freqüència",
 			"description": "Temps mínim entre sol·licituds d'API."
diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json
index 4e75f6af2a..1b930042db 100644
--- a/webview-ui/src/i18n/locales/de/settings.json
+++ b/webview-ui/src/i18n/locales/de/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Auf Standardwerte zurücksetzen"
 		},
+		"disableStreaming": "Streaming deaktivieren",
+		"disableStreamingDescription": "Echtzeit-Antwort-Streaming für OpenAI-Modelle deaktivieren.",
+		"disableReasoningSummaries": "Reasoning-Zusammenfassungen deaktivieren",
+		"disableReasoningSummariesDescription": "Reasoning-Zusammenfassungen in Modellen wie GPT-5 deaktivieren.",
 		"rateLimitSeconds": {
 			"label": "Ratenbegrenzung",
 			"description": "Minimale Zeit zwischen API-Anfragen."
diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json
index 1be824b37e..c5a6afdebf 100644
--- a/webview-ui/src/i18n/locales/en/settings.json
+++ b/webview-ui/src/i18n/locales/en/settings.json
@@ -443,6 +443,10 @@
 			},
 			"resetDefaults": "Reset to Defaults"
 		},
+		"disableStreaming": "Disable Streaming",
+		"disableStreamingDescription": "Disable real-time response streaming for OpenAI models.",
+		"disableReasoningSummaries": "Disable Reasoning Summaries",
+		"disableReasoningSummariesDescription": "Disable reasoning summaries in models like GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Rate limit",
 			"description": "Minimum time between API requests."
diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json
index deb2bc7a22..fc3261d483 100644
--- a/webview-ui/src/i18n/locales/es/settings.json
+++ b/webview-ui/src/i18n/locales/es/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Restablecer valores predeterminados"
 		},
+		"disableStreaming": "Deshabilitar transmisión",
+		"disableStreamingDescription": "Deshabilitar la transmisión de respuesta en tiempo real para modelos OpenAI.",
+		"disableReasoningSummaries": "Deshabilitar resúmenes de razonamiento",
+		"disableReasoningSummariesDescription": "Deshabilitar los resúmenes de razonamiento en modelos como GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Límite de tasa",
 			"description": "Tiempo mínimo entre solicitudes de API."
diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json
index ccb8e61d7a..3a08385a38 100644
--- a/webview-ui/src/i18n/locales/fr/settings.json
+++ b/webview-ui/src/i18n/locales/fr/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Réinitialiser les valeurs par défaut"
 		},
+		"disableStreaming": "Désactiver le streaming",
+		"disableStreamingDescription": "Désactiver le streaming de réponse en temps réel pour les modèles OpenAI.",
+		"disableReasoningSummaries": "Désactiver les résumés de raisonnement",
+		"disableReasoningSummariesDescription": "Désactiver les résumés de raisonnement dans des modèles comme GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Limite de débit",
 			"description": "Temps minimum entre les requêtes API."
diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json
index 3d879e2ca7..d50bb65184 100644
--- a/webview-ui/src/i18n/locales/hi/settings.json
+++ b/webview-ui/src/i18n/locales/hi/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "डिफ़ॉल्ट पर रीसेट करें"
 		},
+		"disableStreaming": "स्ट्रीमिंग अक्षम करें",
+		"disableStreamingDescription": "OpenAI मॉडल के लिए रियल-टाइम रिस्पॉन्स स्ट्रीमिंग को अक्षम करें।",
+		"disableReasoningSummaries": "रीज़निंग सारांश अक्षम करें",
+		"disableReasoningSummariesDescription": "GPT-5 जैसे मॉडल में रीज़निंग सारांश को अक्षम करें।",
 		"rateLimitSeconds": {
 			"label": "दर सीमा",
 			"description": "API अनुरोधों के बीच न्यूनतम समय।"
diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json
index 8138726c33..4408473e12 100644
--- a/webview-ui/src/i18n/locales/id/settings.json
+++ b/webview-ui/src/i18n/locales/id/settings.json
@@ -448,6 +448,10 @@
 			},
 			"resetDefaults": "Reset ke Default"
 		},
+		"disableStreaming": "Nonaktifkan streaming",
+		"disableStreamingDescription": "Nonaktifkan streaming respons real-time untuk model OpenAI.",
+		"disableReasoningSummaries": "Nonaktifkan ringkasan reasoning",
+		"disableReasoningSummariesDescription": "Nonaktifkan ringkasan reasoning dalam model seperti GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Rate limit",
 			"description": "Waktu minimum antara permintaan API."
diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json
index 80ff0f8a71..6e6fd609fa 100644
--- a/webview-ui/src/i18n/locales/it/settings.json
+++ b/webview-ui/src/i18n/locales/it/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Ripristina valori predefiniti"
 		},
+		"disableStreaming": "Disabilita streaming",
+		"disableStreamingDescription": "Disabilita lo streaming delle risposte in tempo reale per i modelli OpenAI.",
+		"disableReasoningSummaries": "Disabilita riassunti di ragionamento",
+		"disableReasoningSummariesDescription": "Disabilita i riassunti di ragionamento in modelli come GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Limite di frequenza",
 			"description": "Tempo minimo tra le richieste API."
diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json
index 264d774473..e1ce3c1299 100644
--- a/webview-ui/src/i18n/locales/ja/settings.json
+++ b/webview-ui/src/i18n/locales/ja/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "デフォルトにリセット"
 		},
+		"disableStreaming": "ストリーミングを無効にする",
+		"disableStreamingDescription": "OpenAIモデルのリアルタイム応答ストリーミングを無効にします。",
+		"disableReasoningSummaries": "推論要約を無効にする",
+		"disableReasoningSummariesDescription": "GPT-5などのモデルで推論要約を無効にします。",
 		"rateLimitSeconds": {
 			"label": "レート制限",
 			"description": "APIリクエスト間の最小時間。"
diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json
index e490e31f78..d35853485c 100644
--- a/webview-ui/src/i18n/locales/ko/settings.json
+++ b/webview-ui/src/i18n/locales/ko/settings.json
@@ -448,6 +448,10 @@
 			"label": "속도 제한",
 			"description": "API 요청 간 최소 시간."
 		},
+		"disableStreaming": "스트리밍 비활성화",
+		"disableStreamingDescription": "OpenAI 모델의 실시간 응답 스트리밍을 비활성화합니다.",
+		"disableReasoningSummaries": "추론 요약 비활성화",
+		"disableReasoningSummariesDescription": "GPT-5와 같은 모델에서 추론 요약을 비활성화합니다.",
 		"consecutiveMistakeLimit": {
 			"label": "오류 및 반복 제한",
 			"description": "'Roo에 문제가 발생했습니다' 대화 상자를 표시하기 전의 연속 오류 또는 반복 작업 수",
diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json
index ee0ba193e5..2457002f13 100644
--- a/webview-ui/src/i18n/locales/nl/settings.json
+++ b/webview-ui/src/i18n/locales/nl/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Standaardwaarden herstellen"
 		},
+		"disableStreaming": "Streaming uitschakelen",
+		"disableStreamingDescription": "Schakel realtime responsstreaming uit voor OpenAI-modellen.",
+		"disableReasoningSummaries": "Redeneringssamenvattingen uitschakelen",
+		"disableReasoningSummariesDescription": "Schakel redeneringssamenvattingen uit in modellen zoals GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Snelheidslimiet",
 			"description": "Minimale tijd tussen API-verzoeken."
diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json
index 2d30547d9f..a0a3b7b5fe 100644
--- a/webview-ui/src/i18n/locales/pl/settings.json
+++ b/webview-ui/src/i18n/locales/pl/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Przywróć domyślne"
 		},
+		"disableStreaming": "Wyłącz strumieniowanie",
+		"disableStreamingDescription": "Wyłącz strumieniowanie odpowiedzi w czasie rzeczywistym dla modeli OpenAI.",
+		"disableReasoningSummaries": "Wyłącz podsumowania rozumowania",
+		"disableReasoningSummariesDescription": "Wyłącz podsumowania rozumowania w modelach takich jak GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Limit szybkości",
 			"description": "Minimalny czas między żądaniami API."
diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json
index 338ab9f6b1..685839f2c5 100644
--- a/webview-ui/src/i18n/locales/pt-BR/settings.json
+++ b/webview-ui/src/i18n/locales/pt-BR/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Restaurar Padrões"
 		},
+		"disableStreaming": "Desabilitar streaming",
+		"disableStreamingDescription": "Desabilitar o streaming de resposta em tempo real para modelos OpenAI.",
+		"disableReasoningSummaries": "Desabilitar resumos de raciocínio",
+		"disableReasoningSummariesDescription": "Desabilitar os resumos de raciocínio em modelos como GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Limite de taxa",
 			"description": "Tempo mínimo entre requisições de API."
diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json
index be494c571b..fa33e4f519 100644
--- a/webview-ui/src/i18n/locales/ru/settings.json
+++ b/webview-ui/src/i18n/locales/ru/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Сбросить к значениям по умолчанию"
 		},
+		"disableStreaming": "Отключить потоковую передачу",
+		"disableStreamingDescription": "Отключить потоковую передачу ответов в реальном времени для моделей OpenAI.",
+		"disableReasoningSummaries": "Отключить резюме рассуждений",
+		"disableReasoningSummariesDescription": "Отключить резюме рассуждений в моделях, таких как GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Лимит скорости",
 			"description": "Минимальное время между запросами к API."
diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json
index fe4508495b..c62029d6c4 100644
--- a/webview-ui/src/i18n/locales/tr/settings.json
+++ b/webview-ui/src/i18n/locales/tr/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Varsayılanlara Sıfırla"
 		},
+		"disableStreaming": "Akışı devre dışı bırak",
+		"disableStreamingDescription": "OpenAI modelleri için gerçek zamanlı yanıt akışını devre dışı bırakın.",
+		"disableReasoningSummaries": "Akıl yürütme özetlerini devre dışı bırak",
+		"disableReasoningSummariesDescription": "GPT-5 gibi modellerde akıl yürütme özetlerini devre dışı bırakın.",
 		"rateLimitSeconds": {
 			"label": "Hız sınırı",
 			"description": "API istekleri arasındaki minimum süre."
diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json
index 0f03de47a5..aa0c3b8e55 100644
--- a/webview-ui/src/i18n/locales/vi/settings.json
+++ b/webview-ui/src/i18n/locales/vi/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "Đặt lại về mặc định"
 		},
+		"disableStreaming": "Tắt streaming",
+		"disableStreamingDescription": "Tắt streaming phản hồi thời gian thực cho các mô hình OpenAI.",
+		"disableReasoningSummaries": "Tắt tóm tắt lý luận",
+		"disableReasoningSummariesDescription": "Tắt tóm tắt lý luận trong các mô hình như GPT-5.",
 		"rateLimitSeconds": {
 			"label": "Giới hạn tốc độ",
 			"description": "Thời gian tối thiểu giữa các yêu cầu API."
diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json
index 51db19562a..b0927afbf2 100644
--- a/webview-ui/src/i18n/locales/zh-CN/settings.json
+++ b/webview-ui/src/i18n/locales/zh-CN/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "重置为默认值"
 		},
+		"disableStreaming": "禁用流式传输",
+		"disableStreamingDescription": "为 OpenAI 模型禁用实时响应流式传输。",
+		"disableReasoningSummaries": "禁用推理摘要",
+		"disableReasoningSummariesDescription": "在 GPT-5 等模型中禁用推理摘要。",
 		"rateLimitSeconds": {
 			"label": "API 请求频率限制",
 			"description": "设置API请求的最小间隔时间"
diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json
index 89d517f5b5..6796f5c333 100644
--- a/webview-ui/src/i18n/locales/zh-TW/settings.json
+++ b/webview-ui/src/i18n/locales/zh-TW/settings.json
@@ -444,6 +444,10 @@
 			},
 			"resetDefaults": "重設為預設值"
 		},
+		"disableStreaming": "停用串流傳輸",
+		"disableStreamingDescription": "為 OpenAI 模型停用即時回應串流傳輸。",
+		"disableReasoningSummaries": "停用推理摘要",
+		"disableReasoningSummariesDescription": "在 GPT-5 等模型中停用推理摘要。",
 		"rateLimitSeconds": {
 			"label": "速率限制",
 			"description": "API 請求間的最短時間"