diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts index 21baf3f203..7197ab29a1 100644 --- a/packages/types/src/message.ts +++ b/packages/types/src/message.ts @@ -176,6 +176,17 @@ export const clineMessageSchema = z.object({ contextCondense: contextCondenseSchema.optional(), isProtected: z.boolean().optional(), apiProtocol: z.union([z.literal("openai"), z.literal("anthropic")]).optional(), + metadata: z + .object({ + gpt5: z + .object({ + previous_response_id: z.string().optional(), + instructions: z.string().optional(), + reasoning_summary: z.string().optional(), + }) + .optional(), + }) + .optional(), }) export type ClineMessage = z.infer diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index a09790578b..90b61ad879 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -44,6 +44,8 @@ export const modelInfoSchema = z.object({ supportsImages: z.boolean().optional(), supportsComputerUse: z.boolean().optional(), supportsPromptCache: z.boolean(), + // Capability flag to indicate whether the model supports an output verbosity parameter + supportsVerbosity: z.boolean().optional(), supportsReasoningBudget: z.boolean().optional(), requiredReasoningBudget: z.boolean().optional(), supportsReasoningEffort: z.boolean().optional(), diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index f0c90101fc..aebfd4dbe5 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -3,6 +3,11 @@ import { z } from "zod" import { reasoningEffortsSchema, verbosityLevelsSchema, modelInfoSchema } from "./model.js" import { codebaseIndexProviderSchema } from "./codebase-index.js" +// Extended schema that includes "minimal" for GPT-5 models +export const extendedReasoningEffortsSchema = z.union([reasoningEffortsSchema, z.literal("minimal")]) + +export type ReasoningEffortWithMinimal = z.infer + /** * ProviderName */ @@ -76,7 +81,7 @@ const baseProviderSettingsSchema = z.object({ // Model reasoning. 
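The `extendedReasoningEffortsSchema` introduced above simply widens the existing effort schema with a `"minimal"` literal so GPT-5 settings validate through the same provider-settings pipeline. A minimal sketch of how it behaves (the exact members of `reasoningEffortsSchema` live in `model.js`; `"low" | "medium" | "high"` is assumed here):

```typescript
import { z } from "zod"

// Stand-in for the schema exported from model.js (its members are assumed here).
const reasoningEffortsSchema = z.enum(["low", "medium", "high"])

// Widened schema: everything the base schema accepts, plus "minimal" for GPT-5.
const extendedReasoningEffortsSchema = z.union([reasoningEffortsSchema, z.literal("minimal")])
type ReasoningEffortWithMinimal = z.infer<typeof extendedReasoningEffortsSchema>

const effort: ReasoningEffortWithMinimal = "minimal"
console.log(extendedReasoningEffortsSchema.safeParse(effort).success) // true
console.log(reasoningEffortsSchema.safeParse(effort).success) // false: the base schema rejects "minimal"
```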
enableReasoningEffort: z.boolean().optional(), - reasoningEffort: reasoningEffortsSchema.optional(), + reasoningEffort: extendedReasoningEffortsSchema.optional(), modelMaxTokens: z.number().optional(), modelMaxThinkingTokens: z.number().optional(), diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index b319be2a5f..02fadb412d 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -12,10 +12,13 @@ export const openAiNativeModels = { supportsImages: true, supportsPromptCache: true, supportsReasoningEffort: true, + reasoningEffort: "medium", inputPrice: 1.25, outputPrice: 10.0, cacheReadsPrice: 0.13, description: "GPT-5: The best model for coding and agentic tasks across domains", + // supportsVerbosity is a new capability; ensure ModelInfo includes it + supportsVerbosity: true, }, "gpt-5-mini-2025-08-07": { maxTokens: 128000, @@ -23,10 +26,12 @@ export const openAiNativeModels = { supportsImages: true, supportsPromptCache: true, supportsReasoningEffort: true, + reasoningEffort: "medium", inputPrice: 0.25, outputPrice: 2.0, cacheReadsPrice: 0.03, description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks", + supportsVerbosity: true, }, "gpt-5-nano-2025-08-07": { maxTokens: 128000, @@ -34,10 +39,12 @@ export const openAiNativeModels = { supportsImages: true, supportsPromptCache: true, supportsReasoningEffort: true, + reasoningEffort: "medium", inputPrice: 0.05, outputPrice: 0.4, cacheReadsPrice: 0.01, description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5", + supportsVerbosity: true, }, "gpt-4.1": { maxTokens: 32_768, @@ -229,5 +236,6 @@ export const openAiModelInfoSaneDefaults: ModelInfo = { export const azureOpenAiDefaultApiVersion = "2024-08-01-preview" export const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0 +export const GPT5_DEFAULT_TEMPERATURE = 1.0 export const OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions" diff --git a/src/api/index.ts b/src/api/index.ts index 57b06f7bbd..5e705a80d2 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -44,6 +44,13 @@ export interface SingleCompletionHandler { export interface ApiHandlerCreateMessageMetadata { mode?: string taskId: string + previousResponseId?: string + /** + * When true, the provider must NOT fall back to internal continuity state + * (e.g., lastResponseId) if previousResponseId is absent. + * Used to enforce "skip once" after a condense operation. 
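As a rough illustration of how a caller is meant to drive these two fields (a sketch only; the import path and the surrounding orchestration are assumptions, not code from this PR):

```typescript
import type { ApiHandlerCreateMessageMetadata } from "../api" // illustrative path

// Normal turn: reuse the id captured from the prior GPT-5 response so the provider
// can send previous_response_id and only the newest user message.
const continueTurn: ApiHandlerCreateMessageMetadata = {
    taskId: "task-1",
    previousResponseId: "resp_abc123", // hypothetical id from an earlier response.done event
}

// First turn after condensing: supply no id AND forbid the fallback to the provider's
// internal lastResponseId, so the freshly condensed context is sent in full exactly once.
const afterCondense: ApiHandlerCreateMessageMetadata = {
    taskId: "task-1",
    suppressPreviousResponseId: true,
}

// Either object is passed as the third argument:
// handler.createMessage(systemPrompt, messages, continueTurn)
```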
+ */ + suppressPreviousResponseId?: boolean } export interface ApiHandler { diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index fdd71ba3f6..23f19e3d48 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -160,8 +160,12 @@ describe("OpenAiNativeHandler", () => { expect(results.length).toBe(1) expect(results[0].type).toBe("usage") // Use type assertion to avoid TypeScript errors - expect((results[0] as any).inputTokens).toBe(0) - expect((results[0] as any).outputTokens).toBe(0) + const usageResult = results[0] as any + expect(usageResult.inputTokens).toBe(0) + expect(usageResult.outputTokens).toBe(0) + // When no cache tokens are present, they should be undefined + expect(usageResult.cacheWriteTokens).toBeUndefined() + expect(usageResult.cacheReadTokens).toBeUndefined() // Verify developer role is used for system prompt with o1 model expect(mockCreate).toHaveBeenCalledWith({ @@ -286,6 +290,111 @@ describe("OpenAiNativeHandler", () => { expect((results[1] as any).outputTokens).toBe(5) expect((results[1] as any).totalCost).toBeCloseTo(0.00006, 6) }) + + it("should handle cache tokens in streaming response", async () => { + const mockStream = [ + { choices: [{ delta: { content: "Hello" } }], usage: null }, + { choices: [{ delta: { content: " cached" } }], usage: null }, + { + choices: [{ delta: { content: " response" } }], + usage: { + prompt_tokens: 100, + completion_tokens: 10, + prompt_tokens_details: { + cached_tokens: 80, + audio_tokens: 0, + }, + completion_tokens_details: { + reasoning_tokens: 0, + audio_tokens: 0, + accepted_prediction_tokens: 0, + rejected_prediction_tokens: 0, + }, + }, + }, + ] + + mockCreate.mockResolvedValueOnce( + (async function* () { + for (const chunk of mockStream) { + yield chunk + } + })(), + ) + + const generator = handler.createMessage(systemPrompt, messages) + const results = [] + for await (const result of generator) { + results.push(result) + } + + // Verify text responses + expect(results.length).toBe(4) + expect(results[0]).toMatchObject({ type: "text", text: "Hello" }) + expect(results[1]).toMatchObject({ type: "text", text: " cached" }) + expect(results[2]).toMatchObject({ type: "text", text: " response" }) + + // Check usage data includes cache tokens + expect(results[3].type).toBe("usage") + const usageChunk = results[3] as any + expect(usageChunk.inputTokens).toBe(100) // Total input tokens (includes cached) + expect(usageChunk.outputTokens).toBe(10) + expect(usageChunk.cacheReadTokens).toBe(80) // Cached tokens from prompt_tokens_details + expect(usageChunk.cacheWriteTokens).toBeUndefined() // No cache write tokens in standard response + + // Verify cost calculation takes cache into account + // GPT-4.1 pricing: input $2/1M, output $8/1M, cache read $0.5/1M + // OpenAI's prompt_tokens includes cached tokens, so we need to calculate: + // - Non-cached input tokens: 100 - 80 = 20 + // - Cost for non-cached input: (20 / 1_000_000) * 2.0 + // - Cost for cached input: (80 / 1_000_000) * 0.5 + // - Cost for output: (10 / 1_000_000) * 8.0 + const nonCachedInputTokens = 100 - 80 + const expectedNonCachedInputCost = (nonCachedInputTokens / 1_000_000) * 2.0 + const expectedCacheReadCost = (80 / 1_000_000) * 0.5 + const expectedOutputCost = (10 / 1_000_000) * 8.0 + const expectedTotalCost = expectedNonCachedInputCost + expectedCacheReadCost + expectedOutputCost + 
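Since the comment block above is the crux of the cache accounting (OpenAI's `prompt_tokens` already includes the cached tokens), here is the same arithmetic as a standalone sketch, using the GPT-4.1 per-1M-token prices assumed by this test:

```typescript
// GPT-4.1 prices used by this test, per 1M tokens: input $2.00, output $8.00, cache read $0.50.
function openAiCost(promptTokens: number, cachedTokens: number, completionTokens: number): number {
    const uncachedInput = promptTokens - cachedTokens // prompt_tokens already includes cached tokens
    return (
        (uncachedInput / 1_000_000) * 2.0 +
        (cachedTokens / 1_000_000) * 0.5 +
        (completionTokens / 1_000_000) * 8.0
    )
}

// The streamed usage above: 100 prompt tokens, 80 of them cached, 10 completion tokens.
console.log(openAiCost(100, 80, 10)) // ≈ 0.00016 (0.00004 + 0.00004 + 0.00008)
```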
expect(usageChunk.totalCost).toBeCloseTo(expectedTotalCost, 10) + }) + + it("should handle cache write tokens if present", async () => { + const mockStream = [ + { choices: [{ delta: { content: "Test" } }], usage: null }, + { + choices: [{ delta: {} }], + usage: { + prompt_tokens: 150, + completion_tokens: 5, + prompt_tokens_details: { + cached_tokens: 50, + }, + cache_creation_input_tokens: 30, // Cache write tokens + }, + }, + ] + + mockCreate.mockResolvedValueOnce( + (async function* () { + for (const chunk of mockStream) { + yield chunk + } + })(), + ) + + const generator = handler.createMessage(systemPrompt, messages) + const results = [] + for await (const result of generator) { + results.push(result) + } + + // Check usage data includes both cache read and write tokens + const usageChunk = results.find((r) => r.type === "usage") as any + expect(usageChunk).toBeDefined() + expect(usageChunk.inputTokens).toBe(150) + expect(usageChunk.outputTokens).toBe(5) + expect(usageChunk.cacheReadTokens).toBe(50) + expect(usageChunk.cacheWriteTokens).toBe(30) + }) }) describe("completePrompt", () => { @@ -461,7 +570,40 @@ describe("OpenAiNativeHandler", () => { }) describe("GPT-5 models", () => { - it("should handle GPT-5 model with developer role", async () => { + it("should handle GPT-5 model with Responses API", async () => { + // Mock fetch for Responses API + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + // Simulate actual GPT-5 Responses API SSE stream format + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.created","response":{"id":"test","status":"in_progress"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Hello"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":" world"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.done","response":{"usage":{"prompt_tokens":10,"completion_tokens":2}}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -473,20 +615,56 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Verify developer role is used for GPT-5 with default parameters - expect(mockCreate).toHaveBeenCalledWith( + // Verify Responses API is called with correct parameters + expect(mockFetch).toHaveBeenCalledWith( + "https://api.openai.com/v1/responses", expect.objectContaining({ - model: "gpt-5-2025-08-07", - messages: [{ role: "developer", content: expect.stringContaining(systemPrompt) }], - stream: true, - stream_options: { include_usage: true }, - reasoning_effort: "minimal", // Default for GPT-5 - verbosity: "medium", // Default verbosity + method: "POST", + headers: expect.objectContaining({ + "Content-Type": "application/json", + Authorization: "Bearer test-api-key", + Accept: "text/event-stream", + }), + body: expect.any(String), }), ) + const body1 = (mockFetch.mock.calls[0][1] as any).body as string + expect(body1).toContain('"model":"gpt-5-2025-08-07"') + expect(body1).toContain('"input":"Developer: You are a helpful assistant.\\n\\nUser: Hello!"') + expect(body1).toContain('"effort":"medium"') + expect(body1).toContain('"summary":"auto"') + 
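Each of these tests hand-rolls the same `ReadableStream` boilerplate for the mocked SSE body. A small helper along these lines (a sketch, not part of the PR) would shrink the fixtures to just the event payloads:

```typescript
// Build a mock fetch Response whose body is an SSE stream of the given events,
// terminated by the [DONE] sentinel, matching the Responses API framing used above.
function mockSseResponse(events: object[]): { ok: true; body: ReadableStream<Uint8Array> } {
    const encoder = new TextEncoder()
    return {
        ok: true,
        body: new ReadableStream<Uint8Array>({
            start(controller) {
                for (const event of events) {
                    controller.enqueue(encoder.encode(`data: ${JSON.stringify(event)}\n\n`))
                }
                controller.enqueue(encoder.encode("data: [DONE]\n\n"))
                controller.close()
            },
        }),
    }
}

// Usage in a test:
// global.fetch = vitest.fn().mockResolvedValue(mockSseResponse([
//     { type: "response.output_item.added", item: { type: "text", text: "Hello" } },
// ])) as any
```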
expect(body1).toContain('"verbosity":"medium"') + expect(body1).toContain('"temperature":1') + expect(body1).toContain('"max_output_tokens"') + + // Verify the streamed content + const textChunks = chunks.filter((c) => c.type === "text") + expect(textChunks).toHaveLength(2) + expect(textChunks[0].text).toBe("Hello") + expect(textChunks[1].text).toBe(" world") + + // Clean up + delete (global as any).fetch }) - it("should handle GPT-5-mini model", async () => { + it("should handle GPT-5-mini model with Responses API", async () => { + // Mock fetch for Responses API + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Response"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-mini-2025-08-07", @@ -498,19 +676,36 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - expect(mockCreate).toHaveBeenCalledWith( + // Verify correct model and default parameters + expect(mockFetch).toHaveBeenCalledWith( + "https://api.openai.com/v1/responses", expect.objectContaining({ - model: "gpt-5-mini-2025-08-07", - messages: [{ role: "developer", content: expect.stringContaining(systemPrompt) }], - stream: true, - stream_options: { include_usage: true }, - reasoning_effort: "minimal", // Default for GPT-5 - verbosity: "medium", // Default verbosity + body: expect.stringContaining('"model":"gpt-5-mini-2025-08-07"'), }), ) + + // Clean up + delete (global as any).fetch }) - it("should handle GPT-5-nano model", async () => { + it("should handle GPT-5-nano model with Responses API", async () => { + // Mock fetch for Responses API + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Nano response"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-nano-2025-08-07", @@ -522,19 +717,36 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - expect(mockCreate).toHaveBeenCalledWith( + // Verify correct model + expect(mockFetch).toHaveBeenCalledWith( + "https://api.openai.com/v1/responses", expect.objectContaining({ - model: "gpt-5-nano-2025-08-07", - messages: [{ role: "developer", content: expect.stringContaining(systemPrompt) }], - stream: true, - stream_options: { include_usage: true }, - reasoning_effort: "minimal", // Default for GPT-5 - verbosity: "medium", // Default verbosity + body: expect.stringContaining('"model":"gpt-5-nano-2025-08-07"'), }), ) + + // Clean up + delete (global as any).fetch }) it("should support verbosity control for GPT-5", async () => { + // Mock fetch for Responses API + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Low verbosity"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + 
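The substring assertions on `body1` above are sensitive to key ordering; later tests in this file parse the captured body instead, which a tiny helper makes reusable (a sketch, assuming the surrounding vitest mock):

```typescript
// Pull the parsed JSON request body out of the n-th call captured by the fetch mock.
function capturedBody(fetchMock: { mock: { calls: any[][] } }, callIndex = 0): any {
    const [, init] = fetchMock.mock.calls[callIndex]
    return JSON.parse(init.body)
}

// e.g. with the nested Responses API shape built by the handler:
// expect(capturedBody(mockFetch).text?.verbosity).toBe("low")
// expect(capturedBody(mockFetch).reasoning?.effort).toBe("medium")
```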
global.fetch = mockFetch as any + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -549,18 +761,77 @@ describe("OpenAiNativeHandler", () => { } // Verify that verbosity is passed in the request - expect(mockCreate).toHaveBeenCalledWith( + expect(mockFetch).toHaveBeenCalledWith( + "https://api.openai.com/v1/responses", expect.objectContaining({ - model: "gpt-5-2025-08-07", - messages: expect.any(Array), - stream: true, - stream_options: { include_usage: true }, - verbosity: "low", + body: expect.stringContaining('"verbosity":"low"'), }), ) + + // Clean up + delete (global as any).fetch }) it("should support minimal reasoning effort for GPT-5", async () => { + // Mock fetch for Responses API + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Minimal effort"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + reasoningEffort: "minimal" as any, // GPT-5 supports minimal + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // With minimal reasoning effort, the model should pass it through + expect(mockFetch).toHaveBeenCalledWith( + "https://api.openai.com/v1/responses", + expect.objectContaining({ + body: expect.stringContaining('"effort":"minimal"'), + }), + ) + + // Clean up + delete (global as any).fetch + }) + + it("should support low reasoning effort for GPT-5", async () => { + // Mock fetch for Responses API + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Low effort response"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -573,25 +844,48 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // With low reasoning effort, the model should pass it through - expect(mockCreate).toHaveBeenCalledWith( + // Should use Responses API with low reasoning effort + expect(mockFetch).toHaveBeenCalledWith( + "https://api.openai.com/v1/responses", expect.objectContaining({ - model: "gpt-5-2025-08-07", - messages: expect.any(Array), - stream: true, - stream_options: { include_usage: true }, - reasoning_effort: "low", - verbosity: "medium", // Default verbosity + body: expect.any(String), }), ) + const body2 = (mockFetch.mock.calls[0][1] as any).body as string + expect(body2).toContain('"model":"gpt-5-2025-08-07"') + expect(body2).toContain('"effort":"low"') + expect(body2).toContain('"summary":"auto"') + expect(body2).toContain('"verbosity":"medium"') + expect(body2).toContain('"temperature":1') + expect(body2).toContain('"max_output_tokens"') + + // Clean up + delete (global as any).fetch }) it("should support both verbosity and reasoning effort together for GPT-5", async () => { + // Mock fetch for Responses API + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: 
new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"High verbosity minimal effort"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", - verbosity: "high", // Set verbosity through options - reasoningEffort: "low", // Set reasoning effort + verbosity: "high", + reasoningEffort: "minimal" as any, }) const stream = handler.createMessage(systemPrompt, messages) @@ -600,17 +894,624 @@ describe("OpenAiNativeHandler", () => { chunks.push(chunk) } - // Verify both parameters are passed - expect(mockCreate).toHaveBeenCalledWith( + // Should use Responses API with both parameters + expect(mockFetch).toHaveBeenCalledWith( + "https://api.openai.com/v1/responses", expect.objectContaining({ - model: "gpt-5-2025-08-07", - messages: expect.any(Array), - stream: true, - stream_options: { include_usage: true }, - reasoning_effort: "low", - verbosity: "high", + body: expect.any(String), }), ) + const body3 = (mockFetch.mock.calls[0][1] as any).body as string + expect(body3).toContain('"model":"gpt-5-2025-08-07"') + expect(body3).toContain('"effort":"minimal"') + expect(body3).toContain('"summary":"auto"') + expect(body3).toContain('"verbosity":"high"') + expect(body3).toContain('"temperature":1') + expect(body3).toContain('"max_output_tokens"') + + // Clean up + delete (global as any).fetch + }) + + it("should handle actual GPT-5 Responses API format", async () => { + // Mock fetch with actual response format from GPT-5 + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + // Test actual GPT-5 response format + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.created","response":{"id":"test","status":"in_progress"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.in_progress","response":{"status":"in_progress"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"First text"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":" Second text"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"reasoning","text":"Some reasoning"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.done","response":{"usage":{"prompt_tokens":100,"completion_tokens":20}}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should handle the actual format correctly + const textChunks = chunks.filter((c) => c.type === "text") + const reasoningChunks = chunks.filter((c) => c.type === "reasoning") + + expect(textChunks).toHaveLength(2) + expect(textChunks[0].text).toBe("First text") + expect(textChunks[1].text).toBe(" Second text") + + 
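For orientation, the SSE payload shapes these fixtures exercise look roughly like the union below. This is a hand-written sketch inferred from the fixtures and the handler's event switch, not an official OpenAI SDK type:

```typescript
// Rough shape of the Responses API SSE events covered by these tests.
type Gpt5SseEvent =
    | { type: "response.created" | "response.in_progress"; response: { id?: string; status?: string } }
    | { type: "response.output_item.added"; item: { type: "text" | "reasoning"; text: string } }
    | { type: "response.text.delta" | "response.output_text.delta"; delta: string }
    | { type: "response.reasoning.delta" | "response.reasoning_text.delta"; delta: string }
    | { type: "response.refusal.delta"; delta: string }
    | { type: "response.done" | "response.completed"; response?: { id?: string; usage?: { prompt_tokens?: number; completion_tokens?: number } } }
```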
expect(reasoningChunks).toHaveLength(1) + expect(reasoningChunks[0].text).toBe("Some reasoning") + + // Should also have usage information with cost + const usageChunks = chunks.filter((c) => c.type === "usage") + expect(usageChunks).toHaveLength(1) + expect(usageChunks[0]).toMatchObject({ + type: "usage", + inputTokens: 100, + outputTokens: 20, + totalCost: expect.any(Number), + }) + + // Verify cost calculation (GPT-5 pricing: input $1.25/M, output $10/M) + const expectedInputCost = (100 / 1_000_000) * 1.25 + const expectedOutputCost = (20 / 1_000_000) * 10.0 + const expectedTotalCost = expectedInputCost + expectedOutputCost + expect(usageChunks[0].totalCost).toBeCloseTo(expectedTotalCost, 10) + + // Clean up + delete (global as any).fetch + }) + + it("should handle Responses API with no content gracefully", async () => { + // Mock fetch with empty response + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode('data: {"someField":"value"}\n\n')) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + + // Should not throw, just warn + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should have no content chunks when stream is empty + const contentChunks = chunks.filter((c) => c.type === "text" || c.type === "reasoning") + + expect(contentChunks).toHaveLength(0) + + // Clean up + delete (global as any).fetch }) + + it("should support previous_response_id for conversation continuity", async () => { + // Mock fetch for Responses API + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + // Include response ID in the response + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.created","response":{"id":"resp_123","status":"in_progress"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Response with ID"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.done","response":{"id":"resp_123","usage":{"prompt_tokens":10,"completion_tokens":3}}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + }) + + // First request - should not have previous_response_id + const stream1 = handler.createMessage(systemPrompt, messages) + const chunks1: any[] = [] + for await (const chunk of stream1) { + chunks1.push(chunk) + } + + // Verify first request doesn't include previous_response_id + let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) + expect(firstCallBody.previous_response_id).toBeUndefined() + + // Second request with metadata - should include previous_response_id + const stream2 = handler.createMessage(systemPrompt, messages, { + taskId: "test-task", + previousResponseId: "resp_456", + }) + const chunks2: any[] = [] + for await (const chunk of stream2) { + chunks2.push(chunk) + } + + // Verify second request includes the provided previous_response_id + let secondCallBody = 
JSON.parse(mockFetch.mock.calls[1][1].body) + expect(secondCallBody.previous_response_id).toBe("resp_456") + + // Clean up + delete (global as any).fetch + }) + + it("should handle unhandled stream events gracefully", async () => { + // Mock fetch for the fallback SSE path (which is what gets used when SDK fails) + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Hello"}}\n\n', + ), + ) + // This event is not handled, so it should be ignored + controller.enqueue( + new TextEncoder().encode('data: {"type":"response.audio.delta","delta":"..."}\n\n'), + ) + controller.enqueue(new TextEncoder().encode('data: {"type":"response.done","response":{}}\n\n')) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + // Also mock the SDK to throw an error so it falls back to fetch + const mockClient = { + responses: { + create: vitest.fn().mockRejectedValue(new Error("SDK not available")), + }, + } + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + }) + + // Replace the client with our mock + ;(handler as any).client = mockClient + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + const errors: any[] = [] + + try { + for await (const chunk of stream) { + chunks.push(chunk) + } + } catch (error) { + errors.push(error) + } + + // Log for debugging + if (chunks.length === 0 && errors.length === 0) { + console.log("No chunks and no errors received") + } + if (errors.length > 0) { + console.log("Errors:", errors) + } + + expect(errors.length).toBe(0) + const textChunks = chunks.filter((c) => c.type === "text") + expect(textChunks.length).toBeGreaterThan(0) + expect(textChunks[0].text).toBe("Hello") + + delete (global as any).fetch + }) + + it("should use stored response ID when metadata doesn't provide one", async () => { + // Mock fetch for Responses API + const mockFetch = vitest + .fn() + .mockResolvedValueOnce({ + ok: true, + body: new ReadableStream({ + start(controller) { + // First response with ID + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.done","response":{"id":"resp_789","output":[{"type":"text","content":[{"type":"text","text":"First"}]}],"usage":{"prompt_tokens":10,"completion_tokens":1}}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + .mockResolvedValueOnce({ + ok: true, + body: new ReadableStream({ + start(controller) { + // Second response + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Second"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + }) + + // First request - establishes response ID + const stream1 = handler.createMessage(systemPrompt, messages) + for await (const chunk of stream1) { + // consume stream + } + + // Second request without metadata - should use stored response ID + const stream2 = handler.createMessage(systemPrompt, messages, { taskId: "test-task" }) + for await (const chunk of stream2) { + // consume stream + } + + // Verify second 
request uses the stored response ID from first request + let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) + expect(secondCallBody.previous_response_id).toBe("resp_789") + + // Clean up + delete (global as any).fetch + }) + + it("should only send latest message when using previous_response_id", async () => { + // Mock fetch for Responses API + const mockFetch = vitest + .fn() + .mockResolvedValueOnce({ + ok: true, + body: new ReadableStream({ + start(controller) { + // First response with ID + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.done","response":{"id":"resp_001","output":[{"type":"text","content":[{"type":"text","text":"First"}]}],"usage":{"prompt_tokens":50,"completion_tokens":1}}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + .mockResolvedValueOnce({ + ok: true, + body: new ReadableStream({ + start(controller) { + // Second response + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Second"}}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.done","response":{"id":"resp_002","usage":{"prompt_tokens":10,"completion_tokens":1}}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + }) + + // First request with full conversation + const firstMessages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "Hello" }, + { role: "assistant", content: "Hi there!" }, + { role: "user", content: "How are you?" }, + ] + + const stream1 = handler.createMessage(systemPrompt, firstMessages) + for await (const chunk of stream1) { + // consume stream + } + + // Verify first request sends full conversation + let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) + expect(firstCallBody.input).toContain("Hello") + expect(firstCallBody.input).toContain("Hi there!") + expect(firstCallBody.input).toContain("How are you?") + expect(firstCallBody.previous_response_id).toBeUndefined() + + // Second request with previous_response_id - should only send latest message + const secondMessages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "Hello" }, + { role: "assistant", content: "Hi there!" }, + { role: "user", content: "How are you?" }, + { role: "assistant", content: "I'm doing well!" }, + { role: "user", content: "What's the weather?" 
}, // Latest message + ] + + const stream2 = handler.createMessage(systemPrompt, secondMessages, { + taskId: "test-task", + previousResponseId: "resp_001", + }) + for await (const chunk of stream2) { + // consume stream + } + + // Verify second request only sends the latest user message + let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) + expect(secondCallBody.input).toBe("User: What's the weather?") + expect(secondCallBody.input).not.toContain("Hello") + expect(secondCallBody.input).not.toContain("Hi there!") + expect(secondCallBody.input).not.toContain("How are you?") + expect(secondCallBody.previous_response_id).toBe("resp_001") + + // Clean up + delete (global as any).fetch + }) + + it("should correctly prepare GPT-5 input with conversation continuity", () => { + const gpt5Handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + }) + + // @ts-expect-error - private method + const { formattedInput, previousResponseId } = gpt5Handler.prepareGpt5Input(systemPrompt, messages, { + taskId: "task1", + previousResponseId: "resp_123", + }) + + expect(previousResponseId).toBe("resp_123") + expect(formattedInput).toBe("User: Hello!") + }) + + it("should provide helpful error messages for different error codes", async () => { + const testCases = [ + { status: 400, expectedMessage: "Invalid request to GPT-5 API" }, + { status: 401, expectedMessage: "Authentication failed" }, + { status: 403, expectedMessage: "Access denied" }, + { status: 404, expectedMessage: "GPT-5 API endpoint not found" }, + { status: 429, expectedMessage: "Rate limit exceeded" }, + { status: 500, expectedMessage: "OpenAI service error" }, + ] + + for (const { status, expectedMessage } of testCases) { + // Mock fetch with error response + const mockFetch = vitest.fn().mockResolvedValue({ + ok: false, + status, + statusText: "Error", + text: async () => JSON.stringify({ error: { message: "Test error" } }), + }) + global.fetch = mockFetch as any + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5-2025-08-07", + }) + + const stream = handler.createMessage(systemPrompt, messages) + + await expect(async () => { + for await (const chunk of stream) { + // Should throw before yielding anything + } + }).rejects.toThrow(expectedMessage) + } + + // Clean up + delete (global as any).fetch + }) + }) +}) + +// Added tests for GPT-5 streaming event coverage per PR_review_gpt5_final.md + +describe("GPT-5 streaming event coverage (additional)", () => { + it("should handle reasoning delta events for GPT-5", async () => { + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.reasoning.delta","delta":"Thinking about the problem..."}\n\n', + ), + ) + controller.enqueue( + new TextEncoder().encode('data: {"type":"response.text.delta","delta":"The answer is..."}\n\n'), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + // @ts-ignore + global.fetch = mockFetch + + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-2025-08-07", + openAiNativeApiKey: "test-api-key", + }) + + const systemPrompt = "You are a helpful assistant." + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" 
}] + const stream = handler.createMessage(systemPrompt, messages) + + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const reasoningChunks = chunks.filter((c) => c.type === "reasoning") + const textChunks = chunks.filter((c) => c.type === "text") + + expect(reasoningChunks).toHaveLength(1) + expect(reasoningChunks[0].text).toBe("Thinking about the problem...") + expect(textChunks).toHaveLength(1) + expect(textChunks[0].text).toBe("The answer is...") + + // @ts-ignore + delete global.fetch + }) + + it("should handle refusal delta events for GPT-5 and prefix output", async () => { + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.refusal.delta","delta":"I cannot comply with this request."}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + // @ts-ignore + global.fetch = mockFetch + + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-2025-08-07", + openAiNativeApiKey: "test-api-key", + }) + + const systemPrompt = "You are a helpful assistant." + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Do something disallowed" }] + const stream = handler.createMessage(systemPrompt, messages) + + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const textChunks = chunks.filter((c) => c.type === "text") + expect(textChunks).toHaveLength(1) + expect(textChunks[0].text).toBe("[Refusal] I cannot comply with this request.") + + // @ts-ignore + delete global.fetch + }) + + it("should ignore malformed JSON lines in SSE stream", async () => { + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Before"}}\n\n', + ), + ) + // Malformed JSON line + controller.enqueue( + new TextEncoder().encode('data: {"type":"response.text.delta","delta":"Bad"\n\n'), + ) + // Valid line after malformed + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"After"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + // @ts-ignore + global.fetch = mockFetch + + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-2025-08-07", + openAiNativeApiKey: "test-api-key", + }) + + const systemPrompt = "You are a helpful assistant." + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" 
}] + const stream = handler.createMessage(systemPrompt, messages) + + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // It should not throw and still capture the valid texts around the malformed line + const textChunks = chunks.filter((c) => c.type === "text") + expect(textChunks.map((c: any) => c.text)).toEqual(["Before", "After"]) + + // @ts-ignore + delete global.fetch }) }) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 5e498bee45..8df70d31f1 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -7,8 +7,10 @@ import { OpenAiNativeModelId, openAiNativeModels, OPENAI_NATIVE_DEFAULT_TEMPERATURE, + GPT5_DEFAULT_TEMPERATURE, type ReasoningEffort, type VerbosityLevel, + type ReasoningEffortWithMinimal, } from "@roo-code/types" import type { ApiHandlerOptions } from "../../shared/api" @@ -16,7 +18,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { calculateApiCostOpenAI } from "../../shared/cost" import { convertToOpenAiMessages } from "../transform/openai-format" -import { ApiStream } from "../transform/stream" +import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { getModelParams } from "../transform/model-params" import { BaseProvider } from "./base-provider" @@ -24,43 +26,77 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". export type OpenAiNativeModel = ReturnType -// GPT-5 specific types for Responses API -type ReasoningEffortWithMinimal = ReasoningEffort | "minimal" - -interface GPT5ResponsesAPIParams { - model: string - input: string - reasoning?: { - effort: ReasoningEffortWithMinimal - } - text?: { - verbosity: VerbosityLevel - } -} - -interface GPT5ResponseChunk { - type: "text" | "reasoning" | "usage" - text?: string - reasoning?: string - usage?: { - input_tokens: number - output_tokens: number - reasoning_tokens?: number - total_tokens: number - } -} +// GPT-5 specific types export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI + private lastResponseId: string | undefined + private responseIdPromise: Promise | undefined + private responseIdResolver: ((value: string | undefined) => void) | undefined + + // Event types handled by the shared GPT-5 event processor to avoid duplication + private readonly gpt5CoreHandledTypes = new Set([ + "response.text.delta", + "response.output_text.delta", + "response.reasoning.delta", + "response.reasoning_text.delta", + "response.reasoning_summary.delta", + "response.reasoning_summary_text.delta", + "response.refusal.delta", + "response.output_item.added", + "response.done", + "response.completed", + ]) constructor(options: ApiHandlerOptions) { super() this.options = options + // Default to including reasoning.summary: "auto" for GPT‑5 unless explicitly disabled + if (this.options.enableGpt5ReasoningSummary === undefined) { + this.options.enableGpt5ReasoningSummary = true + } const apiKey = this.options.openAiNativeApiKey ?? "not-provided" this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey }) } + private normalizeGpt5Usage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined { + if (!usage) return undefined + + const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0 + const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 
0 + const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0 + const cacheReadTokens = usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? 0 + + const totalCost = calculateApiCostOpenAI( + model.info, + totalInputTokens, + totalOutputTokens, + cacheWriteTokens || 0, + cacheReadTokens || 0, + ) + + return { + type: "usage", + inputTokens: totalInputTokens, + outputTokens: totalOutputTokens, + cacheWriteTokens, + cacheReadTokens, + totalCost, + } + } + + private resolveResponseId(responseId: string | undefined): void { + if (responseId) { + this.lastResponseId = responseId + } + // Resolve the promise so the next request can use this ID + if (this.responseIdResolver) { + this.responseIdResolver(responseId) + this.responseIdResolver = undefined + } + } + override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], @@ -82,7 +118,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } else if (model.id.startsWith("o1")) { yield* this.handleO1FamilyMessage(model, systemPrompt, messages) } else if (this.isGpt5Model(model.id)) { - yield* this.handleGpt5Message(model, systemPrompt, messages) + yield* this.handleGpt5Message(model, systemPrompt, messages, metadata) } else { yield* this.handleDefaultModelMessage(model, systemPrompt, messages) } @@ -157,8 +193,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ...(reasoning && reasoning), } - // Add verbosity if supported (for future GPT-5 models) - if (verbosity && model.id.startsWith("gpt-5")) { + // Add verbosity if supported + if (verbosity) { params.verbosity = verbosity } @@ -180,175 +216,915 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio model: OpenAiNativeModel, systemPrompt: string, messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - // GPT-5 uses the Responses API, not Chat Completions - // We need to format the input as a single string combining system prompt and messages - const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages) + // Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed. 
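The continuity logic that follows is spread across a promise race and two fallbacks; condensed, the resolution order is the sketch below (a summary of the code underneath, not a separate implementation):

```typescript
// Precedence for the previous_response_id attached to a GPT-5 request.
async function resolveContinuityId(
    metadata: { previousResponseId?: string; suppressPreviousResponseId?: boolean } | undefined,
    pending: Promise<string | undefined> | undefined,
    lastResponseId: string | undefined,
): Promise<string | undefined> {
    if (metadata?.previousResponseId) return metadata.previousResponseId // explicit id always wins
    if (metadata?.suppressPreviousResponseId) return undefined // "skip once" after condense
    // Give an in-flight request ~100ms to publish its response id, then fall back.
    const timeout = new Promise<undefined>((resolve) => setTimeout(() => resolve(undefined), 100))
    const fromPending = pending ? await Promise.race([pending, timeout]) : undefined
    return fromPending ?? lastResponseId
}
```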
+ const { verbosity } = this.getModel() - // Get reasoning effort, supporting the new "minimal" option for GPT-5 + // Resolve reasoning effort (supports "minimal" for GPT‑5) const reasoningEffort = this.getGpt5ReasoningEffort(model) - // Get verbosity from model settings, default to "medium" if not specified - const verbosity = model.verbosity || "medium" + // Wait for any pending response ID from a previous request to be available + // This handles the race condition with fast nano model responses + let effectivePreviousResponseId = metadata?.previousResponseId + + // Only allow fallback to pending/last response id when not explicitly suppressed + if (!metadata?.suppressPreviousResponseId) { + // If we have a pending response ID promise, wait for it to resolve + if (!effectivePreviousResponseId && this.responseIdPromise) { + try { + const resolvedId = await Promise.race([ + this.responseIdPromise, + // Timeout after 100ms to avoid blocking too long + new Promise((resolve) => setTimeout(() => resolve(undefined), 100)), + ]) + if (resolvedId) { + effectivePreviousResponseId = resolvedId + } + } catch { + // Non-fatal if promise fails + } + } + + // Fall back to the last known response ID if still not available + if (!effectivePreviousResponseId) { + effectivePreviousResponseId = this.lastResponseId + } + } + + // Format input and capture continuity id + const { formattedInput, previousResponseId } = this.prepareGpt5Input(systemPrompt, messages, metadata) + const requestPreviousResponseId = effectivePreviousResponseId ?? previousResponseId + + // Create a new promise for this request's response ID + this.responseIdPromise = new Promise((resolve) => { + this.responseIdResolver = resolve + }) + + // Build a request body (also used for fallback) + // Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation + // so requests do not default to very large limits (e.g., 120k). + interface Gpt5RequestBody { + model: string + input: string + stream: boolean + reasoning?: { effort: ReasoningEffortWithMinimal; summary?: "auto" } + text?: { verbosity: VerbosityLevel } + temperature?: number + max_output_tokens?: number + previous_response_id?: string + } - // Prepare the request parameters for Responses API - const params: GPT5ResponsesAPIParams = { + const requestBody: Gpt5RequestBody = { model: model.id, input: formattedInput, + stream: true, ...(reasoningEffort && { reasoning: { effort: reasoningEffort, + ...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}), }, }), - text: { - verbosity: verbosity, - }, + text: { verbosity: (verbosity || "medium") as VerbosityLevel }, + temperature: this.options.modelTemperature ?? GPT5_DEFAULT_TEMPERATURE, + // Explicitly include the calculated max output tokens for GPT‑5. + // Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams). + ...(model.maxTokens ? 
{ max_output_tokens: model.maxTokens } : {}), + ...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }), } - // Since the OpenAI SDK doesn't yet support the Responses API, - // we'll make a direct HTTP request - const response = await this.makeGpt5ResponsesAPIRequest(params, model) + try { + // Use the official SDK + const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable - yield* this.handleGpt5StreamResponse(response, model) + if (typeof (stream as any)[Symbol.asyncIterator] !== "function") { + throw new Error( + "OpenAI SDK did not return an AsyncIterable for Responses API streaming. Falling back to SSE.", + ) + } + + for await (const event of stream) { + for await (const outChunk of this.processGpt5Event(event, model)) { + yield outChunk + } + } + } catch (sdkErr: any) { + // Check if this is a 400 error about previous_response_id not found + const errorMessage = sdkErr?.message || sdkErr?.error?.message || "" + const is400Error = sdkErr?.status === 400 || sdkErr?.response?.status === 400 + const isPreviousResponseError = + errorMessage.includes("Previous response") || errorMessage.includes("not found") + + if (is400Error && requestBody.previous_response_id && isPreviousResponseError) { + // Log the error and retry without the previous_response_id + console.warn( + `[GPT-5] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, + ) + + // Remove the problematic previous_response_id and retry + const retryRequestBody = { ...requestBody } + delete retryRequestBody.previous_response_id + + // Clear the stored lastResponseId to prevent using it again + this.lastResponseId = undefined + + try { + // Retry with the SDK + const retryStream = (await (this.client as any).responses.create( + retryRequestBody, + )) as AsyncIterable + + if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") { + // If SDK fails, fall back to SSE + yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata) + return + } + + for await (const event of retryStream) { + for await (const outChunk of this.processGpt5Event(event, model)) { + yield outChunk + } + } + return + } catch (retryErr) { + // If retry also fails, fall back to SSE + yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata) + return + } + } + + // For other errors, fallback to manual SSE via fetch + yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata) + } } private formatInputForResponsesAPI(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { - // Format the conversation for the Responses API's single input field - let formattedInput = `System: ${systemPrompt}\n\n` + // Format the conversation for the Responses API input field + // Use Developer role format for GPT-5 (aligning with o1/o3 Developer role usage per GPT-5 Responses guidance) + // This ensures consistent instruction handling across reasoning models + let formattedInput = `Developer: ${systemPrompt}\n\n` for (const message of messages) { const role = message.role === "user" ? "User" : "Assistant" - const content = - typeof message.content === "string" - ? message.content - : message.content.map((c) => (c.type === "text" ? 
c.text : "[image]")).join(" ") - formattedInput += `${role}: ${content}\n\n` + + // Handle text content + if (typeof message.content === "string") { + formattedInput += `${role}: ${message.content}\n\n` + } else if (Array.isArray(message.content)) { + // Handle content blocks + const textContent = message.content + .filter((block) => block.type === "text") + .map((block) => (block as any).text) + .join("\n") + if (textContent) { + formattedInput += `${role}: ${textContent}\n\n` + } + } } return formattedInput.trim() } - private getGpt5ReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined { - const { reasoning } = model - - // Check if reasoning effort is configured - if (reasoning && "reasoning_effort" in reasoning) { - const effort = reasoning.reasoning_effort - // Support the new "minimal" effort level for GPT-5 - if (effort === "low" || effort === "medium" || effort === "high") { - return effort + private formatSingleMessageForResponsesAPI(message: Anthropic.Messages.MessageParam): string { + // Format a single message for the Responses API when using previous_response_id + const role = message.role === "user" ? "User" : "Assistant" + + // Handle text content + if (typeof message.content === "string") { + return `${role}: ${message.content}` + } else if (Array.isArray(message.content)) { + // Handle content blocks + const textContent = message.content + .filter((block) => block.type === "text") + .map((block) => (block as any).text) + .join("\n") + if (textContent) { + return `${role}: ${textContent}` } } - // Default to "minimal" for GPT-5 models when not specified - // This provides fastest time-to-first-token as per documentation - return "minimal" + return "" } - private async makeGpt5ResponsesAPIRequest( - params: GPT5ResponsesAPIParams, + private async *makeGpt5ResponsesAPIRequest( + requestBody: any, model: OpenAiNativeModel, - ): Promise> { - // The OpenAI SDK doesn't have direct support for the Responses API yet, - // but we can access it through the underlying client request method if available. - // For now, we'll use the Chat Completions API with GPT-5 specific formatting - // to maintain compatibility while the Responses API SDK support is being added. - - // Convert Responses API params to Chat Completions format - // GPT-5 models use "developer" role for system messages - const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "developer", content: params.input }] - - // Build the request parameters - const requestParams: any = { - model: params.model, - messages, - stream: true, - stream_options: { include_usage: true }, - } + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { + const apiKey = this.options.openAiNativeApiKey ?? 
"not-provided" + const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com" + const url = `${baseUrl}/v1/responses` - // Add reasoning effort if specified (supporting "minimal" for GPT-5) - if (params.reasoning?.effort) { - if (params.reasoning.effort === "minimal") { - // For minimal effort, we pass "minimal" as the reasoning_effort - requestParams.reasoning_effort = "minimal" - } else { - requestParams.reasoning_effort = params.reasoning.effort + try { + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + Accept: "text/event-stream", + }, + body: JSON.stringify(requestBody), + }) + + if (!response.ok) { + const errorText = await response.text() + + let errorMessage = `GPT-5 API request failed (${response.status})` + let errorDetails = "" + + // Try to parse error as JSON for better error messages + try { + const errorJson = JSON.parse(errorText) + if (errorJson.error?.message) { + errorDetails = errorJson.error.message + } else if (errorJson.message) { + errorDetails = errorJson.message + } else { + errorDetails = errorText + } + } catch { + // If not JSON, use the raw text + errorDetails = errorText + } + + // Check if this is a 400 error about previous_response_id not found + const isPreviousResponseError = + errorDetails.includes("Previous response") || errorDetails.includes("not found") + + if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) { + // Log the error and retry without the previous_response_id + console.warn( + `[GPT-5 SSE] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, + ) + + // Remove the problematic previous_response_id and retry + const retryRequestBody = { ...requestBody } + delete retryRequestBody.previous_response_id + + // Clear the stored lastResponseId to prevent using it again + this.lastResponseId = undefined + // Resolve the promise once to unblock any waiting requests + this.resolveResponseId(undefined) + + // Retry the request without the previous_response_id + const retryResponse = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + Accept: "text/event-stream", + }, + body: JSON.stringify(retryRequestBody), + }) + + if (!retryResponse.ok) { + // If retry also fails, throw the original error + throw new Error(`GPT-5 API retry failed (${retryResponse.status})`) + } + + if (!retryResponse.body) { + throw new Error("GPT-5 Responses API error: No response body from retry request") + } + + // Handle the successful retry response + yield* this.handleGpt5StreamResponse(retryResponse.body, model) + return + } + + // Provide user-friendly error messages based on status code + switch (response.status) { + case 400: + errorMessage = "Invalid request to GPT-5 API. Please check your input parameters." + break + case 401: + errorMessage = "Authentication failed. Please check your OpenAI API key." + break + case 403: + errorMessage = "Access denied. Your API key may not have access to GPT-5 models." + break + case 404: + errorMessage = + "GPT-5 API endpoint not found. The model may not be available yet or requires a different configuration." + break + case 429: + errorMessage = "Rate limit exceeded. Please try again later." + break + case 500: + case 502: + case 503: + errorMessage = "OpenAI service error. Please try again later." 
+ break + default: + errorMessage = `GPT-5 API error (${response.status})` + } + + // Append details if available + if (errorDetails) { + errorMessage += ` - ${errorDetails}` + } + + throw new Error(errorMessage) + } + + if (!response.body) { + throw new Error("GPT-5 Responses API error: No response body") + } + + // Handle streaming response + yield* this.handleGpt5StreamResponse(response.body, model) + } catch (error) { + if (error instanceof Error) { + // Re-throw with the original error message if it's already formatted + if (error.message.includes("GPT-5")) { + throw error + } + // Otherwise, wrap it with context + throw new Error(`Failed to connect to GPT-5 API: ${error.message}`) } + // Handle non-Error objects + throw new Error(`Unexpected error connecting to GPT-5 API`) } + } - // Add verbosity control for GPT-5 models - // According to the docs, Chat Completions API also supports verbosity parameter - if (params.text?.verbosity) { - requestParams.verbosity = params.text.verbosity + /** + * Prepares the input and conversation continuity parameters for a GPT-5 API call. + * + * - If a `previousResponseId` is available (either from metadata or the handler's state), + * it formats only the most recent user message for the input and returns the response ID + * to maintain conversation context. + * - Otherwise, it formats the entire conversation history (system prompt + messages) for the input. + * + * @returns An object containing the formatted input string and the previous response ID (if used). + */ + private prepareGpt5Input( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + ): { formattedInput: string; previousResponseId?: string } { + // Respect explicit suppression signal for continuity (e.g. immediately after condense) + const isFirstMessage = messages.length === 1 && messages[0].role === "user" + const allowFallback = !metadata?.suppressPreviousResponseId + + const previousResponseId = + metadata?.previousResponseId ?? (allowFallback && !isFirstMessage ? this.lastResponseId : undefined) + + if (previousResponseId) { + const lastUserMessage = [...messages].reverse().find((msg) => msg.role === "user") + const formattedInput = lastUserMessage ? this.formatSingleMessageForResponsesAPI(lastUserMessage) : "" + return { formattedInput, previousResponseId } + } else { + const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages) + return { formattedInput } } + } - const stream = (await this.client.chat.completions.create( - requestParams, - )) as unknown as AsyncIterable + /** + * Handles the streaming response from the GPT-5 Responses API. + * + * This function iterates through the Server-Sent Events (SSE) stream, parses each event, + * and yields structured data chunks (`ApiStream`). It handles a wide variety of event types, + * including text deltas, reasoning, usage data, and various status/tool events. + * + * The following event types are intentionally ignored as they are not currently consumed + * by the client application: + * - Audio events (`response.audio.*`) + * - Most tool call events (e.g., `response.function_call_arguments.*`, `response.mcp_call.*`, etc.) + * as the client does not yet support rendering these tool interactions. + * - Status events (`response.created`, `response.in_progress`, etc.) as they are informational + * and do not affect the final output. 
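Stripped of the per-event handling, the SSE framing this method deals with reduces to the sketch below: lines prefixed with `data: `, blank-line separated, a `[DONE]` sentinel, and a buffer carried across reads because chunk boundaries can split lines.

```typescript
// Minimal SSE reader: yields one parsed JSON event per "data: ..." line, stops at [DONE].
async function* readSseEvents(body: ReadableStream<Uint8Array>): AsyncGenerator<any> {
    const reader = body.getReader()
    const decoder = new TextDecoder()
    let buffer = ""
    while (true) {
        const { done, value } = await reader.read()
        if (done) break
        buffer += decoder.decode(value, { stream: true })
        const lines = buffer.split("\n")
        buffer = lines.pop() || "" // keep any partial line for the next chunk
        for (const line of lines) {
            if (!line.startsWith("data: ")) continue
            const data = line.slice(6).trim()
            if (data === "[DONE]") return
            try {
                yield JSON.parse(data)
            } catch {
                // Malformed JSON lines are skipped, mirroring the handler below.
            }
        }
    }
}
```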
+ */ + private async *handleGpt5StreamResponse(body: ReadableStream, model: OpenAiNativeModel): ApiStream { + const reader = body.getReader() + const decoder = new TextDecoder() + let buffer = "" + let hasContent = false + let totalInputTokens = 0 + let totalOutputTokens = 0 - // Convert the stream to GPT-5 response format - return this.convertChatStreamToGpt5Format(stream) + try { + while (true) { + const { done, value } = await reader.read() + if (done) break + + buffer += decoder.decode(value, { stream: true }) + const lines = buffer.split("\n") + buffer = lines.pop() || "" + + for (const line of lines) { + if (line.startsWith("data: ")) { + const data = line.slice(6).trim() + if (data === "[DONE]") { + continue + } + + try { + const parsed = JSON.parse(data) + + // Store response ID for conversation continuity + if (parsed.response?.id) { + this.resolveResponseId(parsed.response.id) + } + + // Delegate standard event types to the shared processor to avoid duplication + if (parsed?.type && this.gpt5CoreHandledTypes.has(parsed.type)) { + for await (const outChunk of this.processGpt5Event(parsed, model)) { + // Track whether we've emitted any content so fallback handling can decide appropriately + if (outChunk.type === "text" || outChunk.type === "reasoning") { + hasContent = true + } + yield outChunk + } + continue + } + + // Check if this is a complete response (non-streaming format) + if (parsed.response && parsed.response.output && Array.isArray(parsed.response.output)) { + // Handle complete response in the initial event + for (const outputItem of parsed.response.output) { + if (outputItem.type === "text" && outputItem.content) { + for (const content of outputItem.content) { + if (content.type === "text" && content.text) { + hasContent = true + yield { + type: "text", + text: content.text, + } + } + } + } + // Additionally handle reasoning summaries if present (non-streaming summary output) + if (outputItem.type === "reasoning" && Array.isArray(outputItem.summary)) { + for (const summary of outputItem.summary) { + if (summary?.type === "summary_text" && typeof summary.text === "string") { + hasContent = true + yield { + type: "reasoning", + text: summary.text, + } + } + } + } + } + // Check for usage in the complete response + if (parsed.response.usage) { + const usageData = this.normalizeGpt5Usage(parsed.response.usage, model) + if (usageData) { + yield usageData + } + } + } + // Handle streaming delta events for text content + else if ( + parsed.type === "response.text.delta" || + parsed.type === "response.output_text.delta" + ) { + // Primary streaming event for text deltas + if (parsed.delta) { + hasContent = true + yield { + type: "text", + text: parsed.delta, + } + } + } else if ( + parsed.type === "response.text.done" || + parsed.type === "response.output_text.done" + ) { + // Text streaming completed - final text already streamed via deltas + } + // Handle reasoning delta events + else if ( + parsed.type === "response.reasoning.delta" || + parsed.type === "response.reasoning_text.delta" + ) { + // Streaming reasoning content + if (parsed.delta) { + hasContent = true + yield { + type: "reasoning", + text: parsed.delta, + } + } + } else if ( + parsed.type === "response.reasoning.done" || + parsed.type === "response.reasoning_text.done" + ) { + // Reasoning streaming completed + } + // Handle reasoning summary events + else if ( + parsed.type === "response.reasoning_summary.delta" || + parsed.type === "response.reasoning_summary_text.delta" + ) { + // Streaming reasoning summary 
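// A minimal, self-contained sketch of the SSE buffering technique used above: accumulate
// decoded chunks, split on newlines, keep the trailing partial line for the next read, and
// JSON-parse "data: " payloads. parseSseEvents and onEvent are illustrative names only.
async function parseSseEvents(body: ReadableStream<Uint8Array>, onEvent: (event: unknown) => void): Promise<void> {
	const reader = body.getReader()
	const decoder = new TextDecoder()
	let buffer = ""
	try {
		while (true) {
			const { done, value } = await reader.read()
			if (done) break
			buffer += decoder.decode(value, { stream: true })
			const lines = buffer.split("\n")
			buffer = lines.pop() || "" // keep the incomplete tail for the next chunk
			for (const line of lines) {
				if (!line.startsWith("data: ")) continue
				const data = line.slice(6).trim()
				if (data === "[DONE]") continue
				try {
					onEvent(JSON.parse(data))
				} catch {
					// Ignore malformed payloads, mirroring the lenient parsing in the handler above.
				}
			}
		}
	} finally {
		reader.releaseLock()
	}
}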
+ if (parsed.delta) { + hasContent = true + yield { + type: "reasoning", + text: parsed.delta, + } + } + } else if ( + parsed.type === "response.reasoning_summary.done" || + parsed.type === "response.reasoning_summary_text.done" + ) { + // Reasoning summary completed + } + // Handle refusal delta events + else if (parsed.type === "response.refusal.delta") { + // Model is refusing to answer + if (parsed.delta) { + hasContent = true + yield { + type: "text", + text: `[Refusal] ${parsed.delta}`, + } + } + } else if (parsed.type === "response.refusal.done") { + // Refusal completed + } + // Handle audio delta events (for multimodal responses) + else if (parsed.type === "response.audio.delta") { + // Audio streaming - we'll skip for now as we focus on text + // Could be handled in future for voice responses + } else if (parsed.type === "response.audio.done") { + // Audio completed + } + // Handle audio transcript delta events + else if (parsed.type === "response.audio_transcript.delta") { + // Audio transcript streaming + if (parsed.delta) { + hasContent = true + yield { + type: "text", + text: parsed.delta, + } + } + } else if (parsed.type === "response.audio_transcript.done") { + // Audio transcript completed + } + // Handle content part events (for structured content) + else if (parsed.type === "response.content_part.added") { + // New content part added - could be text, image, etc. + if (parsed.part?.type === "text" && parsed.part.text) { + hasContent = true + yield { + type: "text", + text: parsed.part.text, + } + } + } else if (parsed.type === "response.content_part.done") { + // Content part completed + } + // Handle output item events (alternative format) + else if (parsed.type === "response.output_item.added") { + // This is where the actual content comes through in some test cases + if (parsed.item) { + if (parsed.item.type === "text" && parsed.item.text) { + hasContent = true + yield { type: "text", text: parsed.item.text } + } else if (parsed.item.type === "reasoning" && parsed.item.text) { + hasContent = true + yield { type: "reasoning", text: parsed.item.text } + } else if (parsed.item.type === "message" && parsed.item.content) { + // Handle message type items + for (const content of parsed.item.content) { + if (content.type === "text" && content.text) { + hasContent = true + yield { type: "text", text: content.text } + } + } + } + } + } else if (parsed.type === "response.output_item.done") { + // Output item completed + } + // Handle function/tool call events + else if (parsed.type === "response.function_call_arguments.delta") { + // Function call arguments streaming + // We could yield this as a special type if needed for tool usage + } else if (parsed.type === "response.function_call_arguments.done") { + // Function call completed + } + // Handle MCP (Model Context Protocol) tool events + else if (parsed.type === "response.mcp_call_arguments.delta") { + // MCP tool call arguments streaming + } else if (parsed.type === "response.mcp_call_arguments.done") { + // MCP tool call completed + } else if (parsed.type === "response.mcp_call.in_progress") { + // MCP tool call in progress + } else if ( + parsed.type === "response.mcp_call.completed" || + parsed.type === "response.mcp_call.failed" + ) { + // MCP tool call status events + } else if (parsed.type === "response.mcp_list_tools.in_progress") { + // MCP list tools in progress + } else if ( + parsed.type === "response.mcp_list_tools.completed" || + parsed.type === "response.mcp_list_tools.failed" + ) { + // MCP list tools status 
events + } + // Handle web search events + else if (parsed.type === "response.web_search_call.searching") { + // Web search in progress + } else if (parsed.type === "response.web_search_call.in_progress") { + // Processing web search results + } else if (parsed.type === "response.web_search_call.completed") { + // Web search completed + } + // Handle code interpreter events + else if (parsed.type === "response.code_interpreter_call_code.delta") { + // Code interpreter code streaming + if (parsed.delta) { + // Could yield as a special code type if needed + } + } else if (parsed.type === "response.code_interpreter_call_code.done") { + // Code interpreter code completed + } else if (parsed.type === "response.code_interpreter_call.interpreting") { + // Code interpreter running + } else if (parsed.type === "response.code_interpreter_call.in_progress") { + // Code execution in progress + } else if (parsed.type === "response.code_interpreter_call.completed") { + // Code interpreter completed + } + // Handle file search events + else if (parsed.type === "response.file_search_call.searching") { + // File search in progress + } else if (parsed.type === "response.file_search_call.in_progress") { + // Processing file search results + } else if (parsed.type === "response.file_search_call.completed") { + // File search completed + } + // Handle image generation events + else if (parsed.type === "response.image_gen_call.generating") { + // Image generation in progress + } else if (parsed.type === "response.image_gen_call.in_progress") { + // Processing image generation + } else if (parsed.type === "response.image_gen_call.partial_image") { + // Image partially generated + } else if (parsed.type === "response.image_gen_call.completed") { + // Image generation completed + } + // Handle computer use events + else if ( + parsed.type === "response.computer_tool_call.output_item" || + parsed.type === "response.computer_tool_call.output_screenshot" + ) { + // Computer use tool events + } + // Handle annotation events + else if ( + parsed.type === "response.output_text_annotation.added" || + parsed.type === "response.text_annotation.added" + ) { + // Text annotation events - could be citations, references, etc. 
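// The branches above are deliberate no-ops for tool/status events the client does not render
// yet. One possible tightening (a sketch, not part of this patch): collapse them behind a
// prefix check so new status-only event families do not each need another branch.
const IGNORED_GPT5_EVENT_PREFIXES = [
	"response.mcp_call",
	"response.mcp_list_tools",
	"response.web_search_call",
	"response.code_interpreter_call",
	"response.file_search_call",
	"response.image_gen_call",
	"response.computer_tool_call",
] as const

function isIgnoredStatusEvent(eventType: string): boolean {
	return IGNORED_GPT5_EVENT_PREFIXES.some((prefix) => eventType.startsWith(prefix))
}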
+ } + // Handle error events + else if (parsed.type === "response.error" || parsed.type === "error") { + // Error event from the API + if (parsed.error || parsed.message) { + throw new Error( + `GPT-5 API error: ${parsed.error?.message || parsed.message || "Unknown error"}`, + ) + } + } + // Handle incomplete event + else if (parsed.type === "response.incomplete") { + // Response was incomplete - might need to handle specially + } + // Handle queued event + else if (parsed.type === "response.queued") { + // Response is queued + } + // Handle in_progress event + else if (parsed.type === "response.in_progress") { + // Response is being processed + } + // Handle failed event + else if (parsed.type === "response.failed") { + // Response failed + if (parsed.error || parsed.message) { + throw new Error( + `GPT-5 response failed: ${parsed.error?.message || parsed.message || "Unknown failure"}`, + ) + } + } else if (parsed.type === "response.completed" || parsed.type === "response.done") { + // Store response ID for conversation continuity + if (parsed.response?.id) { + this.resolveResponseId(parsed.response.id) + } + + // Check if the done event contains the complete output (as a fallback) + if ( + !hasContent && + parsed.response && + parsed.response.output && + Array.isArray(parsed.response.output) + ) { + for (const outputItem of parsed.response.output) { + if (outputItem.type === "message" && outputItem.content) { + for (const content of outputItem.content) { + if (content.type === "output_text" && content.text) { + hasContent = true + yield { + type: "text", + text: content.text, + } + } + } + } + // Also surface reasoning summaries if present in the final output + if (outputItem.type === "reasoning" && Array.isArray(outputItem.summary)) { + for (const summary of outputItem.summary) { + if ( + summary?.type === "summary_text" && + typeof summary.text === "string" + ) { + hasContent = true + yield { + type: "reasoning", + text: summary.text, + } + } + } + } + } + } + + // Usage for done/completed is already handled by processGpt5Event in SDK path. + // For SSE path, usage often arrives separately; avoid double-emitting here. + } + // These are structural or status events, we can just log them at a lower level or ignore. 
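// A hedged sketch of the fallback extraction above: when nothing was streamed as deltas,
// pull output text and reasoning summaries out of the completed response object. The
// Gpt5CompletedResponse shape is an assumption based only on the fields read here.
interface Gpt5CompletedResponse {
	output?: Array<{
		type: string
		content?: Array<{ type: string; text?: string }>
		summary?: Array<{ type: string; text?: string }>
	}>
}

function extractFinalChunks(response: Gpt5CompletedResponse): Array<{ type: "text" | "reasoning"; text: string }> {
	const chunks: Array<{ type: "text" | "reasoning"; text: string }> = []
	for (const item of response.output ?? []) {
		if (item.type === "message") {
			for (const content of item.content ?? []) {
				if (content.type === "output_text" && content.text) {
					chunks.push({ type: "text", text: content.text })
				}
			}
		} else if (item.type === "reasoning") {
			for (const summary of item.summary ?? []) {
				if (summary.type === "summary_text" && typeof summary.text === "string") {
					chunks.push({ type: "reasoning", text: summary.text })
				}
			}
		}
	}
	return chunks
}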
+ else if ( + parsed.type === "response.created" || + parsed.type === "response.in_progress" || + parsed.type === "response.output_item.done" || + parsed.type === "response.content_part.added" || + parsed.type === "response.content_part.done" + ) { + // Status events - no action needed + } + // Fallback for older formats or unexpected responses + else if (parsed.choices?.[0]?.delta?.content) { + hasContent = true + yield { + type: "text", + text: parsed.choices[0].delta.content, + } + } + // Additional fallback: some events place text under 'item.text' even if type isn't matched above + else if ( + parsed.item && + typeof parsed.item.text === "string" && + parsed.item.text.length > 0 + ) { + hasContent = true + yield { + type: "text", + text: parsed.item.text, + } + } else if (parsed.usage) { + // Handle usage if it arrives in a separate, non-completed event + const usageData = this.normalizeGpt5Usage(parsed.usage, model) + if (usageData) { + yield usageData + } + } + } catch (e) { + // Silently ignore parsing errors for non-critical SSE data + } + } + // Also try to parse non-SSE formatted lines + else if (line.trim() && !line.startsWith(":")) { + try { + const parsed = JSON.parse(line) + + // Try to extract content from various possible locations + if (parsed.content || parsed.text || parsed.message) { + hasContent = true + yield { + type: "text", + text: parsed.content || parsed.text || parsed.message, + } + } + } catch { + // Not JSON, might be plain text - ignore + } + } + } + } + + // If we didn't get any content, don't throw - the API might have returned an empty response + // This can happen in certain edge cases and shouldn't break the flow + } catch (error) { + if (error instanceof Error) { + throw new Error(`Error processing GPT-5 response stream: ${error.message}`) + } + throw new Error("Unexpected error processing GPT-5 response stream") + } finally { + reader.releaseLock() + } } - private async *convertChatStreamToGpt5Format( - stream: AsyncIterable, - ): AsyncIterable { - for await (const chunk of stream) { - const delta = chunk.choices[0]?.delta + /** + * Shared processor for GPT‑5 Responses API events. + * Used by both the official SDK streaming path and (optionally) by the SSE fallback. 
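 *
 * Illustrative delegation from the SSE path (this mirrors the gpt5CoreHandledTypes check in
 * handleGpt5StreamResponse above):
 *
 *     if (parsed?.type && this.gpt5CoreHandledTypes.has(parsed.type)) {
 *         for await (const chunk of this.processGpt5Event(parsed, model)) {
 *             yield chunk
 *         }
 *     }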
+ */ + private async *processGpt5Event(event: any, model: OpenAiNativeModel): ApiStream { + // Persist response id for conversation continuity when available + if (event?.response?.id) { + this.resolveResponseId(event.response.id) + } - if (delta?.content) { - yield { - type: "text", - text: delta.content, - } + // Handle known streaming text deltas + if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") { + if (event?.delta) { + yield { type: "text", text: event.delta } } + return + } - if (chunk.usage) { - yield { - type: "usage", - usage: { - input_tokens: chunk.usage.prompt_tokens || 0, - output_tokens: chunk.usage.completion_tokens || 0, - total_tokens: chunk.usage.total_tokens || 0, - }, + // Handle reasoning deltas (including summary variants) + if ( + event?.type === "response.reasoning.delta" || + event?.type === "response.reasoning_text.delta" || + event?.type === "response.reasoning_summary.delta" || + event?.type === "response.reasoning_summary_text.delta" + ) { + if (event?.delta) { + yield { type: "reasoning", text: event.delta } + } + return + } + + // Handle refusal deltas + if (event?.type === "response.refusal.delta") { + if (event?.delta) { + yield { type: "text", text: `[Refusal] ${event.delta}` } + } + return + } + + // Handle output item additions (SDK or Responses API alternative format) + if (event?.type === "response.output_item.added") { + const item = event?.item + if (item) { + if (item.type === "text" && item.text) { + yield { type: "text", text: item.text } + } else if (item.type === "reasoning" && item.text) { + yield { type: "reasoning", text: item.text } + } else if (item.type === "message" && Array.isArray(item.content)) { + for (const content of item.content) { + // Some implementations send 'text'; others send 'output_text' + if ((content?.type === "text" || content?.type === "output_text") && content?.text) { + yield { type: "text", text: content.text } + } + } } } + return + } + + // Completion events that may carry usage + if (event?.type === "response.done" || event?.type === "response.completed") { + const usage = event?.response?.usage || event?.usage || undefined + const usageData = this.normalizeGpt5Usage(usage, model) + if (usageData) { + yield usageData + } + return + } + + // Fallbacks for older formats or unexpected objects + if (event?.choices?.[0]?.delta?.content) { + yield { type: "text", text: event.choices[0].delta.content } + return + } + + if (event?.usage) { + const usageData = this.normalizeGpt5Usage(event.usage, model) + if (usageData) { + yield usageData + } } } - private async *handleGpt5StreamResponse( - stream: AsyncIterable, - model: OpenAiNativeModel, - ): ApiStream { - for await (const chunk of stream) { - if (chunk.type === "text" && chunk.text) { - yield { - type: "text", - text: chunk.text, - } - } else if (chunk.type === "usage" && chunk.usage) { - const inputTokens = chunk.usage.input_tokens - const outputTokens = chunk.usage.output_tokens - const cacheReadTokens = 0 - const cacheWriteTokens = 0 - const totalCost = calculateApiCostOpenAI( - model.info, - inputTokens, - outputTokens, - cacheWriteTokens, - cacheReadTokens, - ) + private getGpt5ReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined { + const { reasoning, info } = model - yield { - type: "usage", - inputTokens, - outputTokens, - cacheWriteTokens, - cacheReadTokens, - totalCost, - } + // Check if reasoning effort is configured + if (reasoning && "reasoning_effort" in reasoning) { + const effort = 
reasoning.reasoning_effort as string + // Support all effort levels including "minimal" for GPT-5 + if (effort === "minimal" || effort === "low" || effort === "medium" || effort === "high") { + return effort as ReasoningEffortWithMinimal } } + + // Centralize default: use the model's default from types if available; otherwise undefined + return info.reasoningEffort as ReasoningEffortWithMinimal | undefined } private isGpt5Model(modelId: string): boolean { @@ -376,16 +1152,28 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream { - const inputTokens = usage?.prompt_tokens || 0 // sum of cache hits and misses + const inputTokens = usage?.prompt_tokens || 0 const outputTokens = usage?.completion_tokens || 0 - const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0 - const cacheWriteTokens = 0 - const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens) - const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens) + + // Extract cache tokens from prompt_tokens_details + // According to OpenAI API, cached_tokens represents tokens read from cache + const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || undefined + + // Cache write tokens are not typically reported in the standard streaming response + // They would be in cache_creation_input_tokens if available + const cacheWriteTokens = (usage as any)?.cache_creation_input_tokens || undefined + + const totalCost = calculateApiCostOpenAI( + info, + inputTokens, + outputTokens, + cacheWriteTokens || 0, + cacheReadTokens || 0, + ) yield { type: "usage", - inputTokens: nonCachedInputTokens, + inputTokens: inputTokens, outputTokens: outputTokens, cacheWriteTokens: cacheWriteTokens, cacheReadTokens: cacheReadTokens, @@ -406,15 +1194,17 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio modelId: id, model: info, settings: this.options, - defaultTemperature: OPENAI_NATIVE_DEFAULT_TEMPERATURE, + defaultTemperature: this.isGpt5Model(id) ? GPT5_DEFAULT_TEMPERATURE : OPENAI_NATIVE_DEFAULT_TEMPERATURE, }) // For GPT-5 models, ensure we support minimal reasoning effort - if (this.isGpt5Model(id) && params.reasoning) { - // Allow "minimal" effort for GPT-5 models - const effort = this.options.reasoningEffort - if (effort === "low" || effort === "medium" || effort === "high") { - params.reasoning.reasoning_effort = effort + if (this.isGpt5Model(id)) { + const effort = + (this.options.reasoningEffort as ReasoningEffortWithMinimal | undefined) ?? + (info.reasoningEffort as ReasoningEffortWithMinimal | undefined) + + if (effort) { + ;(params.reasoning as any) = { reasoning_effort: effort } } } @@ -423,25 +1213,62 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return { id: id.startsWith("o3-mini") ? "o3-mini" : id, info, ...params, verbosity: params.verbosity } } + /** + * Gets the last GPT-5 response ID captured from the Responses API stream. + * Used for maintaining conversation continuity across requests. + * @returns The response ID, or undefined if not available yet + */ + getLastResponseId(): string | undefined { + return this.lastResponseId + } + + /** + * Sets the last GPT-5 response ID for conversation continuity. + * Typically only used in tests or special flows. 
+ * @param responseId The GPT-5 response ID to store + */ + setResponseId(responseId: string): void { + this.lastResponseId = responseId + } + async completePrompt(prompt: string): Promise { try { const { id, temperature, reasoning, verbosity } = this.getModel() + const isGpt5 = this.isGpt5Model(id) - const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming & { - verbosity?: VerbosityLevel - } = { + if (isGpt5) { + // GPT-5 uses the Responses API, not Chat Completions. Avoid undefined behavior here. + throw new Error( + "completePrompt is not supported for GPT-5 models. Use createMessage (Responses API) instead.", + ) + } + + const params: any = { model: id, messages: [{ role: "user", content: prompt }], - temperature, - ...(reasoning && reasoning), } - // Add verbosity for GPT-5 models - if (this.isGpt5Model(id) && verbosity) { - params.verbosity = verbosity + // Add temperature if supported + if (temperature !== undefined) { + params.temperature = temperature + } + + // For GPT-5 models, add reasoning_effort and verbosity as top-level parameters + if (isGpt5) { + if (reasoning && "reasoning_effort" in reasoning) { + params.reasoning_effort = reasoning.reasoning_effort + } + if (verbosity) { + params.verbosity = verbosity + } + } else { + // For non-GPT-5 models, add reasoning as is + if (reasoning) { + Object.assign(params, reasoning) + } } - const response = await this.client.chat.completions.create(params as any) + const response = await this.client.chat.completions.create(params) return response.choices[0]?.message.content || "" } catch (error) { if (error instanceof Error) { diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 85abcf1a69..eed719cf0f 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -305,7 +305,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ], stream: true, ...(isGrokXAI ? 
{} : { stream_options: { include_usage: true } }), - reasoning_effort: modelInfo.reasoningEffort, + reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined, temperature: undefined, } @@ -330,7 +330,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl }, ...convertToOpenAiMessages(messages), ], - reasoning_effort: modelInfo.reasoningEffort, + reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined, temperature: undefined, } diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index 8af0b9aa42..d2e55fc8f0 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -116,7 +116,7 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan model, max_tokens, temperature, - ...(reasoning_effort && { reasoning_effort }), + ...(reasoning_effort && reasoning_effort !== "minimal" && { reasoning_effort }), ...(thinking && { thinking }), stream: true, stream_options: { include_usage: true }, diff --git a/src/api/transform/model-params.ts b/src/api/transform/model-params.ts index cc30aa5605..933697c0a5 100644 --- a/src/api/transform/model-params.ts +++ b/src/api/transform/model-params.ts @@ -2,6 +2,7 @@ import { type ModelInfo, type ProviderSettings, type VerbosityLevel, + type ReasoningEffortWithMinimal, ANTHROPIC_DEFAULT_MAX_TOKENS, } from "@roo-code/types" @@ -38,7 +39,7 @@ type GetModelParamsOptions = { type BaseModelParams = { maxTokens: number | undefined temperature: number | undefined - reasoningEffort: "low" | "medium" | "high" | undefined + reasoningEffort: ReasoningEffortWithMinimal | undefined reasoningBudget: number | undefined verbosity: VerbosityLevel | undefined } @@ -128,7 +129,8 @@ export function getModelParams({ temperature = 1.0 } else if (shouldUseReasoningEffort({ model, settings })) { // "Traditional" reasoning models use the `reasoningEffort` parameter. - reasoningEffort = customReasoningEffort ?? model.reasoningEffort + const effort = customReasoningEffort ?? model.reasoningEffort + reasoningEffort = effort as ReasoningEffortWithMinimal } const params: BaseModelParams = { maxTokens, temperature, reasoningEffort, reasoningBudget, verbosity } diff --git a/src/api/transform/reasoning.ts b/src/api/transform/reasoning.ts index a173c59b19..46ef029ea3 100644 --- a/src/api/transform/reasoning.ts +++ b/src/api/transform/reasoning.ts @@ -2,7 +2,7 @@ import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta" import OpenAI from "openai" import type { GenerateContentConfig } from "@google/genai" -import type { ModelInfo, ProviderSettings } from "@roo-code/types" +import type { ModelInfo, ProviderSettings, ReasoningEffortWithMinimal } from "@roo-code/types" import { shouldUseReasoningBudget, shouldUseReasoningEffort } from "../../shared/api" @@ -23,7 +23,7 @@ export type GeminiReasoningParams = GenerateContentConfig["thinkingConfig"] export type GetModelReasoningOptions = { model: ModelInfo reasoningBudget: number | undefined - reasoningEffort: ReasoningEffort | undefined + reasoningEffort: ReasoningEffortWithMinimal | undefined settings: ProviderSettings } @@ -36,7 +36,9 @@ export const getOpenRouterReasoning = ({ shouldUseReasoningBudget({ model, settings }) ? { max_tokens: reasoningBudget } : shouldUseReasoningEffort({ model, settings }) - ? { effort: reasoningEffort } + ? reasoningEffort !== "minimal" + ? 
{ effort: reasoningEffort } + : undefined : undefined export const getAnthropicReasoning = ({ @@ -50,8 +52,19 @@ export const getOpenAiReasoning = ({ model, reasoningEffort, settings, -}: GetModelReasoningOptions): OpenAiReasoningParams | undefined => - shouldUseReasoningEffort({ model, settings }) ? { reasoning_effort: reasoningEffort } : undefined +}: GetModelReasoningOptions): OpenAiReasoningParams | undefined => { + if (!shouldUseReasoningEffort({ model, settings })) { + return undefined + } + + // If model has reasoning effort capability, return object even if effort is undefined + // This preserves the reasoning_effort field in the API call + if (reasoningEffort === "minimal") { + return undefined + } + + return { reasoning_effort: reasoningEffort } +} export const getGeminiReasoning = ({ model, diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 3cb6abe7f7..1dd615f0eb 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -252,6 +252,8 @@ export class Task extends EventEmitter implements TaskLike { didCompleteReadingStream = false assistantMessageParser?: AssistantMessageParser isAssistantMessageParserEnabled = false + private lastUsedInstructions?: string + private skipPrevResponseIdOnce: boolean = false constructor({ provider, @@ -824,6 +826,7 @@ export class Task extends EventEmitter implements TaskLike { progressStatus?: ToolProgressStatus, options: { isNonInteractive?: boolean + metadata?: Record } = {}, contextCondense?: ContextCondense, ): Promise { @@ -861,6 +864,7 @@ export class Task extends EventEmitter implements TaskLike { images, partial, contextCondense, + metadata: options.metadata, }) } } else { @@ -876,6 +880,9 @@ export class Task extends EventEmitter implements TaskLike { lastMessage.images = images lastMessage.partial = false lastMessage.progressStatus = progressStatus + if (options.metadata) { + ;(lastMessage as any).metadata = options.metadata + } // Instead of streaming partialMessage events, we do a save // and post like normal to persist to disk. @@ -891,7 +898,15 @@ export class Task extends EventEmitter implements TaskLike { this.lastMessageTs = sayTs } - await this.addToClineMessages({ ts: sayTs, type: "say", say: type, text, images, contextCondense }) + await this.addToClineMessages({ + ts: sayTs, + type: "say", + say: type, + text, + images, + contextCondense, + metadata: options.metadata, + }) } } } else { @@ -1736,6 +1751,8 @@ export class Task extends EventEmitter implements TaskLike { presentAssistantMessage(this) } + await this.persistGpt5Metadata(reasoningMessage) + updateApiReqMsg() await this.saveClineMessages() await this.providerRef.deref()?.postStateToWebview() @@ -1954,6 +1971,7 @@ export class Task extends EventEmitter implements TaskLike { Task.lastGlobalApiRequestTime = Date.now() const systemPrompt = await this.getSystemPrompt() + this.lastUsedInstructions = systemPrompt const { contextTokens } = this.getTokenUsage() if (contextTokens) { @@ -1992,6 +2010,10 @@ export class Task extends EventEmitter implements TaskLike { if (truncateResult.error) { await this.say("condense_context_error", truncateResult.error) } else if (truncateResult.summary) { + // A condense operation occurred; for the next GPT‑5 API call we should NOT + // send previous_response_id so the request reflects the fresh condensed context. 
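// A minimal sketch of the continuity rule this flag implements: reuse the last persisted
// GPT-5 response id for the next request, but skip it exactly once after a condense so the
// condensed context is resent in full. PersistedMessage and resolvePreviousResponseId are
// simplified, illustrative stand-ins for the Task fields used below.
interface PersistedMessage {
	type: "say" | "ask"
	say?: string
	metadata?: { gpt5?: { previous_response_id?: string } }
}

function resolvePreviousResponseId(
	messages: PersistedMessage[],
	skipOnce: boolean,
): { previousResponseId?: string; suppressPreviousResponseId?: boolean } {
	if (skipOnce) {
		// Force the next request to carry the full (condensed) history instead of a response id.
		return { suppressPreviousResponseId: true }
	}
	for (let i = messages.length - 1; i >= 0; i--) {
		const id = messages[i].metadata?.gpt5?.previous_response_id
		if (messages[i].type === "say" && messages[i].say === "text" && id) {
			return { previousResponseId: id }
		}
	}
	return {}
}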
+ this.skipPrevResponseIdOnce = true + const { summary, cost, prevContextTokens, newContextTokens = 0 } = truncateResult const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens } await this.say( @@ -2008,7 +2030,7 @@ export class Task extends EventEmitter implements TaskLike { } const messagesSinceLastSummary = getMessagesSinceLastSummary(this.apiConversationHistory) - const cleanConversationHistory = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map( + let cleanConversationHistory = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map( ({ role, content }) => ({ role, content }), ) @@ -2024,9 +2046,41 @@ export class Task extends EventEmitter implements TaskLike { throw new Error("Auto-approval limit reached and user did not approve continuation") } + // Determine GPT‑5 previous_response_id from last persisted assistant turn (if available), + // unless a condense just occurred (skip once after condense). + let previousResponseId: string | undefined = undefined + try { + const modelId = this.api.getModel().id + if (modelId && modelId.startsWith("gpt-5") && !this.skipPrevResponseIdOnce) { + // Find the last assistant message that has a previous_response_id stored + const idx = findLastIndex( + this.clineMessages, + (m) => + m.type === "say" && + (m as any).say === "text" && + (m as any).metadata?.gpt5?.previous_response_id, + ) + if (idx !== -1) { + // Use the previous_response_id from the last assistant message for this request + previousResponseId = ((this.clineMessages[idx] as any).metadata.gpt5.previous_response_id || + undefined) as string | undefined + } + } + } catch { + // non-fatal + } + const metadata: ApiHandlerCreateMessageMetadata = { mode: mode, taskId: this.taskId, + ...(previousResponseId ? { previousResponseId } : {}), + // If a condense just occurred, explicitly suppress continuity fallback for the next call + ...(this.skipPrevResponseIdOnce ? { suppressPreviousResponseId: true } : {}), + } + + // Reset skip flag after applying (it only affects the immediate next call) + if (this.skipPrevResponseIdOnce) { + this.skipPrevResponseIdOnce = false } const stream = this.api.createMessage(systemPrompt, cleanConversationHistory, metadata) @@ -2172,6 +2226,35 @@ export class Task extends EventEmitter implements TaskLike { } } + /** + * Persist GPT-5 per-turn metadata (previous_response_id, instructions, reasoning_summary) + * onto the last complete assistant say("text") message. + */ + private async persistGpt5Metadata(reasoningMessage?: string): Promise { + try { + const modelId = this.api.getModel().id + if (!modelId || !modelId.startsWith("gpt-5")) return + + const lastResponseId: string | undefined = (this.api as any)?.getLastResponseId?.() + const idx = findLastIndex( + this.clineMessages, + (m) => m.type === "say" && (m as any).say === "text" && m.partial !== true, + ) + if (idx !== -1) { + const msg = this.clineMessages[idx] as any + msg.metadata = msg.metadata ?? {} + msg.metadata.gpt5 = { + ...(msg.metadata.gpt5 ?? {}), + previous_response_id: lastResponseId, + instructions: this.lastUsedInstructions, + reasoning_summary: (reasoningMessage ?? 
"").trim() || undefined, + } + } + } catch { + // Non-fatal error in metadata persistence + } + } + // Getters public get cwd() { diff --git a/src/shared/api.ts b/src/shared/api.ts index 014b903453..e9b57af3c1 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -6,8 +6,15 @@ import { } from "@roo-code/types" // ApiHandlerOptions - -export type ApiHandlerOptions = Omit +// Extend ProviderSettings (minus apiProvider) with handler-specific toggles. +export type ApiHandlerOptions = Omit & { + /** + * When true and using GPT‑5 Responses API, include reasoning.summary: "auto" + * so the API returns reasoning summaries (we already parse and surface them). + * Defaults to true; set to false to disable summaries. + */ + enableGpt5ReasoningSummary?: boolean +} // RouterName diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index 74ba885d25..70a58f03bf 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -576,6 +576,12 @@ const ApiOptions = ({ if (value !== "custom-arn" && selectedProvider === "bedrock") { setApiConfigurationField("awsCustomArn", "") } + + // Clear reasoning effort when switching models to allow the new model's default to take effect + // This is especially important for GPT-5 models which default to "medium" + if (selectedProvider === "openai-native") { + setApiConfigurationField("reasoningEffort", undefined) + } }}> @@ -617,11 +623,14 @@ const ApiOptions = ({ modelInfo={selectedModelInfo} /> - + {/* Gate Verbosity UI by capability flag */} + {selectedModelInfo?.supportsVerbosity && ( + + )} {!fromWelcomeView && ( diff --git a/webview-ui/src/components/settings/ThinkingBudget.tsx b/webview-ui/src/components/settings/ThinkingBudget.tsx index a49ec79efc..a3e2d428b4 100644 --- a/webview-ui/src/components/settings/ThinkingBudget.tsx +++ b/webview-ui/src/components/settings/ThinkingBudget.tsx @@ -1,7 +1,12 @@ import { useEffect } from "react" import { Checkbox } from "vscrui" -import { type ProviderSettings, type ModelInfo, type ReasoningEffort, reasoningEfforts } from "@roo-code/types" +import { + type ProviderSettings, + type ModelInfo, + type ReasoningEffortWithMinimal, + reasoningEfforts, +} from "@roo-code/types" import { DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS, @@ -27,10 +32,35 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod const isGemini25Pro = selectedModelId && selectedModelId.includes("gemini-2.5-pro") const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024 + // Check if this is a GPT-5 model to show "minimal" option + // Only show minimal for OpenAI Native provider GPT-5 models + const isOpenAiNativeProvider = apiConfiguration.apiProvider === "openai-native" + const isGpt5Model = isOpenAiNativeProvider && selectedModelId && selectedModelId.startsWith("gpt-5") + // Add "minimal" option for GPT-5 models + // Spread to convert readonly tuple into a mutable array, then expose as readonly for safety + const baseEfforts = [...reasoningEfforts] as ReasoningEffortWithMinimal[] + const availableReasoningEfforts: ReadonlyArray = isGpt5Model + ? 
(["minimal", ...baseEfforts] as ReasoningEffortWithMinimal[]) + : baseEfforts + + // Default reasoning effort - use model's default if available + // GPT-5 models have "medium" as their default in the model configuration + const modelDefaultReasoningEffort = modelInfo?.reasoningEffort as ReasoningEffortWithMinimal | undefined + const defaultReasoningEffort: ReasoningEffortWithMinimal = modelDefaultReasoningEffort || "medium" + const currentReasoningEffort: ReasoningEffortWithMinimal = + (apiConfiguration.reasoningEffort as ReasoningEffortWithMinimal | undefined) || defaultReasoningEffort + const isReasoningBudgetSupported = !!modelInfo && modelInfo.supportsReasoningBudget const isReasoningBudgetRequired = !!modelInfo && modelInfo.requiredReasoningBudget const isReasoningEffortSupported = !!modelInfo && modelInfo.supportsReasoningEffort + // Set default reasoning effort when model supports it and no value is set + useEffect(() => { + if (isReasoningEffortSupported && !apiConfiguration.reasoningEffort && defaultReasoningEffort) { + setApiConfigurationField("reasoningEffort", defaultReasoningEffort) + } + }, [isReasoningEffortSupported, apiConfiguration.reasoningEffort, defaultReasoningEffort, setApiConfigurationField]) + const enableReasoningEffort = apiConfiguration.enableReasoningEffort const customMaxOutputTokens = apiConfiguration.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS const customMaxThinkingTokens = @@ -109,13 +139,21 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod