diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index 90b61ad879e..f3095b2869e 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -47,6 +47,8 @@ export const modelInfoSchema = z.object({ // Capability flag to indicate whether the model supports an output verbosity parameter supportsVerbosity: z.boolean().optional(), supportsReasoningBudget: z.boolean().optional(), + // Capability flag to indicate whether the model supports temperature parameter + supportsTemperature: z.boolean().optional(), requiredReasoningBudget: z.boolean().optional(), supportsReasoningEffort: z.boolean().optional(), supportedParameters: z.array(modelParametersSchema).optional(), diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index ff798249848..bdc383cf2b7 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -19,6 +19,7 @@ export const openAiNativeModels = { description: "GPT-5: The best model for coding and agentic tasks across domains", // supportsVerbosity is a new capability; ensure ModelInfo includes it supportsVerbosity: true, + supportsTemperature: false, }, "gpt-5-mini-2025-08-07": { maxTokens: 128000, @@ -32,6 +33,7 @@ export const openAiNativeModels = { cacheReadsPrice: 0.03, description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks", supportsVerbosity: true, + supportsTemperature: false, }, "gpt-5-nano-2025-08-07": { maxTokens: 128000, @@ -45,6 +47,7 @@ export const openAiNativeModels = { cacheReadsPrice: 0.01, description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5", supportsVerbosity: true, + supportsTemperature: false, }, "gpt-4.1": { maxTokens: 32_768, @@ -54,6 +57,7 @@ export const openAiNativeModels = { inputPrice: 2, outputPrice: 8, cacheReadsPrice: 0.5, + supportsTemperature: true, }, "gpt-4.1-mini": { maxTokens: 32_768, @@ -63,6 +67,7 @@ export const openAiNativeModels = { inputPrice: 0.4, outputPrice: 1.6, cacheReadsPrice: 0.1, + supportsTemperature: true, }, "gpt-4.1-nano": { maxTokens: 32_768, @@ -72,6 +77,7 @@ export const openAiNativeModels = { inputPrice: 0.1, outputPrice: 0.4, cacheReadsPrice: 0.025, + supportsTemperature: true, }, o3: { maxTokens: 100_000, @@ -83,6 +89,7 @@ export const openAiNativeModels = { cacheReadsPrice: 0.5, supportsReasoningEffort: true, reasoningEffort: "medium", + supportsTemperature: false, }, "o3-high": { maxTokens: 100_000, @@ -93,6 +100,7 @@ export const openAiNativeModels = { outputPrice: 8.0, cacheReadsPrice: 0.5, reasoningEffort: "high", + supportsTemperature: false, }, "o3-low": { maxTokens: 100_000, @@ -103,6 +111,7 @@ export const openAiNativeModels = { outputPrice: 8.0, cacheReadsPrice: 0.5, reasoningEffort: "low", + supportsTemperature: false, }, "o4-mini": { maxTokens: 100_000, @@ -114,6 +123,7 @@ export const openAiNativeModels = { cacheReadsPrice: 0.275, supportsReasoningEffort: true, reasoningEffort: "medium", + supportsTemperature: false, }, "o4-mini-high": { maxTokens: 100_000, @@ -124,6 +134,7 @@ export const openAiNativeModels = { outputPrice: 4.4, cacheReadsPrice: 0.275, reasoningEffort: "high", + supportsTemperature: false, }, "o4-mini-low": { maxTokens: 100_000, @@ -134,6 +145,7 @@ export const openAiNativeModels = { outputPrice: 4.4, cacheReadsPrice: 0.275, reasoningEffort: "low", + supportsTemperature: false, }, "o3-mini": { maxTokens: 100_000, @@ -145,6 +157,7 @@ export const openAiNativeModels = { cacheReadsPrice: 0.55, supportsReasoningEffort: 
true, reasoningEffort: "medium", + supportsTemperature: false, }, "o3-mini-high": { maxTokens: 100_000, @@ -155,6 +168,7 @@ export const openAiNativeModels = { outputPrice: 4.4, cacheReadsPrice: 0.55, reasoningEffort: "high", + supportsTemperature: false, }, "o3-mini-low": { maxTokens: 100_000, @@ -165,6 +179,7 @@ export const openAiNativeModels = { outputPrice: 4.4, cacheReadsPrice: 0.55, reasoningEffort: "low", + supportsTemperature: false, }, o1: { maxTokens: 100_000, @@ -174,6 +189,7 @@ export const openAiNativeModels = { inputPrice: 15, outputPrice: 60, cacheReadsPrice: 7.5, + supportsTemperature: false, }, "o1-preview": { maxTokens: 32_768, @@ -183,6 +199,7 @@ export const openAiNativeModels = { inputPrice: 15, outputPrice: 60, cacheReadsPrice: 7.5, + supportsTemperature: false, }, "o1-mini": { maxTokens: 65_536, @@ -192,6 +209,7 @@ export const openAiNativeModels = { inputPrice: 1.1, outputPrice: 4.4, cacheReadsPrice: 0.55, + supportsTemperature: false, }, "gpt-4o": { maxTokens: 16_384, @@ -201,6 +219,7 @@ export const openAiNativeModels = { inputPrice: 2.5, outputPrice: 10, cacheReadsPrice: 1.25, + supportsTemperature: true, }, "gpt-4o-mini": { maxTokens: 16_384, @@ -210,6 +229,7 @@ export const openAiNativeModels = { inputPrice: 0.15, outputPrice: 0.6, cacheReadsPrice: 0.075, + supportsTemperature: true, }, "codex-mini-latest": { maxTokens: 16_384, @@ -219,6 +239,7 @@ export const openAiNativeModels = { inputPrice: 1.5, outputPrice: 6, cacheReadsPrice: 0, + supportsTemperature: false, description: "Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks. Trained with reinforcement learning to generate human-style code, adhere to instructions, and iteratively run tests.", }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index cc7fd4a06ee..3502320789d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -656,8 +656,8 @@ importers: specifier: ^12.0.0 version: 12.0.0 openai: - specifier: ^5.0.0 - version: 5.5.1(ws@8.18.3)(zod@3.25.61) + specifier: ^5.12.2 + version: 5.12.2(ws@8.18.3)(zod@3.25.61) os-name: specifier: ^6.0.0 version: 6.1.0 @@ -7621,8 +7621,8 @@ packages: resolution: {integrity: sha512-cxN6aIDPz6rm8hbebcP7vrQNhvRcveZoJU72Y7vskh4oIm+BZwBECnx5nTmrlres1Qapvx27Qo1Auukpf8PKXw==} engines: {node: '>=18'} - openai@5.5.1: - resolution: {integrity: sha512-5i19097mGotHA1eFsM6Tjd/tJ8uo9sa5Ysv4Q6bKJ2vtN6rc0MzMrUefXnLXYAJcmMQrC1Efhj0AvfIkXrQamw==} + openai@5.12.2: + resolution: {integrity: sha512-xqzHHQch5Tws5PcKR2xsZGX9xtch+JQFz5zb14dGqlshmmDAFBFEWmeIpf7wVqWV+w7Emj7jRgkNJakyKE0tYQ==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -17631,7 +17631,7 @@ snapshots: is-inside-container: 1.0.0 is-wsl: 3.1.0 - openai@5.5.1(ws@8.18.3)(zod@3.25.61): + openai@5.12.2(ws@8.18.3)(zod@3.25.61): optionalDependencies: ws: 8.18.3 zod: 3.25.61 diff --git a/src/api/index.ts b/src/api/index.ts index c29c230b063..f8df58c768d 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -52,6 +52,14 @@ export interface ApiHandlerCreateMessageMetadata { * Used to enforce "skip once" after a condense operation. */ suppressPreviousResponseId?: boolean + /** + * Controls whether the response should be stored for 30 days in OpenAI's Responses API. + * When true (default), responses are stored and can be referenced in future requests + * using the previous_response_id for efficient conversation continuity. + * Set to false to opt out of response storage for privacy or compliance reasons. 
+ * @default true + */ + store?: boolean } export interface ApiHandler { diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index 0acdb6202e3..e9e54049303 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -5,62 +5,15 @@ import { Anthropic } from "@anthropic-ai/sdk" import { OpenAiNativeHandler } from "../openai-native" import { ApiHandlerOptions } from "../../../shared/api" -// Mock OpenAI client -const mockCreate = vitest.fn() +// Mock OpenAI client - now everything uses Responses API +const mockResponsesCreate = vitest.fn() vitest.mock("openai", () => { return { __esModule: true, default: vitest.fn().mockImplementation(() => ({ - chat: { - completions: { - create: mockCreate.mockImplementation(async (options) => { - if (!options.stream) { - return { - id: "test-completion", - choices: [ - { - message: { role: "assistant", content: "Test response" }, - finish_reason: "stop", - index: 0, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - } - } - - return { - [Symbol.asyncIterator]: async function* () { - yield { - choices: [ - { - delta: { content: "Test response" }, - index: 0, - }, - ], - usage: null, - } - yield { - choices: [ - { - delta: {}, - index: 0, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - } - }, - } - }), - }, + responses: { + create: mockResponsesCreate, }, })), } @@ -83,7 +36,18 @@ describe("OpenAiNativeHandler", () => { openAiNativeApiKey: "test-api-key", } handler = new OpenAiNativeHandler(mockOptions) - mockCreate.mockClear() + mockResponsesCreate.mockClear() + // Clear fetch mock if it exists + if ((global as any).fetch) { + delete (global as any).fetch + } + }) + + afterEach(() => { + // Clean up fetch mock + if ((global as any).fetch) { + delete (global as any).fetch + } }) describe("constructor", () => { @@ -102,7 +66,33 @@ describe("OpenAiNativeHandler", () => { }) describe("createMessage", () => { - it("should handle streaming responses", async () => { + it("should handle streaming responses via Responses API", async () => { + // Mock fetch for Responses API fallback + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode('data: {"type":"response.text.delta","delta":"Test"}\n\n'), + ) + controller.enqueue( + new TextEncoder().encode('data: {"type":"response.text.delta","delta":" response"}\n\n'), + ) + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.done","response":{"usage":{"prompt_tokens":10,"completion_tokens":2}}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + // Mock SDK to fail so it falls back to fetch + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + const stream = handler.createMessage(systemPrompt, messages) const chunks: any[] = [] for await (const chunk of stream) { @@ -111,505 +101,38 @@ describe("OpenAiNativeHandler", () => { expect(chunks.length).toBeGreaterThan(0) const textChunks = chunks.filter((chunk) => chunk.type === "text") - expect(textChunks).toHaveLength(1) - expect(textChunks[0].text).toBe("Test response") + expect(textChunks).toHaveLength(2) + expect(textChunks[0].text).toBe("Test") + expect(textChunks[1].text).toBe(" response") }) it("should handle 
API errors", async () => { - mockCreate.mockRejectedValueOnce(new Error("API Error")) + // Mock fetch to return error + const mockFetch = vitest.fn().mockResolvedValue({ + ok: false, + status: 500, + text: async () => "Internal Server Error", + }) + global.fetch = mockFetch as any + + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + const stream = handler.createMessage(systemPrompt, messages) await expect(async () => { for await (const _chunk of stream) { // Should not reach here } - }).rejects.toThrow("API Error") - }) - - it("should handle missing content in response for o1 model", async () => { - // Use o1 model which supports developer role - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "o1", - }) - - mockCreate.mockResolvedValueOnce({ - [Symbol.asyncIterator]: async function* () { - yield { - choices: [ - { - delta: { content: null }, - index: 0, - }, - ], - usage: { - prompt_tokens: 0, - completion_tokens: 0, - total_tokens: 0, - }, - } - }, - }) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Verify essential fields directly - expect(results.length).toBe(1) - expect(results[0].type).toBe("usage") - // Use type assertion to avoid TypeScript errors - const usageResult = results[0] as any - expect(usageResult.inputTokens).toBe(0) - expect(usageResult.outputTokens).toBe(0) - // When no cache tokens are present, they should be undefined - expect(usageResult.cacheWriteTokens).toBeUndefined() - expect(usageResult.cacheReadTokens).toBeUndefined() - - // Verify developer role is used for system prompt with o1 model - expect(mockCreate).toHaveBeenCalledWith({ - model: "o1", - messages: [ - { role: "developer", content: "Formatting re-enabled\n" + systemPrompt }, - { role: "user", content: "Hello!" }, - ], - stream: true, - stream_options: { include_usage: true }, - }) - }) - - it("should handle o3-mini model family correctly", async () => { - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "o3-mini", - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - expect(mockCreate).toHaveBeenCalledWith({ - model: "o3-mini", - messages: [ - { role: "developer", content: "Formatting re-enabled\n" + systemPrompt }, - { role: "user", content: "Hello!" }, - ], - stream: true, - stream_options: { include_usage: true }, - reasoning_effort: "medium", - }) - }) - }) - - describe("streaming models", () => { - beforeEach(() => { - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-4.1", - }) - }) - - it("should handle streaming response", async () => { - const mockStream = [ - { choices: [{ delta: { content: "Hello" } }], usage: null }, - { choices: [{ delta: { content: " there" } }], usage: null }, - { choices: [{ delta: { content: "!" 
} }], usage: { prompt_tokens: 10, completion_tokens: 5 } }, - ] - - mockCreate.mockResolvedValueOnce( - (async function* () { - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Verify text responses individually - expect(results.length).toBe(4) - expect(results[0]).toMatchObject({ type: "text", text: "Hello" }) - expect(results[1]).toMatchObject({ type: "text", text: " there" }) - expect(results[2]).toMatchObject({ type: "text", text: "!" }) - - // Check usage data fields but use toBeCloseTo for floating point comparison - expect(results[3].type).toBe("usage") - // Use type assertion to avoid TypeScript errors - expect((results[3] as any).inputTokens).toBe(10) - expect((results[3] as any).outputTokens).toBe(5) - expect((results[3] as any).totalCost).toBeCloseTo(0.00006, 6) - - expect(mockCreate).toHaveBeenCalledWith({ - model: "gpt-4.1", - temperature: 0, - messages: [ - { role: "system", content: systemPrompt }, - { role: "user", content: "Hello!" }, - ], - stream: true, - stream_options: { include_usage: true }, - }) - }) - - it("should not include verbosity parameter for models that don't support it", async () => { - // Test with gpt-4.1 which does NOT support verbosity - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-4.1", - verbosity: "high", // Set verbosity but it should be ignored - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify that verbosity is NOT included in the request - const callArgs = mockCreate.mock.calls[0][0] - expect(callArgs).not.toHaveProperty("verbosity") - expect(callArgs.model).toBe("gpt-4.1") - expect(callArgs.temperature).toBe(0) - expect(callArgs.stream).toBe(true) - }) - - it("should not include verbosity for gpt-4o models", async () => { - // Test with gpt-4o which does NOT support verbosity - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-4o", - verbosity: "medium", // Set verbosity but it should be ignored - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify that verbosity is NOT included in the request - const callArgs = mockCreate.mock.calls[0][0] - expect(callArgs).not.toHaveProperty("verbosity") - expect(callArgs.model).toBe("gpt-4o") - }) - - it("should not include verbosity for gpt-4.1-mini models", async () => { - // Test with gpt-4.1-mini which does NOT support verbosity - handler = new OpenAiNativeHandler({ - ...mockOptions, - apiModelId: "gpt-4.1-mini", - verbosity: "low", // Set verbosity but it should be ignored - }) - - const stream = handler.createMessage(systemPrompt, messages) - const chunks: any[] = [] - for await (const chunk of stream) { - chunks.push(chunk) - } - - // Verify that verbosity is NOT included in the request - const callArgs = mockCreate.mock.calls[0][0] - expect(callArgs).not.toHaveProperty("verbosity") - expect(callArgs.model).toBe("gpt-4.1-mini") - }) - - it("should handle empty delta content", async () => { - const mockStream = [ - { choices: [{ delta: {} }], usage: null }, - { choices: [{ delta: { content: null } }], usage: null }, - { choices: [{ delta: { content: "Hello" } }], usage: { prompt_tokens: 10, completion_tokens: 5 } }, - ] - - 
mockCreate.mockResolvedValueOnce( - (async function* () { - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Verify responses individually - expect(results.length).toBe(2) - expect(results[0]).toMatchObject({ type: "text", text: "Hello" }) - - // Check usage data fields but use toBeCloseTo for floating point comparison - expect(results[1].type).toBe("usage") - // Use type assertion to avoid TypeScript errors - expect((results[1] as any).inputTokens).toBe(10) - expect((results[1] as any).outputTokens).toBe(5) - expect((results[1] as any).totalCost).toBeCloseTo(0.00006, 6) - }) - - it("should handle cache tokens in streaming response", async () => { - const mockStream = [ - { choices: [{ delta: { content: "Hello" } }], usage: null }, - { choices: [{ delta: { content: " cached" } }], usage: null }, - { - choices: [{ delta: { content: " response" } }], - usage: { - prompt_tokens: 100, - completion_tokens: 10, - prompt_tokens_details: { - cached_tokens: 80, - audio_tokens: 0, - }, - completion_tokens_details: { - reasoning_tokens: 0, - audio_tokens: 0, - accepted_prediction_tokens: 0, - rejected_prediction_tokens: 0, - }, - }, - }, - ] - - mockCreate.mockResolvedValueOnce( - (async function* () { - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Verify text responses - expect(results.length).toBe(4) - expect(results[0]).toMatchObject({ type: "text", text: "Hello" }) - expect(results[1]).toMatchObject({ type: "text", text: " cached" }) - expect(results[2]).toMatchObject({ type: "text", text: " response" }) - - // Check usage data includes cache tokens - expect(results[3].type).toBe("usage") - const usageChunk = results[3] as any - expect(usageChunk.inputTokens).toBe(100) // Total input tokens (includes cached) - expect(usageChunk.outputTokens).toBe(10) - expect(usageChunk.cacheReadTokens).toBe(80) // Cached tokens from prompt_tokens_details - expect(usageChunk.cacheWriteTokens).toBeUndefined() // No cache write tokens in standard response - - // Verify cost calculation takes cache into account - // GPT-4.1 pricing: input $2/1M, output $8/1M, cache read $0.5/1M - // OpenAI's prompt_tokens includes cached tokens, so we need to calculate: - // - Non-cached input tokens: 100 - 80 = 20 - // - Cost for non-cached input: (20 / 1_000_000) * 2.0 - // - Cost for cached input: (80 / 1_000_000) * 0.5 - // - Cost for output: (10 / 1_000_000) * 8.0 - const nonCachedInputTokens = 100 - 80 - const expectedNonCachedInputCost = (nonCachedInputTokens / 1_000_000) * 2.0 - const expectedCacheReadCost = (80 / 1_000_000) * 0.5 - const expectedOutputCost = (10 / 1_000_000) * 8.0 - const expectedTotalCost = expectedNonCachedInputCost + expectedCacheReadCost + expectedOutputCost - expect(usageChunk.totalCost).toBeCloseTo(expectedTotalCost, 10) - }) - - it("should handle cache write tokens if present", async () => { - const mockStream = [ - { choices: [{ delta: { content: "Test" } }], usage: null }, - { - choices: [{ delta: {} }], - usage: { - prompt_tokens: 150, - completion_tokens: 5, - prompt_tokens_details: { - cached_tokens: 50, - }, - cache_creation_input_tokens: 30, // Cache write tokens - }, - }, - ] - - mockCreate.mockResolvedValueOnce( - (async function* () 
{ - for (const chunk of mockStream) { - yield chunk - } - })(), - ) - - const generator = handler.createMessage(systemPrompt, messages) - const results = [] - for await (const result of generator) { - results.push(result) - } - - // Check usage data includes both cache read and write tokens - const usageChunk = results.find((r) => r.type === "usage") as any - expect(usageChunk).toBeDefined() - expect(usageChunk.inputTokens).toBe(150) - expect(usageChunk.outputTokens).toBe(5) - expect(usageChunk.cacheReadTokens).toBe(50) - expect(usageChunk.cacheWriteTokens).toBe(30) + }).rejects.toThrow("OpenAI service error") }) }) describe("completePrompt", () => { - it("should complete prompt successfully with gpt-4.1 model", async () => { - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("Test response") - expect(mockCreate).toHaveBeenCalledWith({ - model: "gpt-4.1", - messages: [{ role: "user", content: "Test prompt" }], - temperature: 0, - }) - }) - - it("should complete prompt successfully with o1 model", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o1", - openAiNativeApiKey: "test-api-key", - }) - - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("Test response") - expect(mockCreate).toHaveBeenCalledWith({ - model: "o1", - messages: [{ role: "user", content: "Test prompt" }], - }) - }) - - it("should complete prompt successfully with o1-preview model", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o1-preview", - openAiNativeApiKey: "test-api-key", - }) - - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("Test response") - expect(mockCreate).toHaveBeenCalledWith({ - model: "o1-preview", - messages: [{ role: "user", content: "Test prompt" }], - }) - }) - - it("should complete prompt successfully with o1-mini model", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o1-mini", - openAiNativeApiKey: "test-api-key", - }) - - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("Test response") - expect(mockCreate).toHaveBeenCalledWith({ - model: "o1-mini", - messages: [{ role: "user", content: "Test prompt" }], - }) - }) - - it("should complete prompt successfully with o3-mini model", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o3-mini", - openAiNativeApiKey: "test-api-key", - }) - - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("Test response") - expect(mockCreate).toHaveBeenCalledWith({ - model: "o3-mini", - messages: [{ role: "user", content: "Test prompt" }], - reasoning_effort: "medium", - }) - }) - - it("should handle API errors", async () => { - mockCreate.mockRejectedValueOnce(new Error("API Error")) + it("should throw error for all models since Responses API doesn't support non-streaming", async () => { await expect(handler.completePrompt("Test prompt")).rejects.toThrow( - "OpenAI Native completion error: API Error", + "completePrompt is not supported. 
Use createMessage (Responses API) instead.", ) }) - - it("should handle empty response", async () => { - mockCreate.mockResolvedValueOnce({ - choices: [{ message: { content: "" } }], - }) - const result = await handler.completePrompt("Test prompt") - expect(result).toBe("") - }) - }) - - describe("temperature parameter handling", () => { - it("should include temperature for models that support it", async () => { - // Test with gpt-4.1 which supports temperature - handler = new OpenAiNativeHandler({ - apiModelId: "gpt-4.1", - openAiNativeApiKey: "test-api-key", - }) - - await handler.completePrompt("Test prompt") - expect(mockCreate).toHaveBeenCalledWith({ - model: "gpt-4.1", - messages: [{ role: "user", content: "Test prompt" }], - temperature: 0, - }) - }) - - it("should strip temperature for o1 family models", async () => { - const o1Models = ["o1", "o1-preview", "o1-mini"] - - for (const modelId of o1Models) { - handler = new OpenAiNativeHandler({ - apiModelId: modelId, - openAiNativeApiKey: "test-api-key", - }) - - mockCreate.mockClear() - await handler.completePrompt("Test prompt") - - const callArgs = mockCreate.mock.calls[0][0] - // Temperature should be undefined for o1 models - expect(callArgs.temperature).toBeUndefined() - expect(callArgs.model).toBe(modelId) - } - }) - - it("should strip temperature for o3-mini model", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o3-mini", - openAiNativeApiKey: "test-api-key", - }) - - await handler.completePrompt("Test prompt") - - const callArgs = mockCreate.mock.calls[0][0] - // Temperature should be undefined for o3-mini models - expect(callArgs.temperature).toBeUndefined() - expect(callArgs.model).toBe("o3-mini") - expect(callArgs.reasoning_effort).toBe("medium") - }) - - it("should strip temperature in streaming mode for unsupported models", async () => { - handler = new OpenAiNativeHandler({ - apiModelId: "o1", - openAiNativeApiKey: "test-api-key", - }) - - const stream = handler.createMessage(systemPrompt, messages) - // Consume the stream - for await (const _chunk of stream) { - // Just consume the stream - } - - const callArgs = mockCreate.mock.calls[0][0] - expect(callArgs).not.toHaveProperty("temperature") - expect(callArgs.model).toBe("o1") - expect(callArgs.stream).toBe(true) - }) }) describe("getModel", () => { @@ -666,6 +189,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail so it uses fetch + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -691,22 +217,31 @@ describe("OpenAiNativeHandler", () => { }), ) const body1 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body1).toContain('"model":"gpt-5-2025-08-07"') - expect(body1).toContain('"input":"Developer: You are a helpful assistant.\\n\\nUser: Hello!"') - expect(body1).toContain('"effort":"medium"') - expect(body1).toContain('"summary":"auto"') - expect(body1).toContain('"verbosity":"medium"') - expect(body1).toContain('"temperature":1') - expect(body1).toContain('"max_output_tokens"') + const parsedBody = JSON.parse(body1) + expect(parsedBody.model).toBe("gpt-5-2025-08-07") + // Now using structured format with content arrays + expect(parsedBody.input).toEqual([ + { + role: "developer", + content: [{ type: "input_text", text: "You are a helpful assistant." }], + }, + { + role: "user", + content: [{ type: "input_text", text: "Hello!" 
}], + }, + ]) + expect(parsedBody.reasoning?.effort).toBe("medium") + expect(parsedBody.reasoning?.summary).toBe("auto") + expect(parsedBody.text?.verbosity).toBe("medium") + // GPT-5 models don't include temperature + expect(parsedBody.temperature).toBeUndefined() + expect(parsedBody.max_output_tokens).toBeDefined() // Verify the streamed content const textChunks = chunks.filter((c) => c.type === "text") expect(textChunks).toHaveLength(2) expect(textChunks[0].text).toBe("Hello") expect(textChunks[1].text).toBe(" world") - - // Clean up - delete (global as any).fetch }) it("should handle GPT-5-mini model with Responses API", async () => { @@ -727,6 +262,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-mini-2025-08-07", @@ -745,9 +283,6 @@ describe("OpenAiNativeHandler", () => { body: expect.stringContaining('"model":"gpt-5-mini-2025-08-07"'), }), ) - - // Clean up - delete (global as any).fetch }) it("should handle GPT-5-nano model with Responses API", async () => { @@ -768,6 +303,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-nano-2025-08-07", @@ -786,9 +324,6 @@ describe("OpenAiNativeHandler", () => { body: expect.stringContaining('"model":"gpt-5-nano-2025-08-07"'), }), ) - - // Clean up - delete (global as any).fetch }) it("should support verbosity control for GPT-5", async () => { @@ -809,6 +344,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -829,9 +367,6 @@ describe("OpenAiNativeHandler", () => { body: expect.stringContaining('"verbosity":"low"'), }), ) - - // Clean up - delete (global as any).fetch }) it("should support minimal reasoning effort for GPT-5", async () => { @@ -852,6 +387,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -871,9 +409,6 @@ describe("OpenAiNativeHandler", () => { body: expect.stringContaining('"effort":"minimal"'), }), ) - - // Clean up - delete (global as any).fetch }) it("should support low reasoning effort for GPT-5", async () => { @@ -894,6 +429,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -914,15 +452,14 @@ describe("OpenAiNativeHandler", () => { }), ) const body2 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body2).toContain('"model":"gpt-5-2025-08-07"') - expect(body2).toContain('"effort":"low"') - expect(body2).toContain('"summary":"auto"') - expect(body2).toContain('"verbosity":"medium"') - expect(body2).toContain('"temperature":1') - expect(body2).toContain('"max_output_tokens"') - - // Clean up - delete (global as any).fetch + const parsedBody = JSON.parse(body2) + 
expect(parsedBody.model).toBe("gpt-5-2025-08-07") + expect(parsedBody.reasoning?.effort).toBe("low") + expect(parsedBody.reasoning?.summary).toBe("auto") + expect(parsedBody.text?.verbosity).toBe("medium") + // GPT-5 models don't include temperature + expect(parsedBody.temperature).toBeUndefined() + expect(parsedBody.max_output_tokens).toBeDefined() }) it("should support both verbosity and reasoning effort together for GPT-5", async () => { @@ -943,6 +480,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -964,15 +504,14 @@ describe("OpenAiNativeHandler", () => { }), ) const body3 = (mockFetch.mock.calls[0][1] as any).body as string - expect(body3).toContain('"model":"gpt-5-2025-08-07"') - expect(body3).toContain('"effort":"minimal"') - expect(body3).toContain('"summary":"auto"') - expect(body3).toContain('"verbosity":"high"') - expect(body3).toContain('"temperature":1') - expect(body3).toContain('"max_output_tokens"') - - // Clean up - delete (global as any).fetch + const parsedBody = JSON.parse(body3) + expect(parsedBody.model).toBe("gpt-5-2025-08-07") + expect(parsedBody.reasoning?.effort).toBe("minimal") + expect(parsedBody.reasoning?.summary).toBe("auto") + expect(parsedBody.text?.verbosity).toBe("high") + // GPT-5 models don't include temperature + expect(parsedBody.temperature).toBeUndefined() + expect(parsedBody.max_output_tokens).toBeDefined() }) it("should handle actual GPT-5 Responses API format", async () => { @@ -1019,6 +558,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1056,9 +598,6 @@ describe("OpenAiNativeHandler", () => { const expectedOutputCost = (20 / 1_000_000) * 10.0 const expectedTotalCost = expectedInputCost + expectedOutputCost expect(usageChunks[0].totalCost).toBeCloseTo(expectedTotalCost, 10) - - // Clean up - delete (global as any).fetch }) it("should handle Responses API with no content gracefully", async () => { @@ -1075,6 +614,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1092,9 +634,6 @@ describe("OpenAiNativeHandler", () => { const contentChunks = chunks.filter((c) => c.type === "text" || c.type === "reasoning") expect(contentChunks).toHaveLength(0) - - // Clean up - delete (global as any).fetch }) it("should support previous_response_id for conversation continuity", async () => { @@ -1126,6 +665,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1155,13 +697,10 @@ describe("OpenAiNativeHandler", () => { // Verify second request includes the provided previous_response_id let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) expect(secondCallBody.previous_response_id).toBe("resp_456") - - // Clean up - delete (global as any).fetch }) it("should handle unhandled stream events gracefully", async () => { - 
// Mock fetch for the fallback SSE path (which is what gets used when SDK fails) + // Mock fetch for the fallback SSE path const mockFetch = vitest.fn().mockResolvedValue({ ok: true, body: new ReadableStream({ @@ -1183,21 +722,14 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any - // Also mock the SDK to throw an error so it falls back to fetch - const mockClient = { - responses: { - create: vitest.fn().mockRejectedValue(new Error("SDK not available")), - }, - } + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", }) - // Replace the client with our mock - ;(handler as any).client = mockClient - const stream = handler.createMessage(systemPrompt, messages) const chunks: any[] = [] const errors: any[] = [] @@ -1210,20 +742,10 @@ describe("OpenAiNativeHandler", () => { errors.push(error) } - // Log for debugging - if (chunks.length === 0 && errors.length === 0) { - console.log("No chunks and no errors received") - } - if (errors.length > 0) { - console.log("Errors:", errors) - } - expect(errors.length).toBe(0) const textChunks = chunks.filter((c) => c.type === "text") expect(textChunks.length).toBeGreaterThan(0) expect(textChunks[0].text).toBe("Hello") - - delete (global as any).fetch }) it("should use stored response ID when metadata doesn't provide one", async () => { @@ -1262,6 +784,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1282,9 +807,6 @@ describe("OpenAiNativeHandler", () => { // Verify second request uses the stored response ID from first request let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) expect(secondCallBody.previous_response_id).toBe("resp_789") - - // Clean up - delete (global as any).fetch }) it("should only send latest message when using previous_response_id", async () => { @@ -1328,6 +850,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1345,11 +870,26 @@ describe("OpenAiNativeHandler", () => { // consume stream } - // Verify first request sends full conversation + // Verify first request sends full conversation in structured format let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(firstCallBody.input).toContain("Hello") - expect(firstCallBody.input).toContain("Hi there!") - expect(firstCallBody.input).toContain("How are you?") + expect(firstCallBody.input).toEqual([ + { + role: "developer", + content: [{ type: "input_text", text: systemPrompt }], + }, + { + role: "user", + content: [{ type: "input_text", text: "Hello" }], + }, + { + role: "assistant", + content: [{ type: "output_text", text: "Hi there!" }], + }, + { + role: "user", + content: [{ type: "input_text", text: "How are you?" 
}], + }, + ]) expect(firstCallBody.previous_response_id).toBeUndefined() // Second request with previous_response_id - should only send latest message @@ -1369,40 +909,49 @@ describe("OpenAiNativeHandler", () => { // consume stream } - // Verify second request only sends the latest user message + // Verify second request only sends the latest user message in structured format let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body) - expect(secondCallBody.input).toBe("User: What's the weather?") - expect(secondCallBody.input).not.toContain("Hello") - expect(secondCallBody.input).not.toContain("Hi there!") - expect(secondCallBody.input).not.toContain("How are you?") + expect(secondCallBody.input).toEqual([ + { + role: "user", + content: [{ type: "input_text", text: "What's the weather?" }], + }, + ]) expect(secondCallBody.previous_response_id).toBe("resp_001") - - // Clean up - delete (global as any).fetch }) - it("should correctly prepare GPT-5 input with conversation continuity", () => { + it("should correctly prepare structured input", () => { const gpt5Handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", }) + // Test with metadata that has previousResponseId // @ts-expect-error - private method - const { formattedInput, previousResponseId } = gpt5Handler.prepareGpt5Input(systemPrompt, messages, { - taskId: "task1", - previousResponseId: "resp_123", - }) + const { formattedInput, previousResponseId } = gpt5Handler.prepareResponsesApiInput( + systemPrompt, + messages, + { + taskId: "task1", + previousResponseId: "resp_123", + }, + ) expect(previousResponseId).toBe("resp_123") - expect(formattedInput).toBe("User: Hello!") + expect(formattedInput).toEqual([ + { + role: "user", + content: [{ type: "input_text", text: "Hello!" 
}], + }, + ]) }) it("should provide helpful error messages for different error codes", async () => { const testCases = [ - { status: 400, expectedMessage: "Invalid request to GPT-5 API" }, + { status: 400, expectedMessage: "Invalid request to Responses API" }, { status: 401, expectedMessage: "Authentication failed" }, { status: 403, expectedMessage: "Access denied" }, - { status: 404, expectedMessage: "GPT-5 API endpoint not found" }, + { status: 404, expectedMessage: "Responses API endpoint not found" }, { status: 429, expectedMessage: "Rate limit exceeded" }, { status: 500, expectedMessage: "OpenAI service error" }, ] @@ -1417,6 +966,9 @@ describe("OpenAiNativeHandler", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "gpt-5-2025-08-07", @@ -1429,17 +981,22 @@ describe("OpenAiNativeHandler", () => { // Should throw before yielding anything } }).rejects.toThrow(expectedMessage) - } - // Clean up - delete (global as any).fetch + // Clean up + delete (global as any).fetch + } }) }) }) -// Added tests for GPT-5 streaming event coverage per PR_review_gpt5_final.md - +// Additional tests for GPT-5 streaming event coverage describe("GPT-5 streaming event coverage (additional)", () => { + afterEach(() => { + if ((global as any).fetch) { + delete (global as any).fetch + } + }) + it("should handle reasoning delta events for GPT-5", async () => { const mockFetch = vitest.fn().mockResolvedValue({ ok: true, @@ -1458,8 +1015,10 @@ describe("GPT-5 streaming event coverage (additional)", () => { }, }), }) - // @ts-ignore - global.fetch = mockFetch + global.fetch = mockFetch as any + + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) const handler = new OpenAiNativeHandler({ apiModelId: "gpt-5-2025-08-07", @@ -1482,9 +1041,6 @@ describe("GPT-5 streaming event coverage (additional)", () => { expect(reasoningChunks[0].text).toBe("Thinking about the problem...") expect(textChunks).toHaveLength(1) expect(textChunks[0].text).toBe("The answer is...") - - // @ts-ignore - delete global.fetch }) it("should handle refusal delta events for GPT-5 and prefix output", async () => { @@ -1502,8 +1058,10 @@ describe("GPT-5 streaming event coverage (additional)", () => { }, }), }) - // @ts-ignore - global.fetch = mockFetch + global.fetch = mockFetch as any + + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) const handler = new OpenAiNativeHandler({ apiModelId: "gpt-5-2025-08-07", @@ -1522,9 +1080,6 @@ describe("GPT-5 streaming event coverage (additional)", () => { const textChunks = chunks.filter((c) => c.type === "text") expect(textChunks).toHaveLength(1) expect(textChunks[0].text).toBe("[Refusal] I cannot comply with this request.") - - // @ts-ignore - delete global.fetch }) it("should ignore malformed JSON lines in SSE stream", async () => { @@ -1552,8 +1107,10 @@ describe("GPT-5 streaming event coverage (additional)", () => { }, }), }) - // @ts-ignore - global.fetch = mockFetch + global.fetch = mockFetch as any + + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) const handler = new OpenAiNativeHandler({ apiModelId: "gpt-5-2025-08-07", @@ -1572,9 +1129,6 @@ describe("GPT-5 streaming event coverage (additional)", () => { // It should not throw and still capture the valid texts around the malformed line const textChunks = chunks.filter((c) 
=> c.type === "text") expect(textChunks.map((c: any) => c.text)).toEqual(["Before", "After"]) - - // @ts-ignore - delete global.fetch }) describe("Codex Mini Model", () => { @@ -1619,6 +1173,9 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "codex-mini-latest", @@ -1671,12 +1228,18 @@ describe("GPT-5 streaming event coverage (additional)", () => { const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body) expect(requestBody).toMatchObject({ model: "codex-mini-latest", - input: "Developer: You are a helpful coding assistant.\n\nUser: Write a hello world function", + input: [ + { + role: "developer", + content: [{ type: "input_text", text: "You are a helpful coding assistant." }], + }, + { + role: "user", + content: [{ type: "input_text", text: "Write a hello world function" }], + }, + ], stream: true, }) - - // Clean up - delete (global as any).fetch }) it("should handle codex-mini-latest non-streaming completion", async () => { @@ -1687,7 +1250,7 @@ describe("GPT-5 streaming event coverage (additional)", () => { // Codex Mini now uses the same Responses API as GPT-5, which doesn't support non-streaming await expect(handler.completePrompt("Write a hello world function in Python")).rejects.toThrow( - "completePrompt is not supported for codex-mini-latest. Use createMessage (Responses API) instead.", + "completePrompt is not supported. Use createMessage (Responses API) instead.", ) }) @@ -1701,6 +1264,9 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "codex-mini-latest", @@ -1717,9 +1283,6 @@ describe("GPT-5 streaming event coverage (additional)", () => { // consume stream } }).rejects.toThrow("Rate limit exceeded") - - // Clean up - delete (global as any).fetch }) it("should handle codex-mini-latest with multiple user messages", async () => { @@ -1741,6 +1304,9 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "codex-mini-latest", @@ -1759,15 +1325,26 @@ describe("GPT-5 streaming event coverage (additional)", () => { chunks.push(chunk) } - // Verify the request body includes full conversation like GPT-5 + // Verify the request body includes full conversation in structured format const requestBody = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(requestBody.input).toContain("Developer: You are a helpful assistant") - expect(requestBody.input).toContain("User: First question") - expect(requestBody.input).toContain("Assistant: First answer") - expect(requestBody.input).toContain("User: Second question") - - // Clean up - delete (global as any).fetch + expect(requestBody.input).toEqual([ + { + role: "developer", + content: [{ type: "input_text", text: "You are a helpful assistant." 
}], + }, + { + role: "user", + content: [{ type: "input_text", text: "First question" }], + }, + { + role: "assistant", + content: [{ type: "output_text", text: "First answer" }], + }, + { + role: "user", + content: [{ type: "input_text", text: "Second question" }], + }, + ]) }) it("should handle codex-mini-latest stream error events", async () => { @@ -1793,6 +1370,9 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) global.fetch = mockFetch as any + // Mock SDK to fail + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + handler = new OpenAiNativeHandler({ ...mockOptions, apiModelId: "codex-mini-latest", @@ -1810,9 +1390,6 @@ describe("GPT-5 streaming event coverage (additional)", () => { chunks.push(chunk) } }).rejects.toThrow("Responses API error: Model overloaded") - - // Clean up - delete (global as any).fetch }) }) }) diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 2ba85669631..3923d63a192 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -28,6 +28,9 @@ export type OpenAiNativeModel = ReturnType // GPT-5 specific types +// Constants for model identification +const GPT5_MODEL_PREFIX = "gpt-5" + export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI @@ -35,8 +38,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio private responseIdPromise: Promise | undefined private responseIdResolver: ((value: string | undefined) => void) | undefined - // Event types handled by the shared GPT-5 event processor to avoid duplication - private readonly gpt5CoreHandledTypes = new Set([ + // Event types handled by the shared event processor to avoid duplication + private readonly coreHandledEventTypes = new Set([ "response.text.delta", "response.output_text.delta", "response.reasoning.delta", @@ -60,7 +63,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey }) } - private normalizeGpt5Usage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined { + private normalizeUsage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined { if (!usage) return undefined const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 
0 @@ -103,114 +106,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { const model = this.getModel() - let id: "o3-mini" | "o3" | "o4-mini" | undefined - - if (model.id.startsWith("o3-mini")) { - id = "o3-mini" - } else if (model.id.startsWith("o3")) { - id = "o3" - } else if (model.id.startsWith("o4-mini")) { - id = "o4-mini" - } - - if (id) { - yield* this.handleReasonerMessage(model, id, systemPrompt, messages) - } else if (model.id.startsWith("o1")) { - yield* this.handleO1FamilyMessage(model, systemPrompt, messages) - } else if (this.isResponsesApiModel(model.id)) { - // Both GPT-5 and Codex Mini use the v1/responses endpoint - yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata) - } else { - yield* this.handleDefaultModelMessage(model, systemPrompt, messages) - } - } - - private async *handleO1FamilyMessage( - model: OpenAiNativeModel, - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - ): ApiStream { - // o1 supports developer prompt with formatting - // o1-preview and o1-mini only support user messages - const isOriginalO1 = model.id === "o1" - const { reasoning } = this.getModel() - - const response = await this.client.chat.completions.create({ - model: model.id, - messages: [ - { - role: isOriginalO1 ? "developer" : "user", - content: isOriginalO1 ? `Formatting re-enabled\n${systemPrompt}` : systemPrompt, - }, - ...convertToOpenAiMessages(messages), - ], - stream: true, - stream_options: { include_usage: true }, - ...(reasoning && reasoning), - }) - yield* this.handleStreamResponse(response, model) - } - - private async *handleReasonerMessage( - model: OpenAiNativeModel, - family: "o3-mini" | "o3" | "o4-mini", - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - ): ApiStream { - const { reasoning } = this.getModel() - - const stream = await this.client.chat.completions.create({ - model: family, - messages: [ - { - role: "developer", - content: `Formatting re-enabled\n${systemPrompt}`, - }, - ...convertToOpenAiMessages(messages), - ], - stream: true, - stream_options: { include_usage: true }, - ...(reasoning && reasoning), - }) - - yield* this.handleStreamResponse(stream, model) - } - - private async *handleDefaultModelMessage( - model: OpenAiNativeModel, - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - ): ApiStream { - const { reasoning, verbosity } = this.getModel() - - // Prepare the request parameters - const params: any = { - model: model.id, - temperature: this.options.modelTemperature ?? OPENAI_NATIVE_DEFAULT_TEMPERATURE, - messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], - stream: true, - stream_options: { include_usage: true }, - ...(reasoning && reasoning), - } - - // Add verbosity only if the model supports it - if (verbosity && model.info.supportsVerbosity) { - params.verbosity = verbosity - } - - const stream = await this.client.chat.completions.create(params) - - if (typeof (stream as any)[Symbol.asyncIterator] !== "function") { - throw new Error( - "OpenAI SDK did not return an AsyncIterable for streaming response. 
Please check SDK version and usage.", - ) - } - - yield* this.handleStreamResponse( - stream as unknown as AsyncIterable, - model, - ) + // Use Responses API for ALL models + yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata) } private async *handleResponsesApiMessage( @@ -219,20 +117,24 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - // Prefer the official SDK Responses API with streaming; fall back to fetch-based SSE if needed. - const { verbosity } = this.getModel() - - // Both GPT-5 and Codex Mini use the same v1/responses endpoint format + // Use Responses API for ALL models + const { verbosity, reasoning } = this.getModel() - // Resolve reasoning effort (supports "minimal" for GPT‑5) - const reasoningEffort = this.getGpt5ReasoningEffort(model) + // Resolve reasoning effort for models that support it + const reasoningEffort = this.getReasoningEffort(model) // Wait for any pending response ID from a previous request to be available // This handles the race condition with fast nano model responses let effectivePreviousResponseId = metadata?.previousResponseId - // Only allow fallback to pending/last response id when not explicitly suppressed - if (!metadata?.suppressPreviousResponseId) { + // Check if we should suppress previous response ID (e.g., after condense or message edit) + if (metadata?.suppressPreviousResponseId) { + // Clear the stored lastResponseId to prevent it from being used in future requests + this.lastResponseId = undefined + effectivePreviousResponseId = undefined + } else { + // Only try to get fallback response IDs if not suppressing + // If we have a pending response ID promise, wait for it to resolve if (!effectivePreviousResponseId && this.responseIdPromise) { try { @@ -250,20 +152,44 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Fall back to the last known response ID if still not available - if (!effectivePreviousResponseId) { + if (!effectivePreviousResponseId && this.lastResponseId) { effectivePreviousResponseId = this.lastResponseId } } // Format input and capture continuity id - const { formattedInput, previousResponseId } = this.prepareGpt5Input(systemPrompt, messages, metadata) - const requestPreviousResponseId = effectivePreviousResponseId ?? 
previousResponseId + const { formattedInput, previousResponseId } = this.prepareResponsesApiInput(systemPrompt, messages, metadata) + const requestPreviousResponseId = effectivePreviousResponseId || previousResponseId // Create a new promise for this request's response ID this.responseIdPromise = new Promise((resolve) => { this.responseIdResolver = resolve }) + // Build request body + const requestBody = this.buildRequestBody( + model, + formattedInput, + requestPreviousResponseId, + systemPrompt, + verbosity, + reasoningEffort, + metadata, + ) + + // Make the request + yield* this.executeRequest(requestBody, model, metadata) + } + + private buildRequestBody( + model: OpenAiNativeModel, + formattedInput: any, + requestPreviousResponseId: string | undefined, + systemPrompt: string, + verbosity: any, + reasoningEffort: ReasoningEffortWithMinimal | undefined, + metadata?: ApiHandlerCreateMessageMetadata, + ): any { // Build a request body (also used for fallback) // Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation // so requests do not default to very large limits (e.g., 120k). @@ -276,12 +202,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio temperature?: number max_output_tokens?: number previous_response_id?: string + store?: boolean + instructions?: string } - const requestBody: Gpt5RequestBody = { + return { model: model.id, input: formattedInput, stream: true, + store: metadata?.store !== false, // Default to true unless explicitly set to false + // Always include instructions (system prompt) when using previous_response_id + // This ensures the system prompt stays up-to-date even if it changes (e.g., mode switch) + ...(requestPreviousResponseId && { instructions: systemPrompt }), ...(reasoningEffort && { reasoning: { effort: reasoningEffort, @@ -289,13 +221,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio }, }), text: { verbosity: (verbosity || "medium") as VerbosityLevel }, - temperature: this.options.modelTemperature ?? GPT5_DEFAULT_TEMPERATURE, - // Explicitly include the calculated max output tokens for GPT‑5. + // Only include temperature if the model supports it + ...(model.info.supportsTemperature !== false && { + temperature: + this.options.modelTemperature ?? + (model.id.startsWith(GPT5_MODEL_PREFIX) + ? GPT5_DEFAULT_TEMPERATURE + : OPENAI_NATIVE_DEFAULT_TEMPERATURE), + }), + // Explicitly include the calculated max output tokens. // Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams). ...(model.maxTokens ? 
{ max_output_tokens: model.maxTokens } : {}), ...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }), } + } + private async *executeRequest( + requestBody: any, + model: OpenAiNativeModel, + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { try { // Use the official SDK const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable @@ -307,7 +252,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } for await (const event of stream) { - for await (const outChunk of this.processGpt5Event(event, model)) { + for await (const outChunk of this.processEvent(event, model)) { yield outChunk } } @@ -321,7 +266,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (is400Error && requestBody.previous_response_id && isPreviousResponseError) { // Log the error and retry without the previous_response_id console.warn( - `[GPT-5] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, + `[Responses API] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, ) // Remove the problematic previous_response_id and retry @@ -344,7 +289,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } for await (const event of retryStream) { - for await (const outChunk of this.processGpt5Event(event, model)) { + for await (const outChunk of this.processEvent(event, model)) { yield outChunk } } @@ -361,52 +306,88 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } } - private formatInputForResponsesAPI(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { - // Format the conversation for the Responses API input field - // Use Developer role format for GPT-5 (aligning with o1/o3 Developer role usage per GPT-5 Responses guidance) - // This ensures consistent instruction handling across reasoning models - let formattedInput = `Developer: ${systemPrompt}\n\n` + private formatFullConversation(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): any { + // Format the entire conversation history for the Responses API using structured format + // This supports both text and images + const formattedMessages: any[] = [] + + // Add system prompt as developer message + formattedMessages.push({ + role: "developer", + content: [{ type: "input_text", text: systemPrompt }], + }) + // Process each message for (const message of messages) { - const role = message.role === "user" ? "User" : "Assistant" + const role = message.role === "user" ? 
"user" : "assistant" + const content: any[] = [] - // Handle text content if (typeof message.content === "string") { - formattedInput += `${role}: ${message.content}\n\n` + // For user messages, use input_text; for assistant messages, use output_text + if (role === "user") { + content.push({ type: "input_text", text: message.content }) + } else { + content.push({ type: "output_text", text: message.content }) + } } else if (Array.isArray(message.content)) { - // Handle content blocks - const textContent = message.content - .filter((block) => block.type === "text") - .map((block) => (block as any).text) - .join("\n") - if (textContent) { - formattedInput += `${role}: ${textContent}\n\n` + // For array content with potential images, format properly + for (const block of message.content) { + if (block.type === "text") { + // For user messages, use input_text; for assistant messages, use output_text + if (role === "user") { + content.push({ type: "input_text", text: (block as any).text }) + } else { + content.push({ type: "output_text", text: (block as any).text }) + } + } else if (block.type === "image") { + const image = block as Anthropic.Messages.ImageBlockParam + // Format image with proper data URL - images are always input_image + const imageUrl = `data:${image.source.media_type};base64,${image.source.data}` + content.push({ type: "input_image", image_url: imageUrl }) + } } } + + if (content.length > 0) { + formattedMessages.push({ role, content }) + } } - return formattedInput.trim() + return formattedMessages } - private formatSingleMessageForResponsesAPI(message: Anthropic.Messages.MessageParam): string { + private formatSingleStructuredMessage(message: Anthropic.Messages.MessageParam): any { // Format a single message for the Responses API when using previous_response_id - const role = message.role === "user" ? "User" : "Assistant" + // When using previous_response_id, we only send the latest user message + const role = message.role === "user" ? 
"user" : "assistant" - // Handle text content if (typeof message.content === "string") { - return `${role}: ${message.content}` + // For simple string content, return structured format with proper type + return { + role, + content: [{ type: "input_text", text: message.content }], + } } else if (Array.isArray(message.content)) { - // Handle content blocks - const textContent = message.content - .filter((block) => block.type === "text") - .map((block) => (block as any).text) - .join("\n") - if (textContent) { - return `${role}: ${textContent}` + // Extract text and image content from blocks + const content: any[] = [] + + for (const block of message.content) { + if (block.type === "text") { + // User messages use input_text + content.push({ type: "input_text", text: (block as any).text }) + } else if (block.type === "image") { + const image = block as Anthropic.Messages.ImageBlockParam + const imageUrl = `data:${image.source.media_type};base64,${image.source.data}` + content.push({ type: "input_image", image_url: imageUrl }) + } + } + + if (content.length > 0) { + return { role, content } } } - return "" + return null } private async *makeGpt5ResponsesAPIRequest( @@ -457,7 +438,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) { // Log the error and retry without the previous_response_id console.warn( - `[GPT-5 SSE] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, + `[Responses API] Previous response ID not found (${requestBody.previous_response_id}), retrying without it`, ) // Remove the problematic previous_response_id and retry @@ -482,32 +463,32 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (!retryResponse.ok) { // If retry also fails, throw the original error - throw new Error(`GPT-5 API retry failed (${retryResponse.status})`) + throw new Error(`Responses API retry failed (${retryResponse.status})`) } if (!retryResponse.body) { - throw new Error("GPT-5 Responses API error: No response body from retry request") + throw new Error("Responses API error: No response body from retry request") } // Handle the successful retry response - yield* this.handleGpt5StreamResponse(retryResponse.body, model) + yield* this.handleStreamResponse(retryResponse.body, model) return } // Provide user-friendly error messages based on status code switch (response.status) { case 400: - errorMessage = "Invalid request to GPT-5 API. Please check your input parameters." + errorMessage = "Invalid request to Responses API. Please check your input parameters." break case 401: errorMessage = "Authentication failed. Please check your OpenAI API key." break case 403: - errorMessage = "Access denied. Your API key may not have access to GPT-5 models." + errorMessage = "Access denied. Your API key may not have access to this endpoint." break case 404: errorMessage = - "GPT-5 API endpoint not found. The model may not be available yet or requires a different configuration." + "Responses API endpoint not found. The endpoint may not be available yet or requires a different configuration." break case 429: errorMessage = "Rate limit exceeded. Please try again later." @@ -518,7 +499,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio errorMessage = "OpenAI service error. Please try again later." 
break default: - errorMessage = `GPT-5 API error (${response.status})` + errorMessage = `Responses API error (${response.status})` } // Append details if available @@ -530,73 +511,74 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } if (!response.body) { - throw new Error("GPT-5 Responses API error: No response body") + throw new Error("Responses API error: No response body") } // Handle streaming response - yield* this.handleGpt5StreamResponse(response.body, model) + yield* this.handleStreamResponse(response.body, model) } catch (error) { if (error instanceof Error) { // Re-throw with the original error message if it's already formatted - if (error.message.includes("GPT-5")) { + if (error.message.includes("Responses API")) { throw error } // Otherwise, wrap it with context - throw new Error(`Failed to connect to GPT-5 API: ${error.message}`) + throw new Error(`Failed to connect to Responses API: ${error.message}`) } // Handle non-Error objects - throw new Error(`Unexpected error connecting to GPT-5 API`) + throw new Error(`Unexpected error connecting to Responses API`) } } /** - * Prepares the input and conversation continuity parameters for a GPT-5 API call. + * Prepares the input and conversation continuity parameters for a Responses API call. + * Decides whether to send full conversation or just the latest message based on previousResponseId. * * - If a `previousResponseId` is available (either from metadata or the handler's state), * it formats only the most recent user message for the input and returns the response ID * to maintain conversation context. * - Otherwise, it formats the entire conversation history (system prompt + messages) for the input. * - * @returns An object containing the formatted input string and the previous response ID (if used). + * @returns An object containing the formatted input and the previous response ID (if used). */ - private prepareGpt5Input( + private prepareResponsesApiInput( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, - ): { formattedInput: string; previousResponseId?: string } { - // Respect explicit suppression signal for continuity (e.g. immediately after condense) - const isFirstMessage = messages.length === 1 && messages[0].role === "user" - const allowFallback = !metadata?.suppressPreviousResponseId + ): { formattedInput: any; previousResponseId?: string } { + // Note: suppressPreviousResponseId is handled in handleResponsesApiMessage + // This method now only handles formatting based on whether we have a previous response ID - const previousResponseId = - metadata?.previousResponseId ?? (allowFallback && !isFirstMessage ? this.lastResponseId : undefined) + // Check for previous response ID from metadata or fallback to lastResponseId + const isFirstMessage = messages.length === 1 && messages[0].role === "user" + const previousResponseId = metadata?.previousResponseId ?? (!isFirstMessage ? this.lastResponseId : undefined) if (previousResponseId) { + // When using previous_response_id, only send the latest user message const lastUserMessage = [...messages].reverse().find((msg) => msg.role === "user") - const formattedInput = lastUserMessage ? 
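To make the two branches described above concrete (ids and text are illustrative): on a first turn the full structured history is sent with no continuity id, while on a follow-up turn only the newest user message is sent together with previous_response_id. A follow-up user message containing text plus an image block would be formatted roughly as:

    // First turn:  { formattedInput: [developer, user, assistant, ...] }
    // Follow-up:   { formattedInput: [latestUserMessage], previousResponseId: "resp_123" }
    const latestUserMessage = {
        role: "user",
        content: [
            { type: "input_text", text: "Here is the failing screenshot." },
            { type: "input_image", image_url: "data:image/png;base64,iVBORw0KGgo..." },
        ],
    }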
this.formatSingleMessageForResponsesAPI(lastUserMessage) : "" - return { formattedInput, previousResponseId } + if (lastUserMessage) { + const formattedMessage = this.formatSingleStructuredMessage(lastUserMessage) + // formatSingleStructuredMessage now always returns an object with role and content + if (formattedMessage) { + return { formattedInput: [formattedMessage], previousResponseId } + } + } + return { formattedInput: [], previousResponseId } } else { - const formattedInput = this.formatInputForResponsesAPI(systemPrompt, messages) + // Format full conversation history (returns an array of structured messages) + const formattedInput = this.formatFullConversation(systemPrompt, messages) return { formattedInput } } } /** - * Handles the streaming response from the GPT-5 Responses API. + * Handles the streaming response from the Responses API. * * This function iterates through the Server-Sent Events (SSE) stream, parses each event, * and yields structured data chunks (`ApiStream`). It handles a wide variety of event types, * including text deltas, reasoning, usage data, and various status/tool events. - * - * The following event types are intentionally ignored as they are not currently consumed - * by the client application: - * - Audio events (`response.audio.*`) - * - Most tool call events (e.g., `response.function_call_arguments.*`, `response.mcp_call.*`, etc.) - * as the client does not yet support rendering these tool interactions. - * - Status events (`response.created`, `response.in_progress`, etc.) as they are informational - * and do not affect the final output. */ - private async *handleGpt5StreamResponse(body: ReadableStream, model: OpenAiNativeModel): ApiStream { + private async *handleStreamResponse(body: ReadableStream, model: OpenAiNativeModel): ApiStream { const reader = body.getReader() const decoder = new TextDecoder() let buffer = "" @@ -629,8 +611,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Delegate standard event types to the shared processor to avoid duplication - if (parsed?.type && this.gpt5CoreHandledTypes.has(parsed.type)) { - for await (const outChunk of this.processGpt5Event(parsed, model)) { + if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) { + for await (const outChunk of this.processEvent(parsed, model)) { // Track whether we've emitted any content so fallback handling can decide appropriately if (outChunk.type === "text" || outChunk.type === "reasoning") { hasContent = true @@ -670,7 +652,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Check for usage in the complete response if (parsed.response.usage) { - const usageData = this.normalizeGpt5Usage(parsed.response.usage, model) + const usageData = this.normalizeUsage(parsed.response.usage, model) if (usageData) { yield usageData } @@ -910,7 +892,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Response failed if (parsed.error || parsed.message) { throw new Error( - `GPT-5 response failed: ${parsed.error?.message || parsed.message || "Unknown failure"}`, + `Response failed: ${parsed.error?.message || parsed.message || "Unknown failure"}`, ) } } else if (parsed.type === "response.completed" || parsed.type === "response.done") { @@ -990,7 +972,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } } else if (parsed.usage) { // Handle usage if it arrives in a separate, non-completed event - const usageData = 
this.normalizeGpt5Usage(parsed.usage, model) + const usageData = this.normalizeUsage(parsed.usage, model) if (usageData) { yield usageData } @@ -1026,19 +1008,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // This can happen in certain edge cases and shouldn't break the flow } catch (error) { if (error instanceof Error) { - throw new Error(`Error processing GPT-5 response stream: ${error.message}`) + throw new Error(`Error processing response stream: ${error.message}`) } - throw new Error("Unexpected error processing GPT-5 response stream") + throw new Error("Unexpected error processing response stream") } finally { reader.releaseLock() } } /** - * Shared processor for GPT‑5 Responses API events. - * Used by both the official SDK streaming path and (optionally) by the SSE fallback. + * Shared processor for Responses API events. */ - private async *processGpt5Event(event: any, model: OpenAiNativeModel): ApiStream { + private async *processEvent(event: any, model: OpenAiNativeModel): ApiStream { // Persist response id for conversation continuity when available if (event?.response?.id) { this.resolveResponseId(event.response.id) @@ -1096,7 +1077,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Completion events that may carry usage if (event?.type === "response.done" || event?.type === "response.completed") { const usage = event?.response?.usage || event?.usage || undefined - const usageData = this.normalizeGpt5Usage(usage, model) + const usageData = this.normalizeUsage(usage, model) if (usageData) { yield usageData } @@ -1110,87 +1091,30 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } if (event?.usage) { - const usageData = this.normalizeGpt5Usage(event.usage, model) + const usageData = this.normalizeUsage(event.usage, model) if (usageData) { yield usageData } } } - private getGpt5ReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined { + private getReasoningEffort(model: OpenAiNativeModel): ReasoningEffortWithMinimal | undefined { const { reasoning, info } = model // Check if reasoning effort is configured if (reasoning && "reasoning_effort" in reasoning) { const effort = reasoning.reasoning_effort as string - // Support all effort levels including "minimal" for GPT-5 + // Support all effort levels if (effort === "minimal" || effort === "low" || effort === "medium" || effort === "high") { return effort as ReasoningEffortWithMinimal } } - // Centralize default: use the model's default from types if available; otherwise undefined + // Use the model's default from types if available return info.reasoningEffort as ReasoningEffortWithMinimal | undefined } - private isGpt5Model(modelId: string): boolean { - return modelId.startsWith("gpt-5") - } - - private isResponsesApiModel(modelId: string): boolean { - // Both GPT-5 and Codex Mini use the v1/responses endpoint - return modelId.startsWith("gpt-5") || modelId === "codex-mini-latest" - } - - private async *handleStreamResponse( - stream: AsyncIterable, - model: OpenAiNativeModel, - ): ApiStream { - for await (const chunk of stream) { - const delta = chunk.choices[0]?.delta - - if (delta?.content) { - yield { - type: "text", - text: delta.content, - } - } - - if (chunk.usage) { - yield* this.yieldUsage(model.info, chunk.usage) - } - } - } - - private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream { - const inputTokens = usage?.prompt_tokens || 0 - 
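For orientation, a representative completion event of the shape processEvent consumes might look like the following. Values are invented, and the field names inside usage are an assumption based on the public Responses API (input_tokens/output_tokens), since normalizeUsage is defined elsewhere:

    const exampleCompletedEvent = {
        type: "response.completed",
        response: {
            id: "resp_456",                                   // persisted via resolveResponseId() for the next turn
            usage: { input_tokens: 1200, output_tokens: 85 }, // forwarded to normalizeUsage()
        },
    }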
const outputTokens = usage?.completion_tokens || 0 - - // Extract cache tokens from prompt_tokens_details - // According to OpenAI API, cached_tokens represents tokens read from cache - const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || undefined - - // Cache write tokens are not typically reported in the standard streaming response - // They would be in cache_creation_input_tokens if available - const cacheWriteTokens = (usage as any)?.cache_creation_input_tokens || undefined - - const totalCost = calculateApiCostOpenAI( - info, - inputTokens, - outputTokens, - cacheWriteTokens || 0, - cacheReadTokens || 0, - ) - - yield { - type: "usage", - inputTokens: inputTokens, - outputTokens: outputTokens, - cacheWriteTokens: cacheWriteTokens, - cacheReadTokens: cacheReadTokens, - totalCost: totalCost, - } - } + // Removed isResponsesApiModel method as ALL models now use the Responses API override getModel() { const modelId = this.options.apiModelId @@ -1205,18 +1129,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio modelId: id, model: info, settings: this.options, - defaultTemperature: this.isGpt5Model(id) ? GPT5_DEFAULT_TEMPERATURE : OPENAI_NATIVE_DEFAULT_TEMPERATURE, + defaultTemperature: id.startsWith(GPT5_MODEL_PREFIX) + ? GPT5_DEFAULT_TEMPERATURE + : OPENAI_NATIVE_DEFAULT_TEMPERATURE, }) - // For models using the Responses API (GPT-5 and Codex Mini), ensure we support reasoning effort - if (this.isResponsesApiModel(id)) { - const effort = - (this.options.reasoningEffort as ReasoningEffortWithMinimal | undefined) ?? - (info.reasoningEffort as ReasoningEffortWithMinimal | undefined) + // For models using the Responses API, ensure we support reasoning effort + const effort = + (this.options.reasoningEffort as ReasoningEffortWithMinimal | undefined) ?? + (info.reasoningEffort as ReasoningEffortWithMinimal | undefined) - if (effort) { - ;(params.reasoning as any) = { reasoning_effort: effort } - } + if (effort) { + ;(params.reasoning as any) = { reasoning_effort: effort } } // The o3 models are named like "o3-mini-[reasoning-effort]", which are @@ -1225,7 +1149,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } /** - * Gets the last GPT-5 response ID captured from the Responses API stream. + * Gets the last response ID captured from the Responses API stream. * Used for maintaining conversation continuity across requests. * @returns The response ID, or undefined if not available yet */ @@ -1234,46 +1158,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } /** - * Sets the last GPT-5 response ID for conversation continuity. + * Sets the last response ID for conversation continuity. * Typically only used in tests or special flows. - * @param responseId The GPT-5 response ID to store + * @param responseId The response ID to store */ setResponseId(responseId: string): void { this.lastResponseId = responseId } async completePrompt(prompt: string): Promise { - try { - const { id, temperature, reasoning, verbosity } = this.getModel() - const isResponsesApi = this.isResponsesApiModel(id) - - if (isResponsesApi) { - // Models that use the Responses API (GPT-5 and Codex Mini) don't support non-streaming completion - throw new Error(`completePrompt is not supported for ${id}. 
Use createMessage (Responses API) instead.`) - } - - const params: any = { - model: id, - messages: [{ role: "user", content: prompt }], - } - - // Add temperature if supported - if (temperature !== undefined) { - params.temperature = temperature - } - - // Add reasoning parameters for models that support them - if (reasoning) { - Object.assign(params, reasoning) - } - - const response = await this.client.chat.completions.create(params) - return response.choices[0]?.message.content || "" - } catch (error) { - if (error instanceof Error) { - throw new Error(`OpenAI Native completion error: ${error.message}`) - } - throw error - } + // ALL models now use the Responses API which doesn't support non-streaming completion + throw new Error(`completePrompt is not supported. Use createMessage (Responses API) instead.`) } } diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 2103dacb274..f7cdb98b43f 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -99,6 +99,7 @@ import { getMessagesSinceLastSummary, summarizeConversation } from "../condense" import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning" import { restoreTodoListForTask } from "../tools/updateTodoListTool" import { AutoApprovalHandler } from "./AutoApprovalHandler" +import { Gpt5Metadata, ClineMessageWithMetadata } from "./types" const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes @@ -711,9 +712,8 @@ export class Task extends EventEmitter implements TaskLike { this.emit(RooCodeEventName.TaskIdle, this.taskId) } - console.log(`[Task#${this.taskId}] pWaitFor askResponse(${type}) -> blocking`) + // Wait for askResponse to be set await pWaitFor(() => this.askResponse !== undefined || this.lastMessageTs !== askTs, { interval: 100 }) - console.log(`[Task#${this.taskId}] pWaitFor askResponse(${type}) -> unblocked (${this.askResponse})`) if (this.lastMessageTs !== askTs) { // Could happen if we send multiple asks in a row i.e. 
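Since completePrompt now always throws, a caller that needs a one-shot completion would stream through createMessage instead. A minimal sketch, assuming the handler is already constructed and the ApiHandler signature used elsewhere in this codebase:

    let text = ""
    for await (const chunk of handler.createMessage(systemPrompt, messages, metadata)) {
        if (chunk.type === "text") text += chunk.text
    }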
with @@ -837,6 +837,10 @@ export class Task extends EventEmitter implements TaskLike { return } await this.overwriteApiConversationHistory(messages) + + // Set flag to skip previous_response_id on the next API call after manual condense + this.skipPrevResponseIdOnce = true + const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens } await this.say( "condense_context", @@ -1008,7 +1012,7 @@ export class Task extends EventEmitter implements TaskLike { let imageBlocks: Anthropic.ImageBlockParam[] = formatResponse.imageBlocks(images) - console.log(`[subtasks] task ${this.taskId}.${this.instanceId} starting`) + // Task starting await this.initiateTaskLoop([ { @@ -1044,6 +1048,8 @@ export class Task extends EventEmitter implements TaskLike { } private async resumeTaskFromHistory() { + // Resuming task from history + if (this.enableTaskBridge) { try { this.bridgeService = this.bridgeService || UnifiedBridgeService.getInstance() @@ -1060,6 +1066,17 @@ export class Task extends EventEmitter implements TaskLike { const modifiedClineMessages = await this.getSavedClineMessages() + // Check for any stored GPT-5 response IDs in the message history + const gpt5Messages = modifiedClineMessages.filter( + (m) => + m.type === "say" && + m.say === "text" && + (m as ClineMessageWithMetadata).metadata?.gpt5?.previous_response_id, + ) + if (gpt5Messages.length > 0) { + const lastGpt5Message = gpt5Messages[gpt5Messages.length - 1] as ClineMessage & ClineMessageWithMetadata + } + // Remove any resume messages that may have been added before const lastRelevantMessageIndex = findLastIndex( modifiedClineMessages, @@ -1289,13 +1306,13 @@ export class Task extends EventEmitter implements TaskLike { await this.overwriteApiConversationHistory(modifiedApiConversationHistory) - console.log(`[subtasks] task ${this.taskId}.${this.instanceId} resuming from history item`) + // Task resuming from history item await this.initiateTaskLoop(newUserContent) } public dispose(): void { - console.log(`[Task] disposing task ${this.taskId}.${this.instanceId}`) + // Disposing task // Stop waiting for child task completion. if (this.pauseInterval) { @@ -1358,7 +1375,7 @@ export class Task extends EventEmitter implements TaskLike { } public async abortTask(isAbandoned = false) { - console.log(`[subtasks] aborting task ${this.taskId}.${this.instanceId}`) + // Aborting task // Will stop any autonomously running promises. 
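Putting the continuity pieces together, the metadata passed to createMessage over a few turns around a condense looks roughly like this (mode, taskId, and id values are illustrative; the field names are the ones used above):

    const normalTurnMetadata = { mode: "code", taskId: "task-1", previousResponseId: "resp_123" }
    // After a condense, skipPrevResponseIdOnce is set, so the next call drops the id and
    // explicitly suppresses the fallback, forcing the full (condensed) conversation to be resent:
    const postCondenseMetadata = { mode: "code", taskId: "task-1", suppressPreviousResponseId: true }
    // The id persisted on the last assistant message can later be read back with the new types
    // instead of `as any`:
    //   (msg as ClineMessage & ClineMessageWithMetadata).metadata?.gpt5?.previous_response_id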
if (isAbandoned) { @@ -1598,7 +1615,7 @@ export class Task extends EventEmitter implements TaskLike { // lastMessage.ts = Date.now() DO NOT update ts since it is used as a key for virtuoso list lastMessage.partial = false // instead of streaming partialMessage events, we do a save and post like normal to persist to disk - console.log("updating partial message", lastMessage) + // updating partial message // await this.saveClineMessages() } @@ -1699,7 +1716,7 @@ export class Task extends EventEmitter implements TaskLike { } if (this.abort) { - console.log(`aborting stream, this.abandoned = ${this.abandoned}`) + // Aborting stream if (!this.abandoned) { // Only need to gracefully abort if this instance @@ -2126,23 +2143,27 @@ export class Task extends EventEmitter implements TaskLike { this.clineMessages, (m) => m.type === "say" && - (m as any).say === "text" && - (m as any).metadata?.gpt5?.previous_response_id, + m.say === "text" && + !!(m as ClineMessageWithMetadata).metadata?.gpt5?.previous_response_id, ) if (idx !== -1) { // Use the previous_response_id from the last assistant message for this request - previousResponseId = ((this.clineMessages[idx] as any).metadata.gpt5.previous_response_id || - undefined) as string | undefined + const message = this.clineMessages[idx] as ClineMessage & ClineMessageWithMetadata + previousResponseId = message.metadata?.gpt5?.previous_response_id } + } else if (this.skipPrevResponseIdOnce) { + // Skipping previous_response_id due to recent condense operation - will send full conversation context } - } catch { + } catch (error) { + console.error(`[Task#${this.taskId}] Error retrieving GPT-5 response ID:`, error) // non-fatal } const metadata: ApiHandlerCreateMessageMetadata = { mode: mode, taskId: this.taskId, - ...(previousResponseId ? { previousResponseId } : {}), + // Only include previousResponseId if we're NOT suppressing it + ...(previousResponseId && !this.skipPrevResponseIdOnce ? { previousResponseId } : {}), // If a condense just occurred, explicitly suppress continuity fallback for the next call ...(this.skipPrevResponseIdOnce ? { suppressPreviousResponseId: true } : {}), } @@ -2307,19 +2328,23 @@ export class Task extends EventEmitter implements TaskLike { const lastResponseId: string | undefined = (this.api as any)?.getLastResponseId?.() const idx = findLastIndex( this.clineMessages, - (m) => m.type === "say" && (m as any).say === "text" && m.partial !== true, + (m) => m.type === "say" && m.say === "text" && m.partial !== true, ) if (idx !== -1) { - const msg = this.clineMessages[idx] as any - msg.metadata = msg.metadata ?? {} - msg.metadata.gpt5 = { + const msg = this.clineMessages[idx] as ClineMessage & ClineMessageWithMetadata + if (!msg.metadata) { + msg.metadata = {} + } + const gpt5Metadata: Gpt5Metadata = { ...(msg.metadata.gpt5 ?? {}), previous_response_id: lastResponseId, instructions: this.lastUsedInstructions, reasoning_summary: (reasoningMessage ?? 
"").trim() || undefined, } + msg.metadata.gpt5 = gpt5Metadata } - } catch { + } catch (error) { + console.error(`[Task#${this.taskId}] Error persisting GPT-5 metadata:`, error) // Non-fatal error in metadata persistence } } diff --git a/src/core/task/types.ts b/src/core/task/types.ts new file mode 100644 index 00000000000..607be51aab3 --- /dev/null +++ b/src/core/task/types.ts @@ -0,0 +1,37 @@ +/** + * Type definitions for Task-related metadata + */ + +/** + * GPT-5 specific metadata stored with assistant messages + * for maintaining conversation continuity across requests + */ +export interface Gpt5Metadata { + /** + * The response ID from the previous GPT-5 API response + * Used to maintain conversation continuity in subsequent requests + */ + previous_response_id?: string + + /** + * The system instructions/prompt used for this response + * Stored to track what instructions were active when the response was generated + */ + instructions?: string + + /** + * The reasoning summary from GPT-5's reasoning process + * Contains the model's internal reasoning if reasoning mode was enabled + */ + reasoning_summary?: string +} + +/** + * Extended ClineMessage type with GPT-5 metadata + */ +export interface ClineMessageWithMetadata { + metadata?: { + gpt5?: Gpt5Metadata + [key: string]: any + } +} diff --git a/src/package.json b/src/package.json index 9d694b2bbb3..dcd3a842f28 100644 --- a/src/package.json +++ b/src/package.json @@ -458,7 +458,7 @@ "monaco-vscode-textmate-theme-converter": "^0.1.7", "node-cache": "^5.1.2", "node-ipc": "^12.0.0", - "openai": "^5.0.0", + "openai": "^5.12.2", "os-name": "^6.0.0", "p-limit": "^6.2.0", "p-wait-for": "^5.0.2",