fix: omit prior conversation history from Responses API input when previous_response_id is set.

Lagyu · Lagyu · commit cd512544ff7c · 2025-08-27T12:13:06.000+09:00
diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts
@@ -1860,3 +1860,130 @@ describe("OpenAI Compatible - Responses API parity improvements", () => {
 		expect(args.reasoning.summary).toBeUndefined()
 	})
 })
+
+describe("OpenAI Compatible - Responses API minimal input parity (new tests)", () => {
+	beforeEach(() => {
+		// @ts-ignore - reuse mocks from this spec module
+		mockCreate.mockClear()
+		// @ts-ignore - reuse mocks from this spec module
+		mockResponsesCreate.mockClear()
+	})
+
+	it("sends only latest user message when previous_response_id is provided (string input, no Developer preface)", async () => {
+		const handler = new OpenAiHandler({
+			openAiApiKey: "k",
+			openAiModelId: "gpt-5",
+			openAiBaseUrl: "https://api.openai.com/v1/responses",
+			openAiStreamingEnabled: false,
+		})
+
+		const msgs: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: [{ type: "text" as const, text: "First" }] },
+			{ role: "assistant", content: [{ type: "text" as const, text: "Reply" }] },
+			{ role: "user", content: [{ type: "text" as const, text: "Latest" }] },
+		]
+
+		const chunks: any[] = []
+		for await (const ch of handler.createMessage("System Inst", msgs, { previousResponseId: "prev-1" } as any)) {
+			chunks.push(ch)
+		}
+
+		// Ensure Responses API was used with minimal input
+		// @ts-ignore
+		expect(mockResponsesCreate).toHaveBeenCalled()
+		// @ts-ignore
+		const args = mockResponsesCreate.mock.calls[0][0]
+
+		expect(typeof args.input).toBe("string")
+		expect(args.input).toBe("User: Latest")
+		expect(String(args.input)).not.toContain("Developer: System Inst")
+	})
+
+	it("uses array input with only latest user content when previous_response_id and last user has images (no Developer preface)", async () => {
+		const handler = new OpenAiHandler({
+			openAiApiKey: "k",
+			openAiModelId: "gpt-5",
+			openAiBaseUrl: "https://api.openai.com/v1/responses",
+			openAiStreamingEnabled: false,
+		})
+
+		const msgs: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: [{ type: "text" as const, text: "Prev" }] },
+			{ role: "assistant", content: [{ type: "text" as const, text: "Ok" }] },
+			{
+				role: "user",
+				content: [
+					{ type: "text" as const, text: "See" },
+					{ type: "image" as const, source: { media_type: "image/png", data: "IMGDATA" } as any },
+				],
+			},
+		]
+
+		const iter = handler.createMessage("Sys", msgs, { previousResponseId: "prev-2" } as any)
+		for await (const _ of iter) {
+			// consume
+		}
+
+		// @ts-ignore
+		const args = mockResponsesCreate.mock.calls.pop()?.[0]
+		expect(Array.isArray(args.input)).toBe(true)
+
+		const arr = args.input as any[]
+		expect(arr.length).toBe(1)
+		expect(arr[0]?.role).toBe("user")
+
+		const contents = arr[0]?.content || []
+		const hasImg = contents.some((p: any) => p?.type === "input_image")
+		expect(hasImg).toBe(true)
+
+		// No Developer preface should be injected in minimal mode
+		const hasDev = contents.some(
+			(p: any) => p?.type === "input_text" && typeof p.text === "string" && p.text.includes("Developer:"),
+		)
+		expect(hasDev).toBe(false)
+	})
+
+	it("always includes max_output_tokens for Responses API", async () => {
+		const handler = new OpenAiHandler({
+			openAiApiKey: "k",
+			openAiModelId: "gpt-5",
+			openAiBaseUrl: "https://api.openai.com/v1/responses",
+			openAiStreamingEnabled: false,
+			includeMaxTokens: false, // should still include based on model info
+			openAiCustomModelInfo: {
+				contextWindow: 128_000,
+				maxTokens: 123, // fallback used when modelMaxTokens not set
+				supportsPromptCache: false,
+			},
+		})
+
+		for await (const _ of handler.createMessage("sys", [
+			{ role: "user", content: [{ type: "text" as const, text: "Hi" }] },
+		])) {
+			// consume
+		}
+
+		// @ts-ignore
+		const args = mockResponsesCreate.mock.calls.pop()?.[0]
+		expect(args).toHaveProperty("max_output_tokens", 123)
+	})
+
+	it("does not include text.verbosity when not provided", async () => {
+		const handler = new OpenAiHandler({
+			openAiApiKey: "k",
+			openAiModelId: "gpt-5",
+			openAiBaseUrl: "https://api.openai.com/v1/responses",
+			openAiStreamingEnabled: false,
+		})
+
+		for await (const _ of handler.createMessage("sys", [
+			{ role: "user", content: [{ type: "text" as const, text: "Hi" }] },
+		])) {
+			// consume
+		}
+
+		// @ts-ignore
+		const args = mockResponsesCreate.mock.calls.pop()?.[0]
+		expect(args).not.toHaveProperty("text")
+	})
+})
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
@@ -182,21 +182,35 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 
 			// Build Responses payload (align with OpenAI Native Responses API formatting)
 			// Azure- and Responses-compatible multimodal handling:
-			// - Use array input ONLY when the latest user message contains images
-			// - Include the most recent assistant message as input_text to preserve continuity
-			// - Always include a Developer preface
+			// - Without previous_response_id: use array input ONLY when the latest user message contains images
+			// - When previous_response_id is present, send only the latest user turn:
+			//   • Text-only => single string "User: ...", no Developer preface
+			//   • With images => one-item array containing only the latest user content (no Developer preface)
 			const lastUserMessage = [...messages].reverse().find((m) => m.role === "user")
 			const lastUserHasImages =
 				!!lastUserMessage &&
 				Array.isArray(lastUserMessage.content) &&
 				lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
 
+			// Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input)
+			const previousId = metadata?.suppressPreviousResponseId
+				? undefined
+				: (metadata?.previousResponseId ?? this.lastResponseId)
+
+			const minimalInputMode = Boolean(previousId)
+
 			let inputPayload: unknown
-			if (lastUserHasImages && lastUserMessage) {
-				// Select messages to retain context in array mode:
-				// - The most recent assistant message (text-only, as input_text)
-				// - All user messages that contain images
-				// - The latest user message (even if it has no image)
+			if (minimalInputMode && lastUserMessage) {
+				// Minimal-mode: only the latest user message (no Developer preface)
+				if (lastUserHasImages) {
+					// Single-item array with just the latest user content
+					inputPayload = this._toResponsesInput([lastUserMessage])
+				} else {
+					// Single message string "User: ..."
+					inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true)
+				}
+			} else if (lastUserHasImages && lastUserMessage) {
+				// No previous_response_id and the latest user message has images: include Developer preface and minimal prior context to preserve continuity
 				const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant")
 
 				const messagesForArray = messages.filter((m) => {
@@ -219,15 +233,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				}
 				inputPayload = [developerPreface, ...arrayInput]
 			} else {
-				// Pure text history: use compact transcript (includes both user and assistant turns)
+				// Pure text history: full compact transcript (includes both user and assistant turns)
 				inputPayload = this._formatResponsesInput(systemPrompt, messages)
 			}
 			const usedArrayInput = Array.isArray(inputPayload)
 
-			const previousId = metadata?.suppressPreviousResponseId
-				? undefined
-				: (metadata?.previousResponseId ?? this.lastResponseId)
-
 			const basePayload: Record<string, unknown> = {
 				model: modelId,
 				input: inputPayload,
@@ -262,20 +272,19 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
 			}
 
-			// Verbosity: include via text.verbosity (Responses API expectation per openai-native handler)
-			const effectiveVerbosity = this.options.verbosity || verbosity
-			if (effectiveVerbosity) {
+			// Verbosity: include only when explicitly specified in settings
+			if (this.options.verbosity) {
 				;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = {
-					verbosity: effectiveVerbosity as "low" | "medium" | "high",
+					verbosity: this.options.verbosity as "low" | "medium" | "high",
 				}
 			}
 
-			// Add max_output_tokens if requested (Azure Responses naming)
-			if (this.options.includeMaxTokens === true) {
-				basePayload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
-			}
+			// Always include max_output_tokens for Responses API to cap output length
+			const reservedMax = (modelParams as any)?.maxTokens
+			;(basePayload as Record<string, unknown>).max_output_tokens =
+				this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens
 
-			// Non-streaming path (preserves existing behavior and tests)
+			// Non-streaming path
 			if (nonStreaming) {
 				try {
 					const response = await (
@@ -314,10 +323,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 						).responses.create(withoutVerbosity)
 						yield* this._yieldResponsesResult(response as unknown, modelInfo)
 					} else if (usedArrayInput && this._isInputTextInvalidError(err)) {
-						// Azure-specific fallback: retry with string transcript when array input is rejected
+						// Azure-specific fallback: retry with string input when array input is rejected (latest user message only if previous_response_id is set, else the full transcript)
 						const retryPayload: Record<string, unknown> = {
 							...basePayload,
-							input: this._formatResponsesInput(systemPrompt, messages),
+							input:
+								previousId && lastUserMessage
+									? this._formatResponsesSingleMessage(lastUserMessage, true)
+									: this._formatResponsesInput(systemPrompt, messages),
 						}
 						const response = await (
 							this.client as unknown as {
@@ -412,10 +424,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 						yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo)
 					}
 				} else if (usedArrayInput && this._isInputTextInvalidError(err)) {
-					// Azure-specific fallback for streaming: retry with string transcript while keeping stream: true
+					// Azure-specific fallback for streaming: retry with string input while keeping stream: true (latest user message only if previous_response_id is set, else the full transcript)
 					const retryStreamingPayload: Record<string, unknown> = {
 						...streamingPayload,
-						input: this._formatResponsesInput(systemPrompt, messages),
+						input:
+							previousId && lastUserMessage
+								? this._formatResponsesSingleMessage(lastUserMessage, true)
+								: this._formatResponsesInput(systemPrompt, messages),
 					}
 					const maybeStreamRetry = await (
 						this.client as unknown as {
@@ -661,9 +676,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 					payload.temperature = this.options.modelTemperature
 				}
 
-				// Verbosity via text.verbosity
+				// Verbosity via text.verbosity - include only when explicitly specified
 				if (this.options.verbosity) {
-					payload.text = { verbosity: this.options.verbosity }
+					payload.text = { verbosity: this.options.verbosity as "low" | "medium" | "high" }
 				}
 
 				// max_output_tokens
diff --git a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx
@@ -22,6 +22,7 @@ import { inputEventTransform, noTransform } from "../transforms"
 import { ModelPicker } from "../ModelPicker"
 import { R1FormatSetting } from "../R1FormatSetting"
 import { ThinkingBudget } from "../ThinkingBudget"
+import { Verbosity } from "../Verbosity"
 
 type OpenAICompatibleProps = {
 	apiConfiguration: ProviderSettings
@@ -40,6 +41,7 @@ export const OpenAICompatible = ({
 
 	const [azureApiVersionSelected, setAzureApiVersionSelected] = useState(!!apiConfiguration?.azureApiVersion)
 	const [openAiLegacyFormatSelected, setOpenAiLegacyFormatSelected] = useState(!!apiConfiguration?.openAiLegacyFormat)
+	const [verbositySelected, setVerbositySelected] = useState(!!apiConfiguration?.verbosity)
 
 	const [openAiModels, setOpenAiModels] = useState<Record<string, ModelInfo> | null>(null)
 
@@ -282,6 +284,27 @@ export const OpenAICompatible = ({
 					/>
 				)}
 			</div>
+			<div className="flex flex-col gap-1">
+				<Checkbox
+					checked={verbositySelected}
+					onChange={(checked: boolean) => {
+						setVerbositySelected(checked)
+						if (!checked) {
+							setApiConfigurationField("verbosity", undefined as any)
+						} else if (!apiConfiguration.verbosity) {
+							setApiConfigurationField("verbosity", "medium" as any)
+						}
+					}}>
+					{t("settings:providers.verbosity.label")}
+				</Checkbox>
+				{verbositySelected && (
+					<Verbosity
+						apiConfiguration={apiConfiguration}
+						setApiConfigurationField={setApiConfigurationField as any}
+						modelInfo={apiConfiguration.openAiCustomModelInfo || openAiModelInfoSaneDefaults}
+					/>
+				)}
+			</div>
 			<div className="flex flex-col gap-3">
 				<div className="text-sm text-vscode-descriptionForeground whitespace-pre-line">
 					{t("settings:providers.customModel.capabilities")}