Skip to content

Commit cd51254

Browse files
committed
fix: omit the conversation in responses api.
1 parent 825c502 commit cd51254

File tree

3 files changed

+193
-28
lines changed

3 files changed

+193
-28
lines changed

src/api/providers/__tests__/openai.spec.ts

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1860,3 +1860,130 @@ describe("OpenAI Compatible - Responses API parity improvements", () => {
18601860
expect(args.reasoning.summary).toBeUndefined()
18611861
})
18621862
})
1863+
1864+
describe("OpenAI Compatible - Responses API minimal input parity (new tests)", () => {
1865+
beforeEach(() => {
1866+
// @ts-ignore - reuse mocks from this spec module
1867+
mockCreate.mockClear()
1868+
// @ts-ignore - reuse mocks from this spec module
1869+
mockResponsesCreate.mockClear()
1870+
})
1871+
1872+
it("sends only latest user message when previous_response_id is provided (string input, no Developer preface)", async () => {
1873+
const handler = new OpenAiHandler({
1874+
openAiApiKey: "k",
1875+
openAiModelId: "gpt-5",
1876+
openAiBaseUrl: "https://api.openai.com/v1/responses",
1877+
openAiStreamingEnabled: false,
1878+
})
1879+
1880+
const msgs: Anthropic.Messages.MessageParam[] = [
1881+
{ role: "user", content: [{ type: "text" as const, text: "First" }] },
1882+
{ role: "assistant", content: [{ type: "text" as const, text: "Reply" }] },
1883+
{ role: "user", content: [{ type: "text" as const, text: "Latest" }] },
1884+
]
1885+
1886+
const chunks: any[] = []
1887+
for await (const ch of handler.createMessage("System Inst", msgs, { previousResponseId: "prev-1" } as any)) {
1888+
chunks.push(ch)
1889+
}
1890+
1891+
// Ensure Responses API was used with minimal input
1892+
// @ts-ignore
1893+
expect(mockResponsesCreate).toHaveBeenCalled()
1894+
// @ts-ignore
1895+
const args = mockResponsesCreate.mock.calls[0][0]
1896+
1897+
expect(typeof args.input).toBe("string")
1898+
expect(args.input).toBe("User: Latest")
1899+
expect(String(args.input)).not.toContain("Developer: System Inst")
1900+
})
1901+
1902+
it("uses array input with only latest user content when previous_response_id and last user has images (no Developer preface)", async () => {
1903+
const handler = new OpenAiHandler({
1904+
openAiApiKey: "k",
1905+
openAiModelId: "gpt-5",
1906+
openAiBaseUrl: "https://api.openai.com/v1/responses",
1907+
openAiStreamingEnabled: false,
1908+
})
1909+
1910+
const msgs: Anthropic.Messages.MessageParam[] = [
1911+
{ role: "user", content: [{ type: "text" as const, text: "Prev" }] },
1912+
{ role: "assistant", content: [{ type: "text" as const, text: "Ok" }] },
1913+
{
1914+
role: "user",
1915+
content: [
1916+
{ type: "text" as const, text: "See" },
1917+
{ type: "image" as const, source: { media_type: "image/png", data: "IMGDATA" } as any },
1918+
],
1919+
},
1920+
]
1921+
1922+
const iter = handler.createMessage("Sys", msgs, { previousResponseId: "prev-2" } as any)
1923+
for await (const _ of iter) {
1924+
// consume
1925+
}
1926+
1927+
// @ts-ignore
1928+
const args = mockResponsesCreate.mock.calls.pop()?.[0]
1929+
expect(Array.isArray(args.input)).toBe(true)
1930+
1931+
const arr = args.input as any[]
1932+
expect(arr.length).toBe(1)
1933+
expect(arr[0]?.role).toBe("user")
1934+
1935+
const contents = arr[0]?.content || []
1936+
const hasImg = contents.some((p: any) => p?.type === "input_image")
1937+
expect(hasImg).toBe(true)
1938+
1939+
// No Developer preface should be injected in minimal mode
1940+
const hasDev = contents.some(
1941+
(p: any) => p?.type === "input_text" && typeof p.text === "string" && p.text.includes("Developer:"),
1942+
)
1943+
expect(hasDev).toBe(false)
1944+
})
1945+
1946+
it("always includes max_output_tokens for Responses API", async () => {
1947+
const handler = new OpenAiHandler({
1948+
openAiApiKey: "k",
1949+
openAiModelId: "gpt-5",
1950+
openAiBaseUrl: "https://api.openai.com/v1/responses",
1951+
openAiStreamingEnabled: false,
1952+
includeMaxTokens: false, // should still include based on model info
1953+
openAiCustomModelInfo: {
1954+
contextWindow: 128_000,
1955+
maxTokens: 123, // fallback used when modelMaxTokens not set
1956+
supportsPromptCache: false,
1957+
},
1958+
})
1959+
1960+
for await (const _ of handler.createMessage("sys", [
1961+
{ role: "user", content: [{ type: "text" as const, text: "Hi" }] },
1962+
])) {
1963+
// consume
1964+
}
1965+
1966+
// @ts-ignore
1967+
const args = mockResponsesCreate.mock.calls.pop()?.[0]
1968+
expect(args).toHaveProperty("max_output_tokens", 123)
1969+
})
1970+
1971+
it("does not include text.verbosity when not provided", async () => {
1972+
const handler = new OpenAiHandler({
1973+
openAiApiKey: "k",
1974+
openAiModelId: "gpt-5",
1975+
openAiBaseUrl: "https://api.openai.com/v1/responses",
1976+
openAiStreamingEnabled: false,
1977+
})
1978+
1979+
for await (const _ of handler.createMessage("sys", [
1980+
{ role: "user", content: [{ type: "text" as const, text: "Hi" }] },
1981+
])) {
1982+
// consume
1983+
}
1984+
1985+
// @ts-ignore
1986+
const args = mockResponsesCreate.mock.calls.pop()?.[0]
1987+
expect(args).not.toHaveProperty("text")
1988+
})
1989+
})

src/api/providers/openai.ts

Lines changed: 43 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -182,21 +182,35 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
182182

183183
// Build Responses payload (align with OpenAI Native Responses API formatting)
184184
// Azure- and Responses-compatible multimodal handling:
185-
// - Use array input ONLY when the latest user message contains images
186-
// - Include the most recent assistant message as input_text to preserve continuity
187-
// - Always include a Developer preface
185+
// - Use array input ONLY when the latest user message contains images (initial turn)
186+
// - When previous_response_id is present, send only the latest user turn:
187+
// • Text-only => single string "User: ...", no Developer preface
188+
// • With images => one-item array containing only the latest user content (no Developer preface)
188189
const lastUserMessage = [...messages].reverse().find((m) => m.role === "user")
189190
const lastUserHasImages =
190191
!!lastUserMessage &&
191192
Array.isArray(lastUserMessage.content) &&
192193
lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
193194

195+
// Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input)
196+
const previousId = metadata?.suppressPreviousResponseId
197+
? undefined
198+
: (metadata?.previousResponseId ?? this.lastResponseId)
199+
200+
const minimalInputMode = Boolean(previousId)
201+
194202
let inputPayload: unknown
195-
if (lastUserHasImages && lastUserMessage) {
196-
// Select messages to retain context in array mode:
197-
// - The most recent assistant message (text-only, as input_text)
198-
// - All user messages that contain images
199-
// - The latest user message (even if it has no image)
203+
if (minimalInputMode && lastUserMessage) {
204+
// Minimal-mode: only the latest user message (no Developer preface)
205+
if (lastUserHasImages) {
206+
// Single-item array with just the latest user content
207+
inputPayload = this._toResponsesInput([lastUserMessage])
208+
} else {
209+
// Single message string "User: ..."
210+
inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true)
211+
}
212+
} else if (lastUserHasImages && lastUserMessage) {
213+
// Initial turn with images: include Developer preface and minimal prior context to preserve continuity
200214
const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant")
201215

202216
const messagesForArray = messages.filter((m) => {
@@ -219,15 +233,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
219233
}
220234
inputPayload = [developerPreface, ...arrayInput]
221235
} else {
222-
// Pure text history: use compact transcript (includes both user and assistant turns)
236+
// Pure text history: full compact transcript (includes both user and assistant turns)
223237
inputPayload = this._formatResponsesInput(systemPrompt, messages)
224238
}
225239
const usedArrayInput = Array.isArray(inputPayload)
226240

227-
const previousId = metadata?.suppressPreviousResponseId
228-
? undefined
229-
: (metadata?.previousResponseId ?? this.lastResponseId)
230-
231241
const basePayload: Record<string, unknown> = {
232242
model: modelId,
233243
input: inputPayload,
@@ -262,20 +272,19 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
262272
basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
263273
}
264274

265-
// Verbosity: include via text.verbosity (Responses API expectation per openai-native handler)
266-
const effectiveVerbosity = this.options.verbosity || verbosity
267-
if (effectiveVerbosity) {
275+
// Verbosity: include only when explicitly specified in settings
276+
if (this.options.verbosity) {
268277
;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = {
269-
verbosity: effectiveVerbosity as "low" | "medium" | "high",
278+
verbosity: this.options.verbosity as "low" | "medium" | "high",
270279
}
271280
}
272281

273-
// Add max_output_tokens if requested (Azure Responses naming)
274-
if (this.options.includeMaxTokens === true) {
275-
basePayload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
276-
}
282+
// Always include max_output_tokens for Responses API to cap output length
283+
const reservedMax = (modelParams as any)?.maxTokens
284+
;(basePayload as Record<string, unknown>).max_output_tokens =
285+
this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens
277286

278-
// Non-streaming path (preserves existing behavior and tests)
287+
// Non-streaming path
279288
if (nonStreaming) {
280289
try {
281290
const response = await (
@@ -314,10 +323,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
314323
).responses.create(withoutVerbosity)
315324
yield* this._yieldResponsesResult(response as unknown, modelInfo)
316325
} else if (usedArrayInput && this._isInputTextInvalidError(err)) {
317-
// Azure-specific fallback: retry with string transcript when array input is rejected
326+
// Azure-specific fallback: retry with a minimal single-message string when array input is rejected
318327
const retryPayload: Record<string, unknown> = {
319328
...basePayload,
320-
input: this._formatResponsesInput(systemPrompt, messages),
329+
input:
330+
previousId && lastUserMessage
331+
? this._formatResponsesSingleMessage(lastUserMessage, true)
332+
: this._formatResponsesInput(systemPrompt, messages),
321333
}
322334
const response = await (
323335
this.client as unknown as {
@@ -412,10 +424,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
412424
yield* this._yieldResponsesResult(maybeStreamRetry as unknown, modelInfo)
413425
}
414426
} else if (usedArrayInput && this._isInputTextInvalidError(err)) {
415-
// Azure-specific fallback for streaming: retry with string transcript while keeping stream: true
427+
// Azure-specific fallback for streaming: retry with minimal single-message string while keeping stream: true
416428
const retryStreamingPayload: Record<string, unknown> = {
417429
...streamingPayload,
418-
input: this._formatResponsesInput(systemPrompt, messages),
430+
input:
431+
previousId && lastUserMessage
432+
? this._formatResponsesSingleMessage(lastUserMessage, true)
433+
: this._formatResponsesInput(systemPrompt, messages),
419434
}
420435
const maybeStreamRetry = await (
421436
this.client as unknown as {
@@ -661,9 +676,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
661676
payload.temperature = this.options.modelTemperature
662677
}
663678

664-
// Verbosity via text.verbosity
679+
// Verbosity via text.verbosity - include only when explicitly specified
665680
if (this.options.verbosity) {
666-
payload.text = { verbosity: this.options.verbosity }
681+
payload.text = { verbosity: this.options.verbosity as "low" | "medium" | "high" }
667682
}
668683

669684
// max_output_tokens

webview-ui/src/components/settings/providers/OpenAICompatible.tsx

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import { inputEventTransform, noTransform } from "../transforms"
2222
import { ModelPicker } from "../ModelPicker"
2323
import { R1FormatSetting } from "../R1FormatSetting"
2424
import { ThinkingBudget } from "../ThinkingBudget"
25+
import { Verbosity } from "../Verbosity"
2526

2627
type OpenAICompatibleProps = {
2728
apiConfiguration: ProviderSettings
@@ -40,6 +41,7 @@ export const OpenAICompatible = ({
4041

4142
const [azureApiVersionSelected, setAzureApiVersionSelected] = useState(!!apiConfiguration?.azureApiVersion)
4243
const [openAiLegacyFormatSelected, setOpenAiLegacyFormatSelected] = useState(!!apiConfiguration?.openAiLegacyFormat)
44+
const [verbositySelected, setVerbositySelected] = useState(!!apiConfiguration?.verbosity)
4345

4446
const [openAiModels, setOpenAiModels] = useState<Record<string, ModelInfo> | null>(null)
4547

@@ -282,6 +284,27 @@ export const OpenAICompatible = ({
282284
/>
283285
)}
284286
</div>
287+
<div className="flex flex-col gap-1">
288+
<Checkbox
289+
checked={verbositySelected}
290+
onChange={(checked: boolean) => {
291+
setVerbositySelected(checked)
292+
if (!checked) {
293+
setApiConfigurationField("verbosity", undefined as any)
294+
} else if (!apiConfiguration.verbosity) {
295+
setApiConfigurationField("verbosity", "medium" as any)
296+
}
297+
}}>
298+
{t("settings:providers.verbosity.label")}
299+
</Checkbox>
300+
{verbositySelected && (
301+
<Verbosity
302+
apiConfiguration={apiConfiguration}
303+
setApiConfigurationField={setApiConfigurationField as any}
304+
modelInfo={apiConfiguration.openAiCustomModelInfo || openAiModelInfoSaneDefaults}
305+
/>
306+
)}
307+
</div>
285308
<div className="flex flex-col gap-3">
286309
<div className="text-sm text-vscode-descriptionForeground whitespace-pre-line">
287310
{t("settings:providers.customModel.capabilities")}

0 commit comments

Comments (0)