@@ -182,21 +182,35 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
182182
183183 // Build Responses payload (align with OpenAI Native Responses API formatting)
184184 // Azure- and Responses-compatible multimodal handling:
185- // - Use array input ONLY when the latest user message contains images
186- // - Include the most recent assistant message as input_text to preserve continuity
187- // - Always include a Developer preface
185+ // - Use array input ONLY when the latest user message contains images (initial turn)
186+ // - When previous_response_id is present, send only the latest user turn:
187+ // • Text-only => single string "User: ...", no Developer preface
188+ // • With images => one-item array containing only the latest user content (no Developer preface)
188189 const lastUserMessage = [ ...messages ] . reverse ( ) . find ( ( m ) => m . role === "user" )
189190 const lastUserHasImages =
190191 ! ! lastUserMessage &&
191192 Array . isArray ( lastUserMessage . content ) &&
192193 lastUserMessage . content . some ( ( b : unknown ) => ( b as { type ?: string } | undefined ) ?. type === "image" )
193194
195+ // Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input)
196+ const previousId = metadata ?. suppressPreviousResponseId
197+ ? undefined
198+ : ( metadata ?. previousResponseId ?? this . lastResponseId )
199+
200+ const minimalInputMode = Boolean ( previousId )
201+
194202 let inputPayload : unknown
195- if ( lastUserHasImages && lastUserMessage ) {
196- // Select messages to retain context in array mode:
197- // - The most recent assistant message (text-only, as input_text)
198- // - All user messages that contain images
199- // - The latest user message (even if it has no image)
203+ if ( minimalInputMode && lastUserMessage ) {
204+ // Minimal-mode: only the latest user message (no Developer preface)
205+ if ( lastUserHasImages ) {
206+ // Single-item array with just the latest user content
207+ inputPayload = this . _toResponsesInput ( [ lastUserMessage ] )
208+ } else {
209+ // Single message string "User: ..."
210+ inputPayload = this . _formatResponsesSingleMessage ( lastUserMessage , true )
211+ }
212+ } else if ( lastUserHasImages && lastUserMessage ) {
213+ // Initial turn with images: include Developer preface and minimal prior context to preserve continuity
200214 const lastAssistantMessage = [ ...messages ] . reverse ( ) . find ( ( m ) => m . role === "assistant" )
201215
202216 const messagesForArray = messages . filter ( ( m ) => {
@@ -219,15 +233,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
219233 }
220234 inputPayload = [ developerPreface , ...arrayInput ]
221235 } else {
222- // Pure text history: use compact transcript (includes both user and assistant turns)
236+ // Pure text history: full compact transcript (includes both user and assistant turns)
223237 inputPayload = this . _formatResponsesInput ( systemPrompt , messages )
224238 }
225239 const usedArrayInput = Array . isArray ( inputPayload )
226240
227- const previousId = metadata ?. suppressPreviousResponseId
228- ? undefined
229- : ( metadata ?. previousResponseId ?? this . lastResponseId )
230-
231241 const basePayload : Record < string , unknown > = {
232242 model : modelId ,
233243 input : inputPayload ,
@@ -262,20 +272,19 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
262272 basePayload . temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
263273 }
264274
265- // Verbosity: include via text.verbosity (Responses API expectation per openai-native handler)
266- const effectiveVerbosity = this . options . verbosity || verbosity
267- if ( effectiveVerbosity ) {
275+ // Verbosity: include only when explicitly specified in settings
276+ if ( this . options . verbosity ) {
268277 ; ( basePayload as { text ?: { verbosity : "low" | "medium" | "high" } } ) . text = {
269- verbosity : effectiveVerbosity as "low" | "medium" | "high" ,
278+ verbosity : this . options . verbosity as "low" | "medium" | "high" ,
270279 }
271280 }
272281
273- // Add max_output_tokens if requested (Azure Responses naming)
274- if ( this . options . includeMaxTokens === true ) {
275- basePayload . max_output_tokens = this . options . modelMaxTokens || modelInfo . maxTokens
276- }
282+ // Always include max_output_tokens for Responses API to cap output length
283+ const reservedMax = ( modelParams as any ) ?. maxTokens
284+ ; ( basePayload as Record < string , unknown > ) . max_output_tokens =
285+ this . options . modelMaxTokens || reservedMax || modelInfo . maxTokens
277286
278- // Non-streaming path (preserves existing behavior and tests)
287+ // Non-streaming path
279288 if ( nonStreaming ) {
280289 try {
281290 const response = await (
@@ -314,10 +323,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
314323 ) . responses . create ( withoutVerbosity )
315324 yield * this . _yieldResponsesResult ( response as unknown , modelInfo )
316325 } else if ( usedArrayInput && this . _isInputTextInvalidError ( err ) ) {
317- // Azure-specific fallback: retry with string transcript when array input is rejected
326+ // Azure-specific fallback: when array input is rejected, retry with string input (single latest user message if previousId is set, otherwise the full transcript)
318327 const retryPayload : Record < string , unknown > = {
319328 ...basePayload ,
320- input : this . _formatResponsesInput ( systemPrompt , messages ) ,
329+ input :
330+ previousId && lastUserMessage
331+ ? this . _formatResponsesSingleMessage ( lastUserMessage , true )
332+ : this . _formatResponsesInput ( systemPrompt , messages ) ,
321333 }
322334 const response = await (
323335 this . client as unknown as {
@@ -412,10 +424,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
412424 yield * this . _yieldResponsesResult ( maybeStreamRetry as unknown , modelInfo )
413425 }
414426 } else if ( usedArrayInput && this . _isInputTextInvalidError ( err ) ) {
415- // Azure-specific fallback for streaming: retry with string transcript while keeping stream: true
427+ // Azure-specific fallback for streaming: when array input is rejected, retry with string input (single latest user message if previousId is set, otherwise the full transcript) while keeping stream: true
416428 const retryStreamingPayload : Record < string , unknown > = {
417429 ...streamingPayload ,
418- input : this . _formatResponsesInput ( systemPrompt , messages ) ,
430+ input :
431+ previousId && lastUserMessage
432+ ? this . _formatResponsesSingleMessage ( lastUserMessage , true )
433+ : this . _formatResponsesInput ( systemPrompt , messages ) ,
419434 }
420435 const maybeStreamRetry = await (
421436 this . client as unknown as {
@@ -661,9 +676,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
661676 payload . temperature = this . options . modelTemperature
662677 }
663678
664- // Verbosity via text.verbosity
679+ // Verbosity via text.verbosity - include only when explicitly specified
665680 if ( this . options . verbosity ) {
666- payload . text = { verbosity : this . options . verbosity }
681+ payload . text = { verbosity : this . options . verbosity as "low" | "medium" | "high" }
667682 }
668683
669684 // max_output_tokens
0 commit comments