@@ -174,150 +174,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler

		// If Responses API is selected, use the Responses payload and endpoint
		if (flavor === "responses") {
-			const nonStreaming = !(this.options.openAiStreamingEnabled ?? true)
-
-			// Build Responses payload (align with OpenAI Native Responses API formatting)
-			// Azure- and Responses-compatible multimodal handling:
-			// - Use array input ONLY when the latest user message contains images (initial turn)
-			// - When previous_response_id is present, send only the latest user turn:
-			//   • Text-only => single string "User: ...", no Developer preface
-			//   • With images => one-item array containing only the latest user content (no Developer preface)
-			const lastUserMessage = [...messages].reverse().find((m) => m.role === "user")
-			const lastUserHasImages =
-				!!lastUserMessage &&
-				Array.isArray(lastUserMessage.content) &&
-				lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
-
-			// Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input)
-			const previousId = metadata?.suppressPreviousResponseId
-				? undefined
-				: (metadata?.previousResponseId ?? this.lastResponseId)
-
-			const minimalInputMode = Boolean(previousId)
-
-			let inputPayload: unknown
-			if (minimalInputMode && lastUserMessage) {
-				// Minimal-mode: only the latest user message (no Developer preface)
-				if (lastUserHasImages) {
-					// Single-item array with just the latest user content
-					inputPayload = this._toResponsesInput([lastUserMessage])
-				} else {
-					// Single message string "User: ..."
-					inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true)
-				}
-			} else if (lastUserHasImages && lastUserMessage) {
-				// Initial turn with images: include Developer preface and minimal prior context to preserve continuity
-				const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant")
-
-				const messagesForArray = messages.filter((m) => {
-					if (m.role === "assistant") {
-						return lastAssistantMessage ? m === lastAssistantMessage : false
-					}
-					if (m.role === "user") {
-						const hasImage =
-							Array.isArray(m.content) &&
-							m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
-						return hasImage || m === lastUserMessage
-					}
-					return false
-				})
-
-				const arrayInput = this._toResponsesInput(messagesForArray)
-				const developerPreface = {
-					role: "user" as const,
-					content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }],
-				}
-				inputPayload = [developerPreface, ...arrayInput]
-			} else {
-				// Pure text history: full compact transcript (includes both user and assistant turns)
-				inputPayload = this._formatResponsesInput(systemPrompt, messages)
-			}
-			const usedArrayInput = Array.isArray(inputPayload)
-
-			const basePayload: Record<string, unknown> = {
-				model: modelId,
-				input: inputPayload,
-				...(previousId ? { previous_response_id: previousId } : {}),
-			}
-
-			// Reasoning effort (Responses expects: reasoning: { effort, summary? })
-			// Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled
-			if (this.options.enableReasoningEffort && (this.options.reasoningEffort || reasoningEffort)) {
-				const effort = (this.options.reasoningEffort || reasoningEffort) as
-					| "minimal"
-					| "low"
-					| "medium"
-					| "high"
-					| undefined
-				if (effort) {
-					;(
-						basePayload as {
-							reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" }
-						}
-					).reasoning = {
-						effort,
-						...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}),
-					}
-				}
-			}
-
-			// Temperature (only include when explicitly set by the user)
-			if (this.options.modelTemperature !== undefined) {
-				basePayload.temperature = this.options.modelTemperature
-			} else if (deepseekReasoner) {
-				basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
-			}
-
-			// Verbosity: include only when explicitly specified in settings
-			if (this.options.verbosity) {
-				;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = {
-					verbosity: this.options.verbosity as "low" | "medium" | "high",
-				}
-			}
-
-			// Always include max_output_tokens for Responses API to cap output length
-			const reservedMax = openAiParams.maxTokens
-			;(basePayload as Record<string, unknown>).max_output_tokens =
-				this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens
-
-			// Non-streaming path
-			if (nonStreaming) {
-				const response = await this._responsesCreateWithRetries(basePayload, {
-					usedArrayInput,
-					lastUserMessage,
-					previousId,
-					systemPrompt,
-					messages,
-				})
-				yield* this._yieldResponsesResult(response, modelInfo)
-				return
-			}
-
-			// Streaming path (auto-fallback to non-streaming result if provider ignores stream flag)
-			const streamingPayload: Record<string, unknown> = { ...basePayload, stream: true }
-			const maybeStream = await this._responsesCreateWithRetries(streamingPayload, {
-				usedArrayInput,
-				lastUserMessage,
-				previousId,
-				systemPrompt,
-				messages,
-			})
-
-			const isAsyncIterable = (obj: unknown): obj is AsyncIterable<unknown> =>
-				typeof (obj as AsyncIterable<unknown>)[Symbol.asyncIterator] === "function"
-
-			if (isAsyncIterable(maybeStream)) {
-				for await (const chunk of handleResponsesStream(maybeStream, {
-					onResponseId: (id) => {
-						this.lastResponseId = id
-					},
-				})) {
-					yield chunk
-				}
-			} else {
-				// Some providers may ignore the stream flag and return a complete response
-				yield* this._yieldResponsesResult(maybeStream, modelInfo)
-			}
+			yield* this._handleResponsesFlavor(systemPrompt, messages, metadata, modelInfo, openAiParams)
			return
		}

@@ -869,6 +726,161 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler

	// --- Responses helpers ---

+	private async *_handleResponsesFlavor(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata: ApiHandlerCreateMessageMetadata | undefined,
+		modelInfo: ModelInfo,
+		openAiParams: any,
+	): ApiStream {
+		const modelId = this.options.openAiModelId ?? ""
+		const nonStreaming = !(this.options.openAiStreamingEnabled ?? true)
+
+		// Build Responses payload (align with OpenAI Native Responses API formatting)
+		// Azure- and Responses-compatible multimodal handling:
+		// - Use array input ONLY when the latest user message contains images (initial turn)
+		// - When previous_response_id is present, send only the latest user turn:
+		//   • Text-only => single string "User: ...", no Developer preface
+		//   • With images => one-item array containing only the latest user content (no Developer preface)
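+		// Illustrative input shapes (values are placeholders, not literal output):
+		//   text-only continuation => input: "User: <latest message text>"
+		//   initial turn w/ images => input: [{ role: "user", content: [{ type: "input_text", text: ... }, ...] }]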
+		const lastUserMessage = [...messages].reverse().find((m) => m.role === "user")
+		const lastUserHasImages =
+			!!lastUserMessage &&
+			Array.isArray(lastUserMessage.content) &&
+			lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
+
+		// Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input)
+		const previousId = metadata?.suppressPreviousResponseId
+			? undefined
+			: (metadata?.previousResponseId ?? this.lastResponseId)
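+		// When continuity is suppressed, start a fresh exchange instead of
+		// chaining onto the last response id we saw.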
+
+		const minimalInputMode = Boolean(previousId)
+
+		let inputPayload: unknown
+		if (minimalInputMode && lastUserMessage) {
+			// Minimal-mode: only the latest user message (no Developer preface)
+			if (lastUserHasImages) {
+				// Single-item array with just the latest user content
+				inputPayload = this._toResponsesInput([lastUserMessage])
+			} else {
+				// Single message string "User: ..."
+				inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true)
+			}
+		} else if (lastUserHasImages && lastUserMessage) {
+			// Initial turn with images: include Developer preface and minimal prior context to preserve continuity
+			const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant")
+
+			const messagesForArray = messages.filter((m) => {
+				if (m.role === "assistant") {
+					return lastAssistantMessage ? m === lastAssistantMessage : false
+				}
+				if (m.role === "user") {
+					const hasImage =
+						Array.isArray(m.content) &&
+						m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
+					return hasImage || m === lastUserMessage
+				}
+				return false
+			})
+
+			const arrayInput = this._toResponsesInput(messagesForArray)
+			const developerPreface = {
+				role: "user" as const,
+				content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }],
+			}
+			inputPayload = [developerPreface, ...arrayInput]
+		} else {
+			// Pure text history: full compact transcript (includes both user and assistant turns)
+			inputPayload = this._formatResponsesInput(systemPrompt, messages)
+		}
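+		// The input shape plus the original inputs are handed to the retry
+		// helper below, so any retried request has full context available.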
+		const usedArrayInput = Array.isArray(inputPayload)
+
+		const basePayload: Record<string, unknown> = {
+			model: modelId,
+			input: inputPayload,
+			...(previousId ? { previous_response_id: previousId } : {}),
+		}
+
+		// Reasoning effort (Responses expects: reasoning: { effort, summary? })
+		// Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled
+		if (this.options.enableReasoningEffort && (this.options.reasoningEffort || openAiParams?.reasoningEffort)) {
+			const effort = (this.options.reasoningEffort || openAiParams?.reasoningEffort) as
+				| "minimal"
+				| "low"
+				| "medium"
+				| "high"
+				| undefined
+			if (effort) {
+				;(
+					basePayload as {
+						reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" }
+					}
+				).reasoning = {
+					effort,
+					...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}),
+				}
+			}
+		}
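+		// e.g. produces: reasoning: { effort: "high", summary: "auto" }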
+
+		// Temperature (only include when explicitly set by the user)
+		const deepseekReasoner = modelId.includes("deepseek-reasoner") || (this.options.openAiR1FormatEnabled ?? false)
+		if (this.options.modelTemperature !== undefined) {
+			basePayload.temperature = this.options.modelTemperature
+		} else if (deepseekReasoner) {
+			basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
+		}
+
+		// Verbosity: include only when explicitly specified in settings
+		if (this.options.verbosity) {
+			;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = {
+				verbosity: this.options.verbosity as "low" | "medium" | "high",
+			}
+		}
+
+		// Always include max_output_tokens for Responses API to cap output length
+		const reservedMax = openAiParams?.maxTokens
+		;(basePayload as Record<string, unknown>).max_output_tokens =
+			this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens
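+		// Precedence: explicit user setting > reserved max from model params > model default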
+
+		// Non-streaming path
+		if (nonStreaming) {
+			const response = await this._responsesCreateWithRetries(basePayload, {
+				usedArrayInput,
+				lastUserMessage,
+				previousId,
+				systemPrompt,
+				messages,
+			})
+			yield* this._yieldResponsesResult(response, modelInfo)
+			return
+		}
+
+		// Streaming path (auto-fallback to non-streaming result if provider ignores stream flag)
+		const streamingPayload: Record<string, unknown> = { ...basePayload, stream: true }
+		const maybeStream = await this._responsesCreateWithRetries(streamingPayload, {
+			usedArrayInput,
+			lastUserMessage,
+			previousId,
+			systemPrompt,
+			messages,
+		})
+
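+		// The endpoint may return either an event stream or a complete response
+		// object; detect at runtime which one we actually received.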
+		const isAsyncIterable = (obj: unknown): obj is AsyncIterable<unknown> =>
+			typeof (obj as AsyncIterable<unknown>)[Symbol.asyncIterator] === "function"
+
+		if (isAsyncIterable(maybeStream)) {
+			for await (const chunk of handleResponsesStream(maybeStream, {
+				onResponseId: (id) => {
+					this.lastResponseId = id
+				},
+			})) {
+				yield chunk
+			}
+		} else {
+			// Some providers may ignore the stream flag and return a complete response
+			yield* this._yieldResponsesResult(maybeStream, modelInfo)
+		}
+	}
+
	/**
	 * Determines which OpenAI-compatible API flavor to use based on the URL path.
	 * - This is purely path-based and provider-agnostic (works for OpenAI, Azure OpenAI after normalization, etc.).