Commit 1144bf9

refactor(openai): extract Responses API handling into helper and delegate from createMessage
- Move Responses API logic to private _handleResponsesFlavor
- Preserve streaming, retries, conversation continuity, reasoning/verbosity, and usage
- All existing tests pass
1 parent 6f449a2 commit 1144bf9

File tree

1 file changed: +156 -144 lines changed


src/api/providers/openai.ts

Lines changed: 156 additions & 144 deletions
@@ -174,150 +174,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl

 		// If Responses API is selected, use the Responses payload and endpoint
 		if (flavor === "responses") {
-			const nonStreaming = !(this.options.openAiStreamingEnabled ?? true)
-
-			// Build Responses payload (align with OpenAI Native Responses API formatting)
-			// Azure- and Responses-compatible multimodal handling:
-			// - Use array input ONLY when the latest user message contains images (initial turn)
-			// - When previous_response_id is present, send only the latest user turn:
-			//   • Text-only => single string "User: ...", no Developer preface
-			//   • With images => one-item array containing only the latest user content (no Developer preface)
-			const lastUserMessage = [...messages].reverse().find((m) => m.role === "user")
-			const lastUserHasImages =
-				!!lastUserMessage &&
-				Array.isArray(lastUserMessage.content) &&
-				lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
-
-			// Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input)
-			const previousId = metadata?.suppressPreviousResponseId
-				? undefined
-				: (metadata?.previousResponseId ?? this.lastResponseId)
-
-			const minimalInputMode = Boolean(previousId)
-
-			let inputPayload: unknown
-			if (minimalInputMode && lastUserMessage) {
-				// Minimal-mode: only the latest user message (no Developer preface)
-				if (lastUserHasImages) {
-					// Single-item array with just the latest user content
-					inputPayload = this._toResponsesInput([lastUserMessage])
-				} else {
-					// Single message string "User: ..."
-					inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true)
-				}
-			} else if (lastUserHasImages && lastUserMessage) {
-				// Initial turn with images: include Developer preface and minimal prior context to preserve continuity
-				const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant")
-
-				const messagesForArray = messages.filter((m) => {
-					if (m.role === "assistant") {
-						return lastAssistantMessage ? m === lastAssistantMessage : false
-					}
-					if (m.role === "user") {
-						const hasImage =
-							Array.isArray(m.content) &&
-							m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
-						return hasImage || m === lastUserMessage
-					}
-					return false
-				})
-
-				const arrayInput = this._toResponsesInput(messagesForArray)
-				const developerPreface = {
-					role: "user" as const,
-					content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }],
-				}
-				inputPayload = [developerPreface, ...arrayInput]
-			} else {
-				// Pure text history: full compact transcript (includes both user and assistant turns)
-				inputPayload = this._formatResponsesInput(systemPrompt, messages)
-			}
-			const usedArrayInput = Array.isArray(inputPayload)
-
-			const basePayload: Record<string, unknown> = {
-				model: modelId,
-				input: inputPayload,
-				...(previousId ? { previous_response_id: previousId } : {}),
-			}
-
-			// Reasoning effort (Responses expects: reasoning: { effort, summary? })
-			// Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled
-			if (this.options.enableReasoningEffort && (this.options.reasoningEffort || reasoningEffort)) {
-				const effort = (this.options.reasoningEffort || reasoningEffort) as
-					| "minimal"
-					| "low"
-					| "medium"
-					| "high"
-					| undefined
-				if (effort) {
-					;(
-						basePayload as {
-							reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" }
-						}
-					).reasoning = {
-						effort,
-						...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}),
-					}
-				}
-			}
-
-			// Temperature (only include when explicitly set by the user)
-			if (this.options.modelTemperature !== undefined) {
-				basePayload.temperature = this.options.modelTemperature
-			} else if (deepseekReasoner) {
-				basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
-			}
-
-			// Verbosity: include only when explicitly specified in settings
-			if (this.options.verbosity) {
-				;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = {
-					verbosity: this.options.verbosity as "low" | "medium" | "high",
-				}
-			}
-
-			// Always include max_output_tokens for Responses API to cap output length
-			const reservedMax = openAiParams.maxTokens
-			;(basePayload as Record<string, unknown>).max_output_tokens =
-				this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens
-
-			// Non-streaming path
-			if (nonStreaming) {
-				const response = await this._responsesCreateWithRetries(basePayload, {
-					usedArrayInput,
-					lastUserMessage,
-					previousId,
-					systemPrompt,
-					messages,
-				})
-				yield* this._yieldResponsesResult(response, modelInfo)
-				return
-			}
-
-			// Streaming path (auto-fallback to non-streaming result if provider ignores stream flag)
-			const streamingPayload: Record<string, unknown> = { ...basePayload, stream: true }
-			const maybeStream = await this._responsesCreateWithRetries(streamingPayload, {
-				usedArrayInput,
-				lastUserMessage,
-				previousId,
-				systemPrompt,
-				messages,
-			})
-
-			const isAsyncIterable = (obj: unknown): obj is AsyncIterable<unknown> =>
-				typeof (obj as AsyncIterable<unknown>)[Symbol.asyncIterator] === "function"
-
-			if (isAsyncIterable(maybeStream)) {
-				for await (const chunk of handleResponsesStream(maybeStream, {
-					onResponseId: (id) => {
-						this.lastResponseId = id
-					},
-				})) {
-					yield chunk
-				}
-			} else {
-				// Some providers may ignore the stream flag and return a complete response
-				yield* this._yieldResponsesResult(maybeStream, modelInfo)
-			}
+			yield* this._handleResponsesFlavor(systemPrompt, messages, metadata, modelInfo, openAiParams)
 			return
 		}

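The 144 deleted lines collapse to a single call because `yield*` inside an async generator forwards every chunk its delegate yields, and only resumes once the delegate finishes. A minimal sketch of that delegation pattern, using illustrative names rather than the real openai.ts types:

	// Sketch: async-generator delegation via yield*. Chunk, inner, and outer
	// are illustrative stand-ins, not identifiers from this codebase.
	type Chunk = { type: "text"; text: string }

	async function* inner(): AsyncGenerator<Chunk> {
		yield { type: "text", text: "hello" }
		yield { type: "text", text: "world" }
	}

	async function* outer(): AsyncGenerator<Chunk> {
		yield* inner() // re-emits every chunk inner() yields, in order
		// Control reaches here only after inner() completes, which is why the
		// bare `return` right after the delegation in createMessage is safe.
	}

	async function main() {
		for await (const chunk of outer()) {
			console.log(chunk.text) // "hello", then "world"
		}
	}
	void main()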
@@ -869,6 +726,161 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl

 	// --- Responses helpers ---

+	private async *_handleResponsesFlavor(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata: ApiHandlerCreateMessageMetadata | undefined,
+		modelInfo: ModelInfo,
+		openAiParams: any,
+	): ApiStream {
+		const modelId = this.options.openAiModelId ?? ""
+		const nonStreaming = !(this.options.openAiStreamingEnabled ?? true)
+
+		// Build Responses payload (align with OpenAI Native Responses API formatting)
+		// Azure- and Responses-compatible multimodal handling:
+		// - Use array input ONLY when the latest user message contains images (initial turn)
+		// - When previous_response_id is present, send only the latest user turn:
+		//   • Text-only => single string "User: ...", no Developer preface
+		//   • With images => one-item array containing only the latest user content (no Developer preface)
+		const lastUserMessage = [...messages].reverse().find((m) => m.role === "user")
+		const lastUserHasImages =
+			!!lastUserMessage &&
+			Array.isArray(lastUserMessage.content) &&
+			lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
+
+		// Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input)
+		const previousId = metadata?.suppressPreviousResponseId
+			? undefined
+			: (metadata?.previousResponseId ?? this.lastResponseId)
+
+		const minimalInputMode = Boolean(previousId)
+
+		let inputPayload: unknown
+		if (minimalInputMode && lastUserMessage) {
+			// Minimal-mode: only the latest user message (no Developer preface)
+			if (lastUserHasImages) {
+				// Single-item array with just the latest user content
+				inputPayload = this._toResponsesInput([lastUserMessage])
+			} else {
+				// Single message string "User: ..."
+				inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true)
+			}
+		} else if (lastUserHasImages && lastUserMessage) {
+			// Initial turn with images: include Developer preface and minimal prior context to preserve continuity
+			const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant")
+
+			const messagesForArray = messages.filter((m) => {
+				if (m.role === "assistant") {
+					return lastAssistantMessage ? m === lastAssistantMessage : false
+				}
+				if (m.role === "user") {
+					const hasImage =
+						Array.isArray(m.content) &&
+						m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
+					return hasImage || m === lastUserMessage
+				}
+				return false
+			})
+
+			const arrayInput = this._toResponsesInput(messagesForArray)
+			const developerPreface = {
+				role: "user" as const,
+				content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }],
+			}
+			inputPayload = [developerPreface, ...arrayInput]
+		} else {
+			// Pure text history: full compact transcript (includes both user and assistant turns)
+			inputPayload = this._formatResponsesInput(systemPrompt, messages)
+		}
+		const usedArrayInput = Array.isArray(inputPayload)
+
+		const basePayload: Record<string, unknown> = {
+			model: modelId,
+			input: inputPayload,
+			...(previousId ? { previous_response_id: previousId } : {}),
+		}
+
+		// Reasoning effort (Responses expects: reasoning: { effort, summary? })
+		// Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled
+		if (this.options.enableReasoningEffort && (this.options.reasoningEffort || openAiParams?.reasoningEffort)) {
+			const effort = (this.options.reasoningEffort || openAiParams?.reasoningEffort) as
+				| "minimal"
+				| "low"
+				| "medium"
+				| "high"
+				| undefined
+			if (effort) {
+				;(
+					basePayload as {
+						reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" }
+					}
+				).reasoning = {
+					effort,
+					...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}),
+				}
+			}
+		}
+
+		// Temperature (only include when explicitly set by the user)
+		const deepseekReasoner = modelId.includes("deepseek-reasoner") || (this.options.openAiR1FormatEnabled ?? false)
+		if (this.options.modelTemperature !== undefined) {
+			basePayload.temperature = this.options.modelTemperature
+		} else if (deepseekReasoner) {
+			basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
+		}
+
+		// Verbosity: include only when explicitly specified in settings
+		if (this.options.verbosity) {
+			;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = {
+				verbosity: this.options.verbosity as "low" | "medium" | "high",
+			}
+		}
+
+		// Always include max_output_tokens for Responses API to cap output length
+		const reservedMax = openAiParams?.maxTokens
+		;(basePayload as Record<string, unknown>).max_output_tokens =
+			this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens
+
+		// Non-streaming path
+		if (nonStreaming) {
+			const response = await this._responsesCreateWithRetries(basePayload, {
+				usedArrayInput,
+				lastUserMessage,
+				previousId,
+				systemPrompt,
+				messages,
+			})
+			yield* this._yieldResponsesResult(response, modelInfo)
+			return
+		}
+
+		// Streaming path (auto-fallback to non-streaming result if provider ignores stream flag)
+		const streamingPayload: Record<string, unknown> = { ...basePayload, stream: true }
+		const maybeStream = await this._responsesCreateWithRetries(streamingPayload, {
+			usedArrayInput,
+			lastUserMessage,
+			previousId,
+			systemPrompt,
+			messages,
+		})
+
+		const isAsyncIterable = (obj: unknown): obj is AsyncIterable<unknown> =>
+			typeof (obj as AsyncIterable<unknown>)[Symbol.asyncIterator] === "function"
+
+		if (isAsyncIterable(maybeStream)) {
+			for await (const chunk of handleResponsesStream(maybeStream, {
+				onResponseId: (id) => {
+					this.lastResponseId = id
+				},
+			})) {
+				yield chunk
+			}
+		} else {
+			// Some providers may ignore the stream flag and return a complete response
+			yield* this._yieldResponsesResult(maybeStream, modelInfo)
+		}
+	}
+
 	/**
 	 * Determines which OpenAI-compatible API flavor to use based on the URL path.
 	 * - This is purely path-based and provider-agnostic (works for OpenAI, Azure OpenAI after normalization, etc.).
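Condensed, the helper's input-shaping logic reduces to three payload shapes. The sketch below shows just that decision; chooseInputShape and its string labels are illustrative stand-ins, where the real code calls the private _toResponsesInput / _formatResponsesSingleMessage / _formatResponsesInput helpers instead of returning labels:

	// Condensed sketch of the input-shaping decision in _handleResponsesFlavor.
	type InputShape =
		| "array-latest-user-only" // continuation turn whose latest user message has images
		| "string-latest-user-only" // continuation turn, text only
		| "array-with-developer-preface" // initial multimodal turn
		| "string-full-transcript" // pure text history

	function chooseInputShape(opts: {
		previousResponseId?: string
		hasLastUserMessage: boolean
		lastUserHasImages: boolean
	}): InputShape {
		if (opts.previousResponseId && opts.hasLastUserMessage) {
			// The server already holds the conversation; send only the newest turn
			return opts.lastUserHasImages ? "array-latest-user-only" : "string-latest-user-only"
		}
		if (opts.lastUserHasImages) {
			// First turn with images: array input, prefixed by "Developer: <system prompt>"
			return "array-with-developer-preface"
		}
		// Text-only history: one compact "User:/Assistant:" transcript string
		return "string-full-transcript"
	}

	// e.g. chooseInputShape({ previousResponseId: "resp_123", hasLastUserMessage: true, lastUserHasImages: false })
	//      returns "string-latest-user-only" ("resp_123" is a made-up id)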

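The streaming auto-fallback at the end of the helper is likewise a self-contained pattern: probe the returned value for Symbol.asyncIterator rather than trusting the stream flag. A standalone sketch, assuming the call may return either an async-iterable stream or a complete response object (streamOrComplete and callProvider are placeholder names, not SDK signatures):

	// Standalone sketch of the stream-or-complete fallback.
	const isAsyncIterable = (obj: unknown): obj is AsyncIterable<unknown> =>
		typeof obj === "object" &&
		obj !== null &&
		typeof (obj as AsyncIterable<unknown>)[Symbol.asyncIterator] === "function"

	async function* streamOrComplete(callProvider: () => Promise<unknown>): AsyncGenerator<unknown> {
		const result = await callProvider()
		if (isAsyncIterable(result)) {
			// The provider honored stream: true, so forward chunks as they arrive
			for await (const chunk of result) {
				yield chunk
			}
		} else {
			// The provider ignored the flag and returned one complete response
			yield result
		}
	}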