Commit 43eaa3c

fix(openai): Responses API parity with native structured input, continuity (previous_response_id/store), temp/verbosity gating, and image support (input_image/output_text)
1 parent 1144bf9 commit 43eaa3c

File tree

1 file changed (+84 −96 lines)

src/api/providers/openai.ts

Lines changed: 84 additions & 96 deletions
```diff
@@ -376,47 +376,55 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 
 		// Use Responses API when selected (non-streaming convenience method)
 		if (flavor === "responses") {
-			// Build a single-turn formatted string input (Developer/User style) for Responses API
-			const formattedInput = this._formatResponsesSingleMessage(
-				{
-					role: "user",
-					content: [{ type: "text", text: prompt }],
-				} as Anthropic.Messages.MessageParam,
-				/*includeRole*/ true,
-			)
+			// Build structured single-turn input
 			const payload: Record<string, unknown> = {
 				model: model.id,
-				input: formattedInput,
+				input: [
+					{
+						role: "user",
+						content: [{ type: "input_text", text: prompt }],
+					},
+				],
+				stream: false,
+				store: false,
 			}
 
-			// Reasoning effort (Responses)
+			// Reasoning effort (support "minimal"; include summary: "auto" unless disabled)
 			const effort = (this.options.reasoningEffort || model.reasoningEffort) as
 				| "minimal"
 				| "low"
 				| "medium"
 				| "high"
 				| undefined
-			if (this.options.enableReasoningEffort && effort && effort !== "minimal") {
-				payload.reasoning = { effort }
+			if (this.options.enableReasoningEffort && effort) {
+				;(
+					payload as { reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } }
+				).reasoning = {
+					effort,
+					...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}),
+				}
 			}
 
-			// Temperature if set
-			if (this.options.modelTemperature !== undefined) {
-				payload.temperature = this.options.modelTemperature
+			// Temperature if supported and set
+			if (modelInfo.supportsTemperature !== false && this.options.modelTemperature !== undefined) {
+				;(payload as Record<string, unknown>).temperature = this.options.modelTemperature
 			}
 
-			// Verbosity via text.verbosity - include only when explicitly specified
-			if (this.options.verbosity) {
-				payload.text = { verbosity: this.options.verbosity as "low" | "medium" | "high" }
+			// Verbosity via text.verbosity - include only when supported
+			if (this.options.verbosity && modelInfo.supportsVerbosity) {
+				;(payload as { text?: { verbosity: "low" | "medium" | "high" } }).text = {
+					verbosity: this.options.verbosity as "low" | "medium" | "high",
+				}
 			}
 
 			// max_output_tokens
 			if (this.options.includeMaxTokens === true) {
-				payload.max_output_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
+				;(payload as Record<string, unknown>).max_output_tokens =
+					this.options.modelMaxTokens || modelInfo.maxTokens
 			}
 
 			const response = await this._responsesCreateWithRetries(payload, {
-				usedArrayInput: false,
+				usedArrayInput: true,
 				lastUserMessage: undefined,
 				previousId: undefined,
 				systemPrompt: "",
```
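
For orientation, here is a minimal sketch of the request body this branch now sends for a single-turn completion, assuming the reasoning and max-token gates are enabled; the model id, prompt text, and token limit below are invented:

```ts
// Hypothetical single-turn Responses API payload (illustrative values only).
const payload: Record<string, unknown> = {
	model: "gpt-5-mini", // assumed model id
	input: [
		{
			role: "user" as const,
			content: [{ type: "input_text" as const, text: "Summarize the changelog." }],
		},
	],
	stream: false,
	store: false, // one-shot helper: no server-side conversation state
	reasoning: { effort: "minimal" as const, summary: "auto" as const },
	max_output_tokens: 1024, // included only when includeMaxTokens === true
}
```
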
```diff
@@ -736,72 +744,29 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		const modelId = this.options.openAiModelId ?? ""
 		const nonStreaming = !(this.options.openAiStreamingEnabled ?? true)
 
-		// Build Responses payload (align with OpenAI Native Responses API formatting)
-		// Azure- and Responses-compatible multimodal handling:
-		// - Use array input ONLY when the latest user message contains images (initial turn)
-		// - When previous_response_id is present, send only the latest user turn:
-		//   • Text-only => single string "User: ...", no Developer preface
-		//   • With images => one-item array containing only the latest user content (no Developer preface)
-		const lastUserMessage = [...messages].reverse().find((m) => m.role === "user")
-		const lastUserHasImages =
-			!!lastUserMessage &&
-			Array.isArray(lastUserMessage.content) &&
-			lastUserMessage.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
-
-		// Conversation continuity (parity with OpenAiNativeHandler.prepareGpt5Input)
+		// Determine conversation continuity id (skip when explicitly suppressed)
 		const previousId = metadata?.suppressPreviousResponseId
 			? undefined
 			: (metadata?.previousResponseId ?? this.lastResponseId)
 
-		const minimalInputMode = Boolean(previousId)
-
-		let inputPayload: unknown
-		if (minimalInputMode && lastUserMessage) {
-			// Minimal-mode: only the latest user message (no Developer preface)
-			if (lastUserHasImages) {
-				// Single-item array with just the latest user content
-				inputPayload = this._toResponsesInput([lastUserMessage])
-			} else {
-				// Single message string "User: ..."
-				inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true)
-			}
-		} else if (lastUserHasImages && lastUserMessage) {
-			// Initial turn with images: include Developer preface and minimal prior context to preserve continuity
-			const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant")
-
-			const messagesForArray = messages.filter((m) => {
-				if (m.role === "assistant") {
-					return lastAssistantMessage ? m === lastAssistantMessage : false
-				}
-				if (m.role === "user") {
-					const hasImage =
-						Array.isArray(m.content) &&
-						m.content.some((b: unknown) => (b as { type?: string } | undefined)?.type === "image")
-					return hasImage || m === lastUserMessage
-				}
-				return false
-			})
+		// Prepare structured input for Responses API
+		const lastUserMessage = [...messages].reverse().find((m) => m.role === "user")
+		const minimalInputMode = Boolean(previousId && lastUserMessage)
 
-			const arrayInput = this._toResponsesInput(messagesForArray)
-			const developerPreface = {
-				role: "user" as const,
-				content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }],
-			}
-			inputPayload = [developerPreface, ...arrayInput]
-		} else {
-			// Pure text history: full compact transcript (includes both user and assistant turns)
-			inputPayload = this._formatResponsesInput(systemPrompt, messages)
-		}
-		const usedArrayInput = Array.isArray(inputPayload)
+		const inputPayload = minimalInputMode
+			? this._toResponsesInput([lastUserMessage as Anthropic.Messages.MessageParam])
+			: this._toResponsesInput(messages)
 
+		// Build base payload: use top-level instructions; default to storing unless explicitly disabled
 		const basePayload: Record<string, unknown> = {
 			model: modelId,
 			input: inputPayload,
 			...(previousId ? { previous_response_id: previousId } : {}),
+			instructions: systemPrompt,
+			store: metadata?.store !== false,
 		}
 
-		// Reasoning effort (Responses expects: reasoning: { effort, summary? })
-		// Parity with native: support "minimal" and include summary: "auto" unless explicitly disabled
+		// Reasoning effort (support "minimal"; include summary: "auto" unless disabled)
 		if (this.options.enableReasoningEffort && (this.options.reasoningEffort || openAiParams?.reasoningEffort)) {
 			const effort = (this.options.reasoningEffort || openAiParams?.reasoningEffort) as
 				| "minimal"
```
```diff
@@ -811,26 +776,26 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				| undefined
 			if (effort) {
 				;(
-					basePayload as {
-						reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" }
-					}
+					basePayload as { reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } }
 				).reasoning = {
 					effort,
 					...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}),
 				}
 			}
 		}
 
-		// Temperature (only include when explicitly set by the user)
+		// Temperature: include only if model supports it
 		const deepseekReasoner = modelId.includes("deepseek-reasoner") || (this.options.openAiR1FormatEnabled ?? false)
-		if (this.options.modelTemperature !== undefined) {
-			basePayload.temperature = this.options.modelTemperature
-		} else if (deepseekReasoner) {
-			basePayload.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
+		if (modelInfo.supportsTemperature !== false) {
+			if (this.options.modelTemperature !== undefined) {
+				;(basePayload as Record<string, unknown>).temperature = this.options.modelTemperature
+			} else if (deepseekReasoner) {
+				;(basePayload as Record<string, unknown>).temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
+			}
 		}
 
-		// Verbosity: include only when explicitly specified in settings
-		if (this.options.verbosity) {
+		// Verbosity: include only when model supports it
+		if (this.options.verbosity && modelInfo.supportsVerbosity) {
 			;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = {
 				verbosity: this.options.verbosity as "low" | "medium" | "high",
 			}
```
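
Note the asymmetry in the two gates above: temperature is sent unless the model is explicitly marked unsupported (supportsTemperature !== false), while verbosity requires both a configured value and a truthy supportsVerbosity flag. A sketch of that logic in isolation, with ModelInfoLike standing in for the real ModelInfo fields the diff reads:

```ts
// Stand-in for the capability flags this diff reads from modelInfo (assumed shape).
interface ModelInfoLike {
	supportsTemperature?: boolean
	supportsVerbosity?: boolean
}

function shouldSendTemperature(info: ModelInfoLike, temperature?: number): boolean {
	// Undefined flag => assume supported; only an explicit false suppresses it.
	return info.supportsTemperature !== false && temperature !== undefined
}

function shouldSendVerbosity(info: ModelInfoLike, verbosity?: string): boolean {
	// Verbosity is stricter: the flag must be truthy, not merely "not false".
	return Boolean(verbosity && info.supportsVerbosity)
}
```
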
```diff
@@ -844,7 +809,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		// Non-streaming path
 		if (nonStreaming) {
 			const response = await this._responsesCreateWithRetries(basePayload, {
-				usedArrayInput,
+				usedArrayInput: Array.isArray(inputPayload),
 				lastUserMessage,
 				previousId,
 				systemPrompt,
@@ -857,7 +822,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		// Streaming path (auto-fallback to non-streaming result if provider ignores stream flag)
 		const streamingPayload: Record<string, unknown> = { ...basePayload, stream: true }
 		const maybeStream = await this._responsesCreateWithRetries(streamingPayload, {
-			usedArrayInput,
+			usedArrayInput: Array.isArray(inputPayload),
 			lastUserMessage,
 			previousId,
 			systemPrompt,
```

```diff
@@ -925,30 +890,53 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 
 	private _toResponsesInput(anthropicMessages: Anthropic.Messages.MessageParam[]): Array<{
 		role: "user" | "assistant"
-		content: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }>
+		content: Array<
+			| { type: "input_text"; text: string }
+			| { type: "input_image"; image_url: string }
+			| { type: "output_text"; text: string }
+		>
 	}> {
 		const input: Array<{
 			role: "user" | "assistant"
-			content: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }>
+			content: Array<
+				| { type: "input_text"; text: string }
+				| { type: "input_image"; image_url: string }
+				| { type: "output_text"; text: string }
+			>
 		}> = []
 
 		for (const msg of anthropicMessages) {
 			const role = msg.role === "assistant" ? "assistant" : "user"
-			const parts: Array<{ type: "input_text"; text: string } | { type: "input_image"; image_url: string }> = []
+			const parts: Array<
+				| { type: "input_text"; text: string }
+				| { type: "input_image"; image_url: string }
+				| { type: "output_text"; text: string }
+			> = []
 
 			if (typeof msg.content === "string") {
 				if (msg.content.length > 0) {
-					parts.push({ type: "input_text", text: msg.content })
+					if (role === "assistant") {
+						parts.push({ type: "output_text", text: msg.content })
+					} else {
+						parts.push({ type: "input_text", text: msg.content })
+					}
 				}
-			} else {
+			} else if (Array.isArray(msg.content)) {
 				for (const block of msg.content) {
 					if (block.type === "text") {
-						parts.push({ type: "input_text", text: block.text })
+						if (role === "assistant") {
+							parts.push({ type: "output_text", text: block.text })
+						} else {
+							parts.push({ type: "input_text", text: block.text })
+						}
 					} else if (block.type === "image") {
-						parts.push({
-							type: "input_image",
-							image_url: `data:${block.source.media_type};base64,${block.source.data}`,
-						})
+						// Images are treated as user input; ignore images on assistant turns
+						if (role === "user") {
+							parts.push({
+								type: "input_image",
+								image_url: `data:${block.source.media_type};base64,${block.source.data}`,
+							})
+						}
 					}
 					// tool_use/tool_result are omitted in this minimal mapping (can be added as needed)
 				}
```
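
To make the new mapping concrete, a worked example (the history and base64 data are invented): user text maps to input_text, assistant text to output_text, and images on user turns to input_image data URLs, while images on assistant turns are dropped.

```ts
import { Anthropic } from "@anthropic-ai/sdk"

// Invented three-turn history exercising every branch of _toResponsesInput.
const history: Anthropic.Messages.MessageParam[] = [
	{ role: "user", content: "What is in this picture?" },
	{ role: "assistant", content: "A cat on a couch." },
	{
		role: "user",
		content: [
			{ type: "text", text: "Zoom in on the couch." },
			{ type: "image", source: { type: "base64", media_type: "image/png", data: "iVBORw0KGgo..." } },
		],
	},
]

// _toResponsesInput(history) would yield roughly:
// [
//   { role: "user", content: [{ type: "input_text", text: "What is in this picture?" }] },
//   { role: "assistant", content: [{ type: "output_text", text: "A cat on a couch." }] },
//   { role: "user", content: [
//       { type: "input_text", text: "Zoom in on the couch." },
//       { type: "input_image", image_url: "data:image/png;base64,iVBORw0KGgo..." },
//     ] },
// ]
```
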
