Commit eae569b

fix: preserve image attachments in OpenAI handler when legacy format is enabled
- Modified OpenAI handler to detect images in messages and use full OpenAI format even when legacy format is enabled
- Added comprehensive tests to verify image handling works correctly with legacy format
- Fixes issue where OpenAI would report "no attachment received" when images were sent with legacy format enabled

Fixes #7012
1 parent bbe3362 commit eae569b
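
The change turns on one mapping that the new tests assert directly: an Anthropic base64 image block must reach the API as an OpenAI image_url content part whose URL is a data: URL built from the block's media type and base64 payload. The TypeScript sketch below illustrates that mapping only; it is not code from this commit, the helper name toOpenAiImagePart is invented for illustration, and in the repository the actual conversion is performed by convertToOpenAiMessages (used by the handler change further down).

// Illustrative sketch only (not part of this commit): how an Anthropic base64
// image block is expected to map to an OpenAI "image_url" content part.
// The helper name `toOpenAiImagePart` is hypothetical.
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"

function toOpenAiImagePart(
	block: Anthropic.Messages.ImageBlockParam,
): OpenAI.Chat.ChatCompletionContentPartImage | undefined {
	// Only base64 sources are covered here; other source kinds are skipped.
	if (block.source.type !== "base64") {
		return undefined
	}
	// The tests expect a data URL of the form "data:<media_type>;base64,<data>".
	return {
		type: "image_url",
		image_url: { url: `data:${block.source.media_type};base64,${block.source.data}` },
	}
}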

File tree

2 files changed: +353 -2 lines changed

src/api/providers/__tests__/openai-image-legacy.spec.ts

Lines changed: 325 additions & 0 deletions
@@ -0,0 +1,325 @@
// npx vitest run api/providers/__tests__/openai-image-legacy.spec.ts

import { OpenAiHandler } from "../openai"
import { ApiHandlerOptions } from "../../../shared/api"
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"

const mockCreate = vitest.fn()

vitest.mock("openai", () => {
	const mockConstructor = vitest.fn()
	return {
		__esModule: true,
		default: mockConstructor.mockImplementation(() => ({
			chat: {
				completions: {
					create: mockCreate.mockImplementation(async (options) => {
						if (!options.stream) {
							return {
								id: "test-completion",
								choices: [
									{
										message: { role: "assistant", content: "I can see the image", refusal: null },
										finish_reason: "stop",
										index: 0,
									},
								],
								usage: {
									prompt_tokens: 10,
									completion_tokens: 5,
									total_tokens: 15,
								},
							}
						}

						return {
							[Symbol.asyncIterator]: async function* () {
								yield {
									choices: [
										{
											delta: { content: "I can see the image" },
											index: 0,
										},
									],
									usage: null,
								}
								yield {
									choices: [
										{
											delta: {},
											index: 0,
										},
									],
									usage: {
										prompt_tokens: 10,
										completion_tokens: 5,
										total_tokens: 15,
									},
								}
							},
						}
					}),
				},
			},
		})),
	}
})

describe("OpenAiHandler - Image Handling with Legacy Format", () => {
	let handler: OpenAiHandler
	let mockOptions: ApiHandlerOptions

	beforeEach(() => {
		mockOptions = {
			openAiApiKey: "test-api-key",
			openAiModelId: "gpt-4-vision-preview",
			openAiBaseUrl: "https://api.openai.com/v1",
			openAiLegacyFormat: true, // Enable legacy format
		}
		handler = new OpenAiHandler(mockOptions)
		mockCreate.mockClear()
	})

	describe("Image handling with legacy format enabled", () => {
		const systemPrompt = "You are a helpful assistant that can analyze images."

		it("should preserve images in messages when legacy format is enabled", async () => {
			const messagesWithImage: Anthropic.Messages.MessageParam[] = [
				{
					role: "user",
					content: [
						{
							type: "text" as const,
							text: "What's in this image?",
						},
						{
							type: "image" as const,
							source: {
								type: "base64" as const,
								media_type: "image/png",
								data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==",
							},
						},
					],
				},
			]

			const stream = handler.createMessage(systemPrompt, messagesWithImage)
			const chunks: any[] = []
			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Verify the API was called
			expect(mockCreate).toHaveBeenCalled()
			const callArgs = mockCreate.mock.calls[0][0]

			// Check that messages were properly formatted with image data preserved
			expect(callArgs.messages).toBeDefined()
			expect(callArgs.messages.length).toBeGreaterThan(0)

			// Find the user message with the image
			const userMessage = callArgs.messages.find((msg: any) => msg.role === "user" && Array.isArray(msg.content))
			expect(userMessage).toBeDefined()

			// Verify the image content is preserved in OpenAI format
			const imageContent = userMessage.content.find((part: any) => part.type === "image_url")
			expect(imageContent).toBeDefined()
			expect(imageContent.image_url).toBeDefined()
			expect(imageContent.image_url.url).toContain("data:image/png;base64,")
			expect(imageContent.image_url.url).toContain(
				"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==",
			)
		})

		it("should use simple format for text-only messages when legacy format is enabled", async () => {
			const textOnlyMessages: Anthropic.Messages.MessageParam[] = [
				{
					role: "user",
					content: "Hello, how are you?",
				},
			]

			const stream = handler.createMessage(systemPrompt, textOnlyMessages)
			const chunks: any[] = []
			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Verify the API was called
			expect(mockCreate).toHaveBeenCalled()
			const callArgs = mockCreate.mock.calls[0][0]

			// Check that messages were formatted as simple strings for text-only content
			expect(callArgs.messages).toBeDefined()
			expect(callArgs.messages.length).toBeGreaterThan(0)

			// Find the user message
			const userMessage = callArgs.messages.find(
				(msg: any) => msg.role === "user" && msg.content === "Hello, how are you?",
			)
			expect(userMessage).toBeDefined()
			expect(typeof userMessage.content).toBe("string")
		})

		it("should handle mixed messages with and without images correctly", async () => {
			const mixedMessages: Anthropic.Messages.MessageParam[] = [
				{
					role: "user",
					content: "First message without image",
				},
				{
					role: "assistant",
					content: "I understand",
				},
				{
					role: "user",
					content: [
						{
							type: "text" as const,
							text: "Now here's an image",
						},
						{
							type: "image" as const,
							source: {
								type: "base64" as const,
								media_type: "image/jpeg",
								data: "base64imagedata",
							},
						},
					],
				},
			]

			const stream = handler.createMessage(systemPrompt, mixedMessages)
			const chunks: any[] = []
			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Verify the API was called
			expect(mockCreate).toHaveBeenCalled()
			const callArgs = mockCreate.mock.calls[0][0]

			// Since there's an image in the messages, all messages should use OpenAI format
			expect(callArgs.messages).toBeDefined()

			// Find the user message with the image
			const userMessageWithImage = callArgs.messages.find(
				(msg: any) =>
					msg.role === "user" &&
					Array.isArray(msg.content) &&
					msg.content.some((part: any) => part.type === "image_url"),
			)
			expect(userMessageWithImage).toBeDefined()

			// Verify the image is properly formatted
			const imageContent = userMessageWithImage.content.find((part: any) => part.type === "image_url")
			expect(imageContent.image_url.url).toContain("data:image/jpeg;base64,base64imagedata")
		})

		it("should handle non-streaming mode with images and legacy format", async () => {
			const handler = new OpenAiHandler({
				...mockOptions,
				openAiStreamingEnabled: false,
			})

			const messagesWithImage: Anthropic.Messages.MessageParam[] = [
				{
					role: "user",
					content: [
						{
							type: "text" as const,
							text: "Analyze this",
						},
						{
							type: "image" as const,
							source: {
								type: "base64" as const,
								media_type: "image/gif",
								data: "gifbase64data",
							},
						},
					],
				},
			]

			const stream = handler.createMessage(systemPrompt, messagesWithImage)
			const chunks: any[] = []
			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Verify the API was called
			expect(mockCreate).toHaveBeenCalled()
			const callArgs = mockCreate.mock.calls[0][0]

			// Verify streaming is disabled
			expect(callArgs.stream).toBeUndefined()

			// Find the user message with the image
			const userMessage = callArgs.messages.find((msg: any) => msg.role === "user" && Array.isArray(msg.content))
			expect(userMessage).toBeDefined()

			// Verify the image content is preserved
			const imageContent = userMessage.content.find((part: any) => part.type === "image_url")
			expect(imageContent).toBeDefined()
			expect(imageContent.image_url.url).toContain("data:image/gif;base64,gifbase64data")
		})
	})

	describe("Image handling without legacy format", () => {
		beforeEach(() => {
			mockOptions = {
				openAiApiKey: "test-api-key",
				openAiModelId: "gpt-4-vision-preview",
				openAiBaseUrl: "https://api.openai.com/v1",
				openAiLegacyFormat: false, // Disable legacy format
			}
			handler = new OpenAiHandler(mockOptions)
			mockCreate.mockClear()
		})

		it("should properly handle images when legacy format is disabled", async () => {
			const messagesWithImage: Anthropic.Messages.MessageParam[] = [
				{
					role: "user",
					content: [
						{
							type: "text" as const,
							text: "What's in this image?",
						},
						{
							type: "image" as const,
							source: {
								type: "base64" as const,
								media_type: "image/png",
								data: "testImageData",
							},
						},
					],
				},
			]

			const stream = handler.createMessage("System prompt", messagesWithImage)
			const chunks: any[] = []
			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Verify the API was called
			expect(mockCreate).toHaveBeenCalled()
			const callArgs = mockCreate.mock.calls[0][0]

			// Check that messages use OpenAI format with images preserved
			const userMessage = callArgs.messages.find((msg: any) => msg.role === "user" && Array.isArray(msg.content))
			expect(userMessage).toBeDefined()

			// Verify the image content is preserved
			const imageContent = userMessage.content.find((part: any) => part.type === "image_url")
			expect(imageContent).toBeDefined()
			expect(imageContent.image_url.url).toContain("data:image/png;base64,testImageData")
		})
	})
})

src/api/providers/openai.ts

Lines changed: 28 additions & 2 deletions
@@ -108,7 +108,19 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		if (deepseekReasoner) {
 			convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 		} else if (ark || enabledLegacyFormat) {
-			convertedMessages = [systemMessage, ...convertToSimpleMessages(messages)]
+			// For legacy format, we still need to preserve images for proper API functionality
+			// Only use simple format for text-only messages
+			const hasImages = messages.some(
+				(msg) => Array.isArray(msg.content) && msg.content.some((part) => part.type === "image"),
+			)
+
+			if (hasImages) {
+				// Use full OpenAI format to preserve images
+				convertedMessages = [systemMessage, ...convertToOpenAiMessages(messages)]
+			} else {
+				// Use simple format for text-only messages
+				convertedMessages = [systemMessage, ...convertToSimpleMessages(messages)]
+			}
 		} else {
 			if (modelInfo.supportsPromptCache) {
 				systemMessage = {
@@ -222,7 +234,21 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				messages: deepseekReasoner
 					? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 					: enabledLegacyFormat
-						? [systemMessage, ...convertToSimpleMessages(messages)]
+						? (() => {
+								// For legacy format, check if there are images
+								const hasImages = messages.some(
+									(msg) =>
+										Array.isArray(msg.content) && msg.content.some((part) => part.type === "image"),
+								)
+
+								if (hasImages) {
+									// Use full OpenAI format to preserve images
+									return [systemMessage, ...convertToOpenAiMessages(messages)]
+								} else {
+									// Use simple format for text-only messages
+									return [systemMessage, ...convertToSimpleMessages(messages)]
+								}
+							})()
 						: [systemMessage, ...convertToOpenAiMessages(messages)],
 			}
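
Note that the streaming and non-streaming branches above repeat the same inline image check before choosing between convertToOpenAiMessages and convertToSimpleMessages. As a reading aid only, the predicate can be stated as a standalone helper; hasImageBlocks below is hypothetical and not part of this commit.

// Hypothetical helper (not in this commit) expressing the shared predicate:
// a conversation needs the full OpenAI format when any message carries an
// "image" content block.
import { Anthropic } from "@anthropic-ai/sdk"

function hasImageBlocks(messages: Anthropic.Messages.MessageParam[]): boolean {
	return messages.some(
		(msg) => Array.isArray(msg.content) && msg.content.some((part) => part.type === "image"),
	)
}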

0 commit comments
