Skip to content

Commit 95a395d

Browse files
authored
[Bugfix] Fix Anthropic API base64 image handling in Messages endpoint (vllm-project#35557)
Signed-off-by: Martin Vit <martin@voipmonitor.org>
1 parent e94b263 commit 95a395d

File tree

2 files changed

+389
-5
lines changed

2 files changed

+389
-5
lines changed
Lines changed: 326 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,326 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
"""Unit tests for Anthropic-to-OpenAI request conversion.
4+
5+
Tests the image source handling and tool_result content parsing in
6+
AnthropicServingMessages._convert_anthropic_to_openai_request().
7+
"""
8+
9+
from vllm.entrypoints.anthropic.protocol import (
10+
AnthropicMessagesRequest,
11+
)
12+
from vllm.entrypoints.anthropic.serving import AnthropicServingMessages
13+
14+
# Short alias for the Anthropic-to-OpenAI request converter exercised by this module.
_convert = AnthropicServingMessages._convert_anthropic_to_openai_request
15+
# Short alias for the image-source-to-URL helper exercised by this module.
_img_url = AnthropicServingMessages._convert_image_source_to_url
16+
17+
18+
def _make_request(messages: list[dict], **kwargs) -> AnthropicMessagesRequest:
    """Build an ``AnthropicMessagesRequest`` wrapping *messages*.

    The model name (``"test-model"``) and ``max_tokens`` (128) are fixed
    placeholder values; any additional keyword arguments are forwarded to
    the request constructor unchanged.
    """
    request = AnthropicMessagesRequest(
        model="test-model", max_tokens=128, messages=messages, **kwargs
    )
    return request
28+
29+
30+
# ======================================================================
31+
# _convert_image_source_to_url
32+
# ======================================================================
33+
34+
35+
class TestConvertImageSourceToUrl:
    """Call ``_img_url`` directly with individual image ``source`` dicts."""

    def test_base64_source(self):
        """A base64 JPEG source is rendered as a ``data:`` URI."""
        expected = "data:image/jpeg;base64,iVBORw0KGgo="
        actual = _img_url(
            {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": "iVBORw0KGgo=",
            }
        )
        assert actual == expected

    def test_base64_png(self):
        """The declared media type is carried into the ``data:`` URI."""
        expected = "data:image/png;base64,AAAA"
        actual = _img_url(
            {
                "type": "base64",
                "media_type": "image/png",
                "data": "AAAA",
            }
        )
        assert actual == expected

    def test_url_source(self):
        """A URL source is returned as-is."""
        expected = "https://example.com/image.jpg"
        actual = _img_url(
            {
                "type": "url",
                "url": "https://example.com/image.jpg",
            }
        )
        assert actual == expected

    def test_missing_type_defaults_to_base64(self):
        """When 'type' is absent, treat as base64."""
        expected = "data:image/webp;base64,UklGR"
        actual = _img_url(
            {
                "media_type": "image/webp",
                "data": "UklGR",
            }
        )
        assert actual == expected

    def test_missing_media_type_defaults_to_jpeg(self):
        """When 'media_type' is absent, the URI uses image/jpeg."""
        expected = "data:image/jpeg;base64,abc123"
        actual = _img_url({"type": "base64", "data": "abc123"})
        assert actual == expected

    def test_url_source_missing_url_returns_empty(self):
        """A URL source without a 'url' key yields an empty string."""
        actual = _img_url({"type": "url"})
        assert actual == ""

    def test_empty_source_returns_data_uri_shell(self):
        """An empty source yields a JPEG data URI with no payload."""
        empty_source: dict = {}
        expected = "data:image/jpeg;base64,"
        assert _img_url(empty_source) == expected
78+
79+
80+
# ======================================================================
81+
# Image blocks inside user messages
82+
# ======================================================================
83+
84+
85+
class TestImageContentBlocks:
    """Image blocks placed directly inside a user message's content list."""

    def test_base64_image_in_user_message(self):
        """Text + base64 image converts to text + ``image_url`` parts."""
        text_block = {"type": "text", "text": "Describe this image"}
        image_block = {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": "iVBORw0KGgo=",
            },
        }
        user_turn = {"role": "user", "content": [text_block, image_block]}

        converted = _convert(_make_request([user_turn]))
        first_message = converted.messages[0]
        assert first_message["role"] == "user"

        content_parts = first_message["content"]
        assert len(content_parts) == 2

        expected_text_part = {"type": "text", "text": "Describe this image"}
        expected_image_part = {
            "type": "image_url",
            "image_url": {"url": "data:image/jpeg;base64,iVBORw0KGgo="},
        }
        assert content_parts[0] == expected_text_part
        assert content_parts[1] == expected_image_part

    def test_url_image_in_user_message(self):
        """A URL image source is passed through as the ``image_url`` URL."""
        text_block = {"type": "text", "text": "What is this?"}
        image_block = {
            "type": "image",
            "source": {
                "type": "url",
                "url": "https://example.com/cat.png",
            },
        }
        user_turn = {"role": "user", "content": [text_block, image_block]}

        converted = _convert(_make_request([user_turn]))
        content_parts = converted.messages[0]["content"]

        expected_image_part = {
            "type": "image_url",
            "image_url": {"url": "https://example.com/cat.png"},
        }
        assert content_parts[1] == expected_image_part
143+
144+
145+
# ======================================================================
146+
# tool_result content handling
147+
# ======================================================================
148+
149+
150+
class TestToolResultContent:
    """Conversion of ``tool_result`` blocks carried in a user message."""

    @staticmethod
    def _messages_with_role(result, role):
        """Return the converted messages whose ``role`` equals *role*."""
        return [msg for msg in result.messages if msg["role"] == role]

    @staticmethod
    def _user_messages_with_list_content(result):
        """Return converted user messages whose ``content`` is a list."""
        matches = []
        for msg in result.messages:
            if msg["role"] == "user" and isinstance(msg.get("content"), list):
                matches.append(msg)
        return matches

    def _make_tool_result_request(
        self, tool_result_content
    ) -> AnthropicMessagesRequest:
        """Build a request with assistant tool_use followed by user
        tool_result."""
        tool_use_block = {
            "type": "tool_use",
            "id": "call_001",
            "name": "read_file",
            "input": {"path": "/tmp/img.png"},
        }
        tool_result_block = {
            "type": "tool_result",
            "tool_use_id": "call_001",
            "content": tool_result_content,
        }
        assistant_turn = {"role": "assistant", "content": [tool_use_block]}
        user_turn = {"role": "user", "content": [tool_result_block]}
        return _make_request([assistant_turn, user_turn])

    def test_tool_result_string_content(self):
        """A plain-string tool_result becomes the tool message content."""
        converted = _convert(self._make_tool_result_request("file contents here"))

        tool_messages = self._messages_with_role(converted, "tool")
        assert len(tool_messages) == 1
        only_tool_message = tool_messages[0]
        assert only_tool_message["content"] == "file contents here"
        assert only_tool_message["tool_call_id"] == "call_001"

    def test_tool_result_text_blocks(self):
        """Multiple text blocks are joined with newlines."""
        text_blocks = [
            {"type": "text", "text": "line 1"},
            {"type": "text", "text": "line 2"},
        ]
        converted = _convert(self._make_tool_result_request(text_blocks))

        tool_messages = self._messages_with_role(converted, "tool")
        assert len(tool_messages) == 1
        assert tool_messages[0]["content"] == "line 1\nline 2"

    def test_tool_result_with_image(self):
        """Image in tool_result should produce a follow-up user message."""
        image_block = {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": "AAAA",
            },
        }
        converted = _convert(self._make_tool_result_request([image_block]))

        tool_messages = self._messages_with_role(converted, "tool")
        assert len(tool_messages) == 1
        assert tool_messages[0]["content"] == ""

        # The image should be injected as a follow-up user message
        follow_ups = self._user_messages_with_list_content(converted)
        assert len(follow_ups) == 1
        image_parts = follow_ups[0]["content"]
        assert len(image_parts) == 1
        expected_part = {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,AAAA"},
        }
        assert image_parts[0] == expected_part

    def test_tool_result_with_text_and_image(self):
        """Mixed text+image tool_result: text in tool msg, image in user
        msg."""
        mixed_blocks = [
            {"type": "text", "text": "Here is the screenshot"},
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": "QUFB",
                },
            },
        ]
        converted = _convert(self._make_tool_result_request(mixed_blocks))

        tool_messages = self._messages_with_role(converted, "tool")
        assert len(tool_messages) == 1
        assert tool_messages[0]["content"] == "Here is the screenshot"

        follow_ups = self._user_messages_with_list_content(converted)
        assert len(follow_ups) == 1
        first_part = follow_ups[0]["content"][0]
        assert first_part["image_url"]["url"] == "data:image/jpeg;base64,QUFB"

    def test_tool_result_with_multiple_images(self):
        """All images from one tool_result land in a single follow-up."""
        base64_image = {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": "IMG1",
            },
        }
        url_image = {
            "type": "image",
            "source": {
                "type": "url",
                "url": "https://example.com/img2.jpg",
            },
        }
        converted = _convert(
            self._make_tool_result_request([base64_image, url_image])
        )

        follow_ups = self._user_messages_with_list_content(converted)
        assert len(follow_ups) == 1
        collected_urls = []
        for part in follow_ups[0]["content"]:
            collected_urls.append(part["image_url"]["url"])
        expected_urls = [
            "data:image/png;base64,IMG1",
            "https://example.com/img2.jpg",
        ]
        assert collected_urls == expected_urls

    def test_tool_result_none_content(self):
        """A ``None`` tool_result content yields an empty tool message."""
        converted = _convert(self._make_tool_result_request(None))

        tool_messages = self._messages_with_role(converted, "tool")
        assert len(tool_messages) == 1
        assert tool_messages[0]["content"] == ""

    def test_tool_result_no_follow_up_when_no_images(self):
        """Ensure no extra user message is added when there are no images."""
        text_only = [{"type": "text", "text": "just text"}]
        converted = _convert(self._make_tool_result_request(text_only))

        follow_ups = self._user_messages_with_list_content(converted)
        assert len(follow_ups) == 0

0 commit comments

Comments
 (0)