
Commit 81c4f21

fix(standard-tests): update multimodal tests (#33781)
1 parent f2dab56 commit 81c4f21

4 files changed, +77 -24 lines changed


libs/partners/anthropic/tests/integration_tests/test_chat_models.py

Lines changed: 5 additions & 5 deletions
@@ -1078,14 +1078,14 @@ class color_picker(BaseModel): # noqa: N801
             "text": "what's your favorite color in this image",
         },
     ]
-    image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+    image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
     image_data = b64encode(httpx.get(image_url).content).decode("utf-8")
     human_content.append(
         {
             "type": "image",
             "source": {
                 "type": "base64",
-                "media_type": "image/jpeg",
+                "media_type": "image/png",
                 "data": image_data,
             },
         },
@@ -1098,7 +1098,7 @@ class color_picker(BaseModel): # noqa: N801
                 {"type": "text", "text": "Hmm let me think about that"},
                 {
                     "type": "tool_use",
-                    "input": {"fav_color": "green"},
+                    "input": {"fav_color": "purple"},
                     "id": "foo",
                     "name": "color_picker",
                 },
@@ -1112,7 +1112,7 @@ class color_picker(BaseModel): # noqa: N801
                     "content": [
                         {
                             "type": "text",
-                            "text": "green is a great pick! that's my sister's favorite color",  # noqa: E501
+                            "text": "purple is a great pick! that's my sister's favorite color",  # noqa: E501
                         },
                     ],
                     "is_error": False,
@@ -1122,7 +1122,7 @@ class color_picker(BaseModel): # noqa: N801
         ),
    ]
    llm = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]
-    llm.bind_tools([color_picker]).invoke(messages)
+    _ = llm.bind_tools([color_picker]).invoke(messages)


 @pytest.mark.default_cassette("test_web_search.yaml.gz")
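
For reference, the updated fixture handling above can be reproduced outside the test roughly as in the sketch below. It only mirrors the diff (same URL and block shape); variable names are illustrative and this is not the test itself.

```python
# Sketch of the new fixture handling: fetch the PNG now hosted in the
# langchain-ai/docs repo and wrap it in an Anthropic-native base64 image block.
from base64 import b64encode

import httpx

image_url = (
    "https://raw.githubusercontent.com/langchain-ai/docs/"
    "4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
)
image_data = b64encode(httpx.get(image_url).content).decode("utf-8")

image_block = {
    "type": "image",
    "source": {
        "type": "base64",
        "media_type": "image/png",  # the previous fixture was a JPEG
        "data": image_data,
    },
}
```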

libs/partners/openai/tests/integration_tests/chat_models/test_base.py

Lines changed: 2 additions & 2 deletions
@@ -709,7 +709,7 @@ async def test_openai_response_headers_async(use_responses_api: bool) -> None:

 def test_image_token_counting_jpeg() -> None:
     model = ChatOpenAI(model="gpt-4o", temperature=0)
-    image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+    image_url = "https://raw.githubusercontent.com/langchain-ai/docs/9f99bb977307a1bd5efeb8dc6b67eb13904c4af1/src/oss/images/checkpoints.jpg"
     message = HumanMessage(
         content=[
             {"type": "text", "text": "describe the weather in this image"},
@@ -741,7 +741,7 @@ def test_image_token_counting_jpeg() -> None:

 def test_image_token_counting_png() -> None:
     model = ChatOpenAI(model="gpt-4o", temperature=0)
-    image_url = "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"
+    image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
     message = HumanMessage(
         content=[
             {"type": "text", "text": "how many dice are in this image"},

libs/standard-tests/langchain_tests/integration_tests/chat_models.py

Lines changed: 60 additions & 17 deletions
@@ -4,6 +4,8 @@

 import base64
 import json
+import os
+import warnings
 from typing import Annotated, Any, Literal
 from unittest.mock import MagicMock

@@ -134,6 +136,21 @@ def _validate_tool_call_message_no_args(message: BaseMessage) -> None:
     assert tool_call.get("type") == "tool_call"


+def _get_base64_from_url(url: str) -> str:
+    user_agent = os.environ.get("LANGCHAIN_TESTS_USER_AGENT")
+    if not user_agent:
+        warning_message = (
+            "LANGCHAIN_TESTS_USER_AGENT environment variable not set. "
+            "langchain-tests pulls (CC0 License) audio data from wikimedia.org. "
+            "Consider setting a user agent to identify your requests. See "
+            "https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy"
+        )
+        warnings.warn(warning_message, stacklevel=2)
+    headers = {"User-Agent": user_agent} if user_agent else {}
+    httpx_response = httpx.get(url, headers=headers).content
+    return base64.b64encode(httpx_response).decode("utf-8")
+
+
 @tool
 def unicode_customer(customer_name: str, description: str) -> str:
     """Tool for creating a customer with Unicode name.
@@ -405,6 +422,16 @@ def supports_audio_inputs(self) -> bool:
             return True
         ```

+        Note: this test downloads audio data from wikimedia.org. You may need to set
+        the `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these
+        requests, e.g.,
+
+        ```bash
+        export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; [email protected]) generic-library/0.0"
+        ```
+
+        Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy).
+
     ??? info "`supports_video_inputs`"

         Boolean property indicating whether the chat model supports image inputs.
@@ -2429,6 +2456,16 @@ def test_audio_inputs(self, model: BaseChatModel) -> None:
             }
             ```

+        Note: this test downloads audio data from wikimedia.org. You may need to set
+        the `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these
+        requests, e.g.,
+
+        ```bash
+        export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; [email protected]) generic-library/0.0"
+        ```
+
+        Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy).
+
         ??? note "Configuration"

             To disable this test, set `supports_audio_inputs` to False in your
@@ -2447,12 +2484,14 @@ def supports_audio_inputs(self) -> bool:
         with audio content blocks, specifically base64-encoded files. Otherwise,
         set the `supports_audio_inputs` property to False.

-        """
+        """  # noqa: E501
         if not self.supports_audio_inputs:
             pytest.skip("Model does not support audio inputs.")

-        url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav"
-        audio_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+        # https://commons.wikimedia.org/wiki/File:Northern_Flicker_202280456.wav
+        # License: CC0 1.0 Universal
+        url = "https://upload.wikimedia.org/wikipedia/commons/6/6a/Northern_Flicker_202280456.wav"
+        audio_data = _get_base64_from_url(url)

         message = HumanMessage(
             [
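
The hunk is truncated before the message content. For orientation only, one common shape for a base64 audio block is the OpenAI-style `input_audio` form sketched below; this is an assumption for illustration, not the exact block built by `test_audio_inputs`.

```python
# Assumed example of a base64 audio content block (OpenAI-style "input_audio");
# the exact block used by the test is not visible in this hunk.
from langchain_core.messages import HumanMessage

audio_data = "..."  # base64-encoded WAV bytes, e.g. from _get_base64_from_url

message = HumanMessage(
    [
        {"type": "text", "text": "Describe this audio."},
        {
            "type": "input_audio",
            "input_audio": {"data": audio_data, "format": "wav"},
        },
    ]
)
```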
@@ -2551,16 +2590,16 @@ def supports_image_urls(self) -> bool:
         if not self.supports_image_inputs:
             pytest.skip("Model does not support image message.")

-        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+        image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
         image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")

         # OpenAI CC format, base64 data
         message = HumanMessage(
             content=[
-                {"type": "text", "text": "describe the weather in this image"},
+                {"type": "text", "text": "Give a concise description of this image."},
                 {
                     "type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+                    "image_url": {"url": f"data:image/png;base64,{image_data}"},
                 },
             ],
         )
@@ -2569,11 +2608,11 @@ def supports_image_urls(self) -> bool:
         # Standard LangChain format, base64 data
         message = HumanMessage(
             content=[
-                {"type": "text", "text": "describe the weather in this image"},
+                {"type": "text", "text": "Give a concise description of this image."},
                 {
                     "type": "image",
                     "base64": image_data,
-                    "mime_type": "image/jpeg",
+                    "mime_type": "image/png",
                 },
             ],
         )
@@ -2583,7 +2622,10 @@ def supports_image_urls(self) -> bool:
         if self.supports_image_urls:
             message = HumanMessage(
                 content=[
-                    {"type": "text", "text": "describe the weather in this image"},
+                    {
+                        "type": "text",
+                        "text": "Give a concise description of this image.",
+                    },
                     {
                         "type": "image",
                         "url": image_url,
@@ -2654,15 +2696,15 @@ def supports_image_tool_message(self) -> bool:
         if not self.supports_image_tool_message:
             pytest.skip("Model does not support image tool message.")

-        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+        image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
         image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")

         # OpenAI CC format, base64 data
         oai_format_message = ToolMessage(
             content=[
                 {
                     "type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+                    "image_url": {"url": f"data:image/png;base64,{image_data}"},
                 },
             ],
             tool_call_id="1",
@@ -2675,7 +2717,7 @@ def supports_image_tool_message(self) -> bool:
                 {
                     "type": "image",
                     "base64": image_data,
-                    "mime_type": "image/jpeg",
+                    "mime_type": "image/png",
                 },
             ],
             tool_call_id="1",
@@ -2685,7 +2727,8 @@ def supports_image_tool_message(self) -> bool:
         for tool_message in [oai_format_message, standard_format_message]:
             messages = [
                 HumanMessage(
-                    "get a random image using the tool and describe the weather"
+                    "get a random diagram using the tool and give it a concise "
+                    "description"
                 ),
                 AIMessage(
                     [],
@@ -2888,14 +2931,14 @@ def supports_anthropic_inputs(self) -> bool:
             },
         ]
         if self.supports_image_inputs:
-            image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+            image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
             image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
             human_content.append(
                 {
                     "type": "image",
                     "source": {
                         "type": "base64",
-                        "media_type": "image/jpeg",
+                        "media_type": "image/png",
                         "data": image_data,
                     },
                 }
@@ -2908,15 +2951,15 @@ def supports_anthropic_inputs(self) -> bool:
                     {"type": "text", "text": "Hmm let me think about that"},
                     {
                         "type": "tool_use",
-                        "input": {"fav_color": "green"},
+                        "input": {"fav_color": "purple"},
                         "id": "foo",
                         "name": "color_picker",
                     },
                 ],
                 tool_calls=[
                     {
                         "name": "color_picker",
-                        "args": {"fav_color": "green"},
+                        "args": {"fav_color": "purple"},
                         "id": "foo",
                         "type": "tool_call",
                     }
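
For downstream provider packages, the new note boils down to exporting `LANGCHAIN_TESTS_USER_AGENT` before the standard integration tests run. A hypothetical suite opting into the audio test might look like the sketch below; `ChatMyProvider` and the model name are placeholders, not part of this commit.

```python
# Hypothetical downstream test class. Setting LANGCHAIN_TESTS_USER_AGENT
# (here, or in conftest.py, or in the shell) keeps the wikimedia.org audio
# download warning-free.
import os

from langchain_core.language_models import BaseChatModel
from langchain_tests.integration_tests import ChatModelIntegrationTests

os.environ.setdefault(
    "LANGCHAIN_TESTS_USER_AGENT",
    "CoolBot/0.0 (https://example.org/coolbot/; [email protected]) generic-library/0.0",
)


class TestMyProviderStandard(ChatModelIntegrationTests):
    @property
    def chat_model_class(self) -> type[BaseChatModel]:
        from my_provider import ChatMyProvider  # placeholder import

        return ChatMyProvider

    @property
    def chat_model_params(self) -> dict:
        return {"model": "my-model"}  # placeholder model name

    @property
    def supports_audio_inputs(self) -> bool:
        return True
```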

libs/standard-tests/langchain_tests/unit_tests/chat_models.py

Lines changed: 10 additions & 0 deletions
@@ -515,6 +515,16 @@ def supports_audio_inputs(self) -> bool:
             return True
         ```

+        Note: this test downloads audio data from wikimedia.org. You may need to set
+        the `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these
+        requests, e.g.,
+
+        ```bash
+        export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; [email protected]) generic-library/0.0"
+        ```
+
+        Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy).
+
     ??? info "`supports_video_inputs`"

         Boolean property indicating whether the chat model supports image inputs.
