
Commit 81c4f21

fix(standard-tests): update multimodal tests (#33781)
1 parent f2dab56 commit 81c4f21

4 files changed, +77 -24 lines changed


libs/partners/anthropic/tests/integration_tests/test_chat_models.py

Lines changed: 5 additions & 5 deletions
@@ -1078,14 +1078,14 @@ class color_picker(BaseModel): # noqa: N801
             "text": "what's your favorite color in this image",
         },
     ]
-    image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+    image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
     image_data = b64encode(httpx.get(image_url).content).decode("utf-8")
     human_content.append(
         {
             "type": "image",
             "source": {
                 "type": "base64",
-                "media_type": "image/jpeg",
+                "media_type": "image/png",
                 "data": image_data,
             },
         },
@@ -1098,7 +1098,7 @@ class color_picker(BaseModel): # noqa: N801
                 {"type": "text", "text": "Hmm let me think about that"},
                 {
                     "type": "tool_use",
-                    "input": {"fav_color": "green"},
+                    "input": {"fav_color": "purple"},
                     "id": "foo",
                     "name": "color_picker",
                 },
@@ -1112,7 +1112,7 @@ class color_picker(BaseModel): # noqa: N801
                     "content": [
                         {
                             "type": "text",
-                            "text": "green is a great pick! that's my sister's favorite color",  # noqa: E501
+                            "text": "purple is a great pick! that's my sister's favorite color",  # noqa: E501
                         },
                     ],
                     "is_error": False,
@@ -1122,7 +1122,7 @@ class color_picker(BaseModel): # noqa: N801
         ),
    ]
    llm = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]
-    llm.bind_tools([color_picker]).invoke(messages)
+    _ = llm.bind_tools([color_picker]).invoke(messages)


 @pytest.mark.default_cassette("test_web_search.yaml.gz")
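
For reference, the updated fixture handling above can be reproduced outside the test roughly as in the sketch below. It only mirrors the diff (same URL and block shape); variable names are illustrative and this is not the test itself.

```python
# Sketch of the new fixture handling: fetch the PNG now hosted in the
# langchain-ai/docs repo and wrap it in an Anthropic-native base64 image block.
from base64 import b64encode

import httpx

image_url = (
    "https://raw.githubusercontent.com/langchain-ai/docs/"
    "4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
)
image_data = b64encode(httpx.get(image_url).content).decode("utf-8")

image_block = {
    "type": "image",
    "source": {
        "type": "base64",
        "media_type": "image/png",  # the previous fixture was a JPEG
        "data": image_data,
    },
}
```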

libs/partners/openai/tests/integration_tests/chat_models/test_base.py

Lines changed: 2 additions & 2 deletions
@@ -709,7 +709,7 @@ async def test_openai_response_headers_async(use_responses_api: bool) -> None:

 def test_image_token_counting_jpeg() -> None:
     model = ChatOpenAI(model="gpt-4o", temperature=0)
-    image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+    image_url = "https://raw.githubusercontent.com/langchain-ai/docs/9f99bb977307a1bd5efeb8dc6b67eb13904c4af1/src/oss/images/checkpoints.jpg"
     message = HumanMessage(
         content=[
             {"type": "text", "text": "describe the weather in this image"},
@@ -741,7 +741,7 @@ def test_image_token_counting_jpeg() -> None:

 def test_image_token_counting_png() -> None:
     model = ChatOpenAI(model="gpt-4o", temperature=0)
-    image_url = "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"
+    image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
     message = HumanMessage(
         content=[
             {"type": "text", "text": "how many dice are in this image"},

libs/standard-tests/langchain_tests/integration_tests/chat_models.py

Lines changed: 60 additions & 17 deletions
@@ -4,6 +4,8 @@

 import base64
 import json
+import os
+import warnings
 from typing import Annotated, Any, Literal
 from unittest.mock import MagicMock

@@ -134,6 +136,21 @@ def _validate_tool_call_message_no_args(message: BaseMessage) -> None:
     assert tool_call.get("type") == "tool_call"


+def _get_base64_from_url(url: str) -> str:
+    user_agent = os.environ.get("LANGCHAIN_TESTS_USER_AGENT")
+    if not user_agent:
+        warning_message = (
+            "LANGCHAIN_TESTS_USER_AGENT environment variable not set. "
+            "langchain-tests pulls (CC0 License) audio data from wikimedia.org. "
+            "Consider setting a user agent to identify your requests. See "
+            "https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy"
+        )
+        warnings.warn(warning_message, stacklevel=2)
+    headers = {"User-Agent": user_agent} if user_agent else {}
+    httpx_response = httpx.get(url, headers=headers).content
+    return base64.b64encode(httpx_response).decode("utf-8")
+
+
 @tool
 def unicode_customer(customer_name: str, description: str) -> str:
     """Tool for creating a customer with Unicode name.
@@ -405,6 +422,16 @@ def supports_audio_inputs(self) -> bool:
             return True
         ```

+        Note: this test downloads audio data from wikimedia.org. You may need to set
+        the `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these
+        requests, e.g.,
+
+        ```bash
+        export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; [email protected]) generic-library/0.0"
+        ```
+
+        Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy).
+
     ??? info "`supports_video_inputs`"

         Boolean property indicating whether the chat model supports image inputs.
@@ -2429,6 +2456,16 @@ def test_audio_inputs(self, model: BaseChatModel) -> None:
             }
             ```

+        Note: this test downloads audio data from wikimedia.org. You may need to set
+        the `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these
+        requests, e.g.,
+
+        ```bash
+        export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; [email protected]) generic-library/0.0"
+        ```
+
+        Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy).
+
         ??? note "Configuration"

             To disable this test, set `supports_audio_inputs` to False in your
@@ -2447,12 +2484,14 @@ def supports_audio_inputs(self) -> bool:
         with audio content blocks, specifically base64-encoded files. Otherwise,
         set the `supports_audio_inputs` property to False.

-        """
+        """  # noqa: E501
         if not self.supports_audio_inputs:
             pytest.skip("Model does not support audio inputs.")

-        url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav"
-        audio_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+        # https://commons.wikimedia.org/wiki/File:Northern_Flicker_202280456.wav
+        # License: CC0 1.0 Universal
+        url = "https://upload.wikimedia.org/wikipedia/commons/6/6a/Northern_Flicker_202280456.wav"
+        audio_data = _get_base64_from_url(url)

         message = HumanMessage(
             [
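
The hunk is truncated before the message content. For orientation only, one common shape for a base64 audio block is the OpenAI-style `input_audio` form sketched below; this is an assumption for illustration, not the exact block built by `test_audio_inputs`.

```python
# Assumed example of a base64 audio content block (OpenAI-style "input_audio");
# the exact block used by the test is not visible in this hunk.
from langchain_core.messages import HumanMessage

audio_data = "..."  # base64-encoded WAV bytes, e.g. from _get_base64_from_url

message = HumanMessage(
    [
        {"type": "text", "text": "Describe this audio."},
        {
            "type": "input_audio",
            "input_audio": {"data": audio_data, "format": "wav"},
        },
    ]
)
```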
@@ -2551,16 +2590,16 @@ def supports_image_urls(self) -> bool:
         if not self.supports_image_inputs:
             pytest.skip("Model does not support image message.")

-        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+        image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
         image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")

         # OpenAI CC format, base64 data
         message = HumanMessage(
             content=[
-                {"type": "text", "text": "describe the weather in this image"},
+                {"type": "text", "text": "Give a concise description of this image."},
                 {
                     "type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+                    "image_url": {"url": f"data:image/png;base64,{image_data}"},
                 },
             ],
         )
@@ -2569,11 +2608,11 @@ def supports_image_urls(self) -> bool:
         # Standard LangChain format, base64 data
         message = HumanMessage(
             content=[
-                {"type": "text", "text": "describe the weather in this image"},
+                {"type": "text", "text": "Give a concise description of this image."},
                 {
                     "type": "image",
                     "base64": image_data,
-                    "mime_type": "image/jpeg",
+                    "mime_type": "image/png",
                 },
             ],
         )
@@ -2583,7 +2622,10 @@ def supports_image_urls(self) -> bool:
         if self.supports_image_urls:
             message = HumanMessage(
                 content=[
-                    {"type": "text", "text": "describe the weather in this image"},
+                    {
+                        "type": "text",
+                        "text": "Give a concise description of this image.",
+                    },
                     {
                         "type": "image",
                         "url": image_url,
@@ -2654,15 +2696,15 @@ def supports_image_tool_message(self) -> bool:
         if not self.supports_image_tool_message:
             pytest.skip("Model does not support image tool message.")

-        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+        image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
         image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")

         # OpenAI CC format, base64 data
         oai_format_message = ToolMessage(
             content=[
                 {
                     "type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+                    "image_url": {"url": f"data:image/png;base64,{image_data}"},
                 },
             ],
             tool_call_id="1",
@@ -2675,7 +2717,7 @@ def supports_image_tool_message(self) -> bool:
                 {
                     "type": "image",
                     "base64": image_data,
-                    "mime_type": "image/jpeg",
+                    "mime_type": "image/png",
                 },
             ],
             tool_call_id="1",
@@ -2685,7 +2727,8 @@ def supports_image_tool_message(self) -> bool:
         for tool_message in [oai_format_message, standard_format_message]:
             messages = [
                 HumanMessage(
-                    "get a random image using the tool and describe the weather"
+                    "get a random diagram using the tool and give it a concise "
+                    "description"
                 ),
                 AIMessage(
                     [],
@@ -2888,14 +2931,14 @@ def supports_anthropic_inputs(self) -> bool:
             },
         ]
         if self.supports_image_inputs:
-            image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+            image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
             image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
             human_content.append(
                 {
                     "type": "image",
                     "source": {
                         "type": "base64",
-                        "media_type": "image/jpeg",
+                        "media_type": "image/png",
                         "data": image_data,
                     },
                 }
@@ -2908,15 +2951,15 @@ def supports_anthropic_inputs(self) -> bool:
                     {"type": "text", "text": "Hmm let me think about that"},
                     {
                         "type": "tool_use",
-                        "input": {"fav_color": "green"},
+                        "input": {"fav_color": "purple"},
                         "id": "foo",
                         "name": "color_picker",
                     },
                 ],
                 tool_calls=[
                     {
                         "name": "color_picker",
-                        "args": {"fav_color": "green"},
+                        "args": {"fav_color": "purple"},
                         "id": "foo",
                         "type": "tool_call",
                     }
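
For downstream provider packages, the new note boils down to exporting `LANGCHAIN_TESTS_USER_AGENT` before the standard integration tests run. A hypothetical suite opting into the audio test might look like the sketch below; `ChatMyProvider` and the model name are placeholders, not part of this commit.

```python
# Hypothetical downstream test class. Setting LANGCHAIN_TESTS_USER_AGENT
# (here, or in conftest.py, or in the shell) keeps the wikimedia.org audio
# download warning-free.
import os

from langchain_core.language_models import BaseChatModel
from langchain_tests.integration_tests import ChatModelIntegrationTests

os.environ.setdefault(
    "LANGCHAIN_TESTS_USER_AGENT",
    "CoolBot/0.0 (https://example.org/coolbot/; [email protected]) generic-library/0.0",
)


class TestMyProviderStandard(ChatModelIntegrationTests):
    @property
    def chat_model_class(self) -> type[BaseChatModel]:
        from my_provider import ChatMyProvider  # placeholder import

        return ChatMyProvider

    @property
    def chat_model_params(self) -> dict:
        return {"model": "my-model"}  # placeholder model name

    @property
    def supports_audio_inputs(self) -> bool:
        return True
```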

libs/standard-tests/langchain_tests/unit_tests/chat_models.py

Lines changed: 10 additions & 0 deletions
@@ -515,6 +515,16 @@ def supports_audio_inputs(self) -> bool:
             return True
         ```

+        Note: this test downloads audio data from wikimedia.org. You may need to set
+        the `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these
+        requests, e.g.,
+
+        ```bash
+        export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; [email protected]) generic-library/0.0"
+        ```
+
+        Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy).
+
     ??? info "`supports_video_inputs`"

         Boolean property indicating whether the chat model supports image inputs.
