Skip to content

Commit 558cacb

Browse files
authored
fix(vertexai): mime type parsing from image URLs with params (#1377)
1 parent 084e295 commit 558cacb

File tree

2 files changed

+38
-1
lines changed

2 files changed

+38
-1
lines changed

libs/vertexai/langchain_google_vertexai/_anthropic_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import base64
22
import re
3+
import urllib.parse
34
import warnings
45
from collections.abc import Callable, Sequence
56
from typing import (
@@ -90,7 +91,8 @@ def _format_image(image_url: str, project: str | None) -> dict:
9091
if validators.url(image_url):
9192
loader = ImageBytesLoader(project=project)
9293
image_bytes = loader.load_bytes(image_url)
93-
raw_mime_type = image_url.split(".")[-1].lower()
94+
path = urllib.parse.urlparse(image_url).path
95+
raw_mime_type = path.split(".")[-1].lower()
9496
doc_type = "application" if raw_mime_type == "pdf" else "image"
9597
mime_type = (
9698
f"{doc_type}/jpeg"

libs/vertexai/tests/unit_tests/test_anthropic_utils.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Unit tests for _anthropic_utils.py."""
22

3+
import base64
4+
from unittest.mock import patch
35
import pytest
46
from anthropic.types import (
57
RawContentBlockDeltaEvent,
@@ -23,6 +25,8 @@
2325
_thinking_in_params,
2426
)
2527

28+
from langchain_google_vertexai._anthropic_utils import _format_image
29+
2630

2731
def test_format_message_anthropic_with_cache_control_in_kwargs() -> None:
2832
"""Test formatting a message with cache control in additional_kwargs."""
@@ -1309,3 +1313,34 @@ def test_tool_message_preserves_cache_control() -> None:
13091313
"tool_use_id": "call_1",
13101314
"cache_control": {"type": "ephemeral"},
13111315
}
1316+
1317+
1318+
@pytest.mark.parametrize(
    ("image_url", "expected_media_type"),
    [
        ("https://example.com/image.png?token=123", "image/png"),
        ("https://example.com/image.jpg", "image/jpeg"),
        ("https://example.com/document.pdf", "application/pdf"),
    ],
)
def test_format_image(image_url: str, expected_media_type: str) -> None:
    """Check the media type `_format_image` reports for remote URLs.

    `ImageBytesLoader` is patched so nothing is fetched; the stubbed loader
    hands back a fixed byte string. The cases cover a URL carrying a query
    string, a `.jpg` URL (expected to be reported as `image/jpeg`), and a
    `.pdf` URL (expected to be reported as `application/pdf`).
    """
    fake_bytes = b"fake_image_data"
    loader_target = "langchain_google_vertexai._anthropic_utils.ImageBytesLoader"

    with patch(loader_target) as loader_cls:
        stub_loader = loader_cls.return_value
        stub_loader.load_bytes.return_value = fake_bytes

        formatted = _format_image(image_url, "test-project")

        # The payload must be the base64 text of exactly what the loader gave.
        encoded = base64.b64encode(fake_bytes).decode("ascii")
        assert formatted == {
            "type": "base64",
            "media_type": expected_media_type,
            "data": encoded,
        }

        # The loader receives the full, unmodified URL (query string included).
        stub_loader.load_bytes.assert_called_once_with(image_url)

0 commit comments

Comments
 (0)