Skip to content

Commit 7e7d25b

Browse files
authored
fix: Multimodal type conversion to Converse API format (#579)
Fixes #574. This PR updates the LangChain -> Bedrock message conversion utility in ChatBedrockConverse to correctly convert multimodal input MIME types to Converse API format (where supported).
1 parent 43b8ebd commit 7e7d25b

File tree

2 files changed

+145
-6
lines changed

2 files changed

+145
-6
lines changed

libs/aws/langchain_aws/chat_models/bedrock_converse.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import base64
2+
import functools
23
import json
34
import logging
45
import re
@@ -61,6 +62,33 @@
6162
logger = logging.getLogger(__name__)
6263
_BM = TypeVar("_BM", bound=BaseModel)
6364

65+
# Lookup table from MIME type strings to the short format names returned by
# _mime_type_to_format. Keys are matched exactly as written here.
MIME_TO_FORMAT = {
    # Image formats: the format name is simply the MIME subtype.
    **{f"image/{subtype}": subtype for subtype in ("png", "jpeg", "gif", "webp")},
    # File formats
    "application/pdf": "pdf",
    "text/csv": "csv",
    "application/msword": "doc",
    (
        "application/vnd.openxmlformats-officedocument"
        ".wordprocessingml.document"
    ): "docx",
    "application/vnd.ms-excel": "xls",
    (
        "application/vnd.openxmlformats-officedocument"
        ".spreadsheetml.sheet"
    ): "xlsx",
    "text/html": "html",
    "text/plain": "txt",
    "text/markdown": "md",
    # Video formats
    "video/x-matroska": "mkv",
    "video/quicktime": "mov",
    "video/mp4": "mp4",
    "video/webm": "webm",
    "video/x-flv": "flv",
    "video/mpeg": "mpeg",
    "video/x-ms-wmv": "wmv",
    "video/3gpp": "three_gp",
}
91+
6492
_DictOrPydanticClass = Union[Dict[str, Any], Type[_BM], Type]
6593

6694

@@ -1200,6 +1228,27 @@ def _parse_stream_event(event: Dict[str, Any]) -> Optional[BaseMessageChunk]:
12001228
raise ValueError(f"Received unsupported stream event:\n\n{event}")
12011229

12021230

1231+
@functools.lru_cache(maxsize=128)
def _mime_type_to_format(mime_type: str) -> str:
    """Convert a MIME type string to its ``MIME_TO_FORMAT`` format name.

    Matching is tolerant of the variations permitted for media types: it is
    case-insensitive, ignores surrounding whitespace, and drops any parameters
    following ``;`` (for example ``text/plain; charset=utf-8``).

    Results are memoized in a bounded LRU cache; the bound keeps memory flat
    even when many distinct caller-supplied strings are seen. Calls that raise
    are not cached.

    Args:
        mime_type: Media type of the form ``type/subtype``, optionally followed
            by ``;``-separated parameters.

    Returns:
        The value mapped to the media type in ``MIME_TO_FORMAT``, or the bare
        subtype when the subtype is itself one of the mapped format names
        (e.g. ``application/docx`` -> ``docx``).

    Raises:
        ValueError: If the media type has no ``/`` separator, or if neither
            the full type nor its subtype corresponds to a known format.
    """
    # Media type and subtype names are case-insensitive and parameters are
    # irrelevant to the format, so compare on the normalized "type/subtype".
    essence = mime_type.partition(";")[0].strip().lower()

    if "/" not in essence:
        raise ValueError(
            f"Invalid MIME type format: {mime_type}. Expected format: 'type/subtype'"
        )

    try:
        return MIME_TO_FORMAT[essence]
    except KeyError:
        pass

    # Fallback to original method of splitting on "/" for simple cases where
    # the subtype already is a format name.
    subtype = essence.split("/")[1]
    if subtype in MIME_TO_FORMAT.values():
        return subtype

    raise ValueError(
        f"Unsupported MIME type: {mime_type}. "
        "Please refer to the Bedrock Converse API documentation for supported formats."
    )
1250+
1251+
12031252
def _format_data_content_block(block: dict) -> dict:
12041253
"""Format standard data content block to format expected by Converse API."""
12051254
if block["type"] == "image":
@@ -1209,7 +1258,7 @@ def _format_data_content_block(block: dict) -> dict:
12091258
raise ValueError(error_message)
12101259
formatted_block = {
12111260
"image": {
1212-
"format": block["mimeType"].split("/")[1],
1261+
"format": _mime_type_to_format(block["mimeType"]),
12131262
"source": {"bytes": _b64str_to_bytes(block["data"])},
12141263
}
12151264
}
@@ -1224,7 +1273,7 @@ def _format_data_content_block(block: dict) -> dict:
12241273
raise ValueError(error_message)
12251274
formatted_block = {
12261275
"document": {
1227-
"format": block["mimeType"].split("/")[1],
1276+
"format": _mime_type_to_format(block["mimeType"]),
12281277
"source": {"bytes": _b64str_to_bytes(block["data"])},
12291278
}
12301279
}
@@ -1274,7 +1323,7 @@ def _lc_content_to_bedrock(
12741323
bedrock_content.append(
12751324
{
12761325
"image": {
1277-
"format": block["source"]["mediaType"].split("/")[1],
1326+
"format": _mime_type_to_format(block["source"]["mediaType"]),
12781327
"source": {
12791328
"bytes": _b64str_to_bytes(block["source"]["data"])
12801329
},
@@ -1295,7 +1344,7 @@ def _lc_content_to_bedrock(
12951344
bedrock_content.append(
12961345
{
12971346
"video": {
1298-
"format": block["source"]["mediaType"].split("/")[1],
1347+
"format": _mime_type_to_format(block["source"]["mediaType"]),
12991348
"source": {
13001349
"bytes": _b64str_to_bytes(block["source"]["data"])
13011350
},
@@ -1306,7 +1355,7 @@ def _lc_content_to_bedrock(
13061355
bedrock_content.append(
13071356
{
13081357
"video": {
1309-
"format": block["source"]["mediaType"].split("/")[1],
1358+
"format": _mime_type_to_format(block["source"]["mediaType"]),
13101359
"source": {"s3Location": block["source"]["data"]},
13111360
}
13121361
}

libs/aws/tests/unit_tests/chat_models/test_bedrock_converse.py

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1107,6 +1107,95 @@ def test__lc_content_to_bedrock_reasoning_content_signature() -> None:
11071107
assert expected_system == actual_system
11081108

11091109

1110+
def test__lc_content_to_bedrock_mime_types() -> None:
    """Video, image and file inputs each convert to the matching output block."""
    raw_video = b"video_test_data"
    raw_image = b"image_test_data"
    raw_file = b"file_test_data"

    def _encode(raw: bytes) -> str:
        # Input blocks carry their payload as a base64 text string.
        return base64.b64encode(raw).decode("utf-8")

    # Create content with one of each type
    content: List[Union[str, Dict[str, Any]]] = [
        {
            "type": "video",
            "source": {
                "type": "base64",
                "mediaType": "video/mp4",
                "data": _encode(raw_video),
            },
        },
        {
            "type": "image",
            "source": {
                "type": "base64",
                "mediaType": "image/jpeg",
                "data": _encode(raw_image),
            },
        },
        {
            "type": "file",
            "sourceType": "base64",
            "mimeType": "application/pdf",
            "data": _encode(raw_file),
            "name": "test_document.pdf",
        },
    ]

    # The converted blocks hold the decoded bytes, i.e. the raw payloads.
    expected_content = [
        {"video": {"format": "mp4", "source": {"bytes": raw_video}}},
        {"image": {"format": "jpeg", "source": {"bytes": raw_image}}},
        {
            "document": {
                "format": "pdf",
                "name": "test_document.pdf",
                "source": {"bytes": raw_file},
            }
        },
    ]

    assert _lc_content_to_bedrock(content) == expected_content
1172+
1173+
1174+
def test__lc_content_to_bedrock_mime_types_invalid() -> None:
    """Malformed and unsupported MIME types are both rejected with ValueError."""
    encoded_payload = base64.b64encode(b"test_data").decode("utf-8")

    # No "/" separator at all: not a MIME type.
    malformed_image = {
        "type": "image",
        "source": {
            "type": "base64",
            "mediaType": "invalidmimetype",
            "data": encoded_payload,
        },
    }
    # Well-formed, but with no matching supported format.
    unsupported_file = {
        "type": "file",
        "sourceType": "base64",
        "mimeType": "application/unknown-format",
        "data": encoded_payload,
        "name": "test_document.xyz",
    }

    with pytest.raises(ValueError, match="Invalid MIME type format"):
        _lc_content_to_bedrock([malformed_image])

    with pytest.raises(ValueError, match="Unsupported MIME type"):
        _lc_content_to_bedrock([unsupported_file])
1197+
1198+
11101199
def test__get_provider() -> None:
11111200
llm = ChatBedrockConverse(
11121201
model="anthropic.claude-3-sonnet-20240229-v1:0", region_name="us-west-2"
@@ -1572,8 +1661,9 @@ def side_effect(service_name: str, **kwargs: Any) -> mock.Mock:
15721661

15731662
# The streaming should be disabled for models with no streaming support
15741663
assert chat_model.disable_streaming is True
1664+
15751665

15761666
def test_nova_provider_extraction() -> None:
    """Test that provider is correctly extracted from Nova model ID when not provided."""
    # No explicit provider is passed; it has to come from the model ID.
    nova_model = ChatBedrockConverse(
        client=mock.MagicMock(),
        model="us.amazon.nova-pro-v1:0",
        region_name="us-west-2",
    )
    assert nova_model.provider == "amazon"

0 commit comments

Comments
 (0)