Skip to content

Commit 12795fa

Browse files
committed
fix: Multimodal type conversion to Converse API format
1 parent 6d5df57 commit 12795fa

File tree

2 files changed

+112
-6
lines changed

2 files changed

+112
-6
lines changed

libs/aws/langchain_aws/chat_models/bedrock_converse.py

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1200,6 +1200,48 @@ def _parse_stream_event(event: Dict[str, Any]) -> Optional[BaseMessageChunk]:
12001200
raise ValueError(f"Received unsupported stream event:\n\n{event}")
12011201

12021202

1203+
def _mime_type_to_format(mime_type: str) -> str:
1204+
mime_to_format = {
1205+
# Image formats
1206+
"image/png": "png",
1207+
"image/jpeg": "jpeg",
1208+
"image/gif": "gif",
1209+
"image/webp": "webp",
1210+
# File formats
1211+
"application/pdf": "pdf",
1212+
"text/csv": "csv",
1213+
"application/msword": "doc",
1214+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
1215+
"application/vnd.ms-excel": "xls",
1216+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
1217+
"text/html": "html",
1218+
"text/plain": "txt",
1219+
"text/markdown": "md",
1220+
# Video formats
1221+
"video/x-matroska": "mkv",
1222+
"video/quicktime": "mov",
1223+
"video/mp4": "mp4",
1224+
"video/webm": "webm",
1225+
"video/x-flv": "flv",
1226+
"video/mpeg": "mpeg",
1227+
"video/x-ms-wmv": "wmv",
1228+
"video/3gpp": "three_gp",
1229+
}
1230+
1231+
if mime_type in mime_to_format:
1232+
return mime_to_format[mime_type]
1233+
1234+
# Fallback to original method of splitting on "/" for simple cases
1235+
all_formats = set(mime_to_format.values())
1236+
format_part = mime_type.split("/")[1]
1237+
if format_part in all_formats:
1238+
return format_part
1239+
1240+
raise ValueError(
1241+
f"Unsupported MIME type: {mime_type}. Please refer to the Bedrock Converse API documentation for supported formats."
1242+
)
1243+
1244+
12031245
def _format_data_content_block(block: dict) -> dict:
12041246
"""Format standard data content block to format expected by Converse API."""
12051247
if block["type"] == "image":
@@ -1209,7 +1251,7 @@ def _format_data_content_block(block: dict) -> dict:
12091251
raise ValueError(error_message)
12101252
formatted_block = {
12111253
"image": {
1212-
"format": block["mimeType"].split("/")[1],
1254+
"format": _mime_type_to_format(block["mimeType"]),
12131255
"source": {"bytes": _b64str_to_bytes(block["data"])},
12141256
}
12151257
}
@@ -1224,7 +1266,7 @@ def _format_data_content_block(block: dict) -> dict:
12241266
raise ValueError(error_message)
12251267
formatted_block = {
12261268
"document": {
1227-
"format": block["mimeType"].split("/")[1],
1269+
"format": _mime_type_to_format(block["mimeType"]),
12281270
"source": {"bytes": _b64str_to_bytes(block["data"])},
12291271
}
12301272
}
@@ -1274,7 +1316,7 @@ def _lc_content_to_bedrock(
12741316
bedrock_content.append(
12751317
{
12761318
"image": {
1277-
"format": block["source"]["mediaType"].split("/")[1],
1319+
"format": _mime_type_to_format(block["source"]["mediaType"]),
12781320
"source": {
12791321
"bytes": _b64str_to_bytes(block["source"]["data"])
12801322
},
@@ -1295,7 +1337,7 @@ def _lc_content_to_bedrock(
12951337
bedrock_content.append(
12961338
{
12971339
"video": {
1298-
"format": block["source"]["mediaType"].split("/")[1],
1340+
"format": _mime_type_to_format(block["source"]["mediaType"]),
12991341
"source": {
13001342
"bytes": _b64str_to_bytes(block["source"]["data"])
13011343
},
@@ -1306,7 +1348,7 @@ def _lc_content_to_bedrock(
13061348
bedrock_content.append(
13071349
{
13081350
"video": {
1309-
"format": block["source"]["mediaType"].split("/")[1],
1351+
"format": _mime_type_to_format(block["source"]["mediaType"]),
13101352
"source": {"s3Location": block["source"]["data"]},
13111353
}
13121354
}

libs/aws/tests/unit_tests/chat_models/test_bedrock_converse.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1107,6 +1107,70 @@ def test__lc_content_to_bedrock_reasoning_content_signature() -> None:
11071107
assert expected_system == actual_system
11081108

11091109

1110+
def test__lc_content_to_bedrock_mime_types() -> None:
    """Video, image and file blocks are converted with the right format names."""
    raw_video = b"video_test_data"
    raw_image = b"image_test_data"
    raw_file = b"file_test_data"
    video_b64, image_b64, file_b64 = (
        base64.b64encode(raw).decode("utf-8")
        for raw in (raw_video, raw_image, raw_file)
    )

    # One input block per media category.
    video_block: dict[str, Any] = {
        "type": "video",
        "source": {
            "type": "base64",
            "mediaType": "video/mp4",
            "data": video_b64,
        },
    }
    image_block: dict[str, Any] = {
        "type": "image",
        "source": {
            "type": "base64",
            "mediaType": "image/jpeg",
            "data": image_b64,
        },
    }
    file_block: dict[str, Any] = {
        "type": "file",
        "sourceType": "base64",
        "mimeType": "application/pdf",
        "data": file_b64,
        "name": "test_document.pdf",
    }
    content: list[str | dict[str, Any]] = [video_block, image_block, file_block]

    # Decoding the base64 payloads yields the raw bytes they were built from.
    expected_content = [
        {"video": {"format": "mp4", "source": {"bytes": raw_video}}},
        {"image": {"format": "jpeg", "source": {"bytes": raw_image}}},
        {
            "document": {
                "format": "pdf",
                "name": "test_document.pdf",
                "source": {"bytes": raw_file},
            }
        },
    ]

    assert _lc_content_to_bedrock(content) == expected_content
1172+
1173+
11101174
def test__get_provider() -> None:
11111175
llm = ChatBedrockConverse(
11121176
model="anthropic.claude-3-sonnet-20240229-v1:0", region_name="us-west-2"
@@ -1571,4 +1635,4 @@ def side_effect(service_name: str, **kwargs: Any) -> mock.Mock:
15711635
)
15721636

15731637
# The streaming should be disabled for models with no streaming support
1574-
assert chat_model.disable_streaming is True
1638+
assert chat_model.disable_streaming is True

0 commit comments

Comments
 (0)