@@ -1200,6 +1200,48 @@ def _parse_stream_event(event: Dict[str, Any]) -> Optional[BaseMessageChunk]:
1200
1200
raise ValueError (f"Received unsupported stream event:\n \n { event } " )
1201
1201
1202
1202
1203
+ def _mime_type_to_format (mime_type : str ) -> str :
1204
+ mime_to_format = {
1205
+ # Image formats
1206
+ "image/png" : "png" ,
1207
+ "image/jpeg" : "jpeg" ,
1208
+ "image/gif" : "gif" ,
1209
+ "image/webp" : "webp" ,
1210
+ # File formats
1211
+ "application/pdf" : "pdf" ,
1212
+ "text/csv" : "csv" ,
1213
+ "application/msword" : "doc" ,
1214
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document" : "docx" ,
1215
+ "application/vnd.ms-excel" : "xls" ,
1216
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" : "xlsx" ,
1217
+ "text/html" : "html" ,
1218
+ "text/plain" : "txt" ,
1219
+ "text/markdown" : "md" ,
1220
+ # Video formats
1221
+ "video/x-matroska" : "mkv" ,
1222
+ "video/quicktime" : "mov" ,
1223
+ "video/mp4" : "mp4" ,
1224
+ "video/webm" : "webm" ,
1225
+ "video/x-flv" : "flv" ,
1226
+ "video/mpeg" : "mpeg" ,
1227
+ "video/x-ms-wmv" : "wmv" ,
1228
+ "video/3gpp" : "three_gp" ,
1229
+ }
1230
+
1231
+ if mime_type in mime_to_format :
1232
+ return mime_to_format [mime_type ]
1233
+
1234
+ # Fallback to original method of splitting on "/" for simple cases
1235
+ all_formats = set (mime_to_format .values ())
1236
+ format_part = mime_type .split ("/" )[1 ]
1237
+ if format_part in all_formats :
1238
+ return format_part
1239
+
1240
+ raise ValueError (
1241
+ f"Unsupported MIME type: { mime_type } . Please refer to the Bedrock Converse API documentation for supported formats."
1242
+ )
1243
+
1244
+
1203
1245
def _format_data_content_block (block : dict ) -> dict :
1204
1246
"""Format standard data content block to format expected by Converse API."""
1205
1247
if block ["type" ] == "image" :
@@ -1209,7 +1251,7 @@ def _format_data_content_block(block: dict) -> dict:
1209
1251
raise ValueError (error_message )
1210
1252
formatted_block = {
1211
1253
"image" : {
1212
- "format" : block ["mimeType" ]. split ( "/" )[ 1 ] ,
1254
+ "format" : _mime_type_to_format ( block ["mimeType" ]) ,
1213
1255
"source" : {"bytes" : _b64str_to_bytes (block ["data" ])},
1214
1256
}
1215
1257
}
@@ -1224,7 +1266,7 @@ def _format_data_content_block(block: dict) -> dict:
1224
1266
raise ValueError (error_message )
1225
1267
formatted_block = {
1226
1268
"document" : {
1227
- "format" : block ["mimeType" ]. split ( "/" )[ 1 ] ,
1269
+ "format" : _mime_type_to_format ( block ["mimeType" ]) ,
1228
1270
"source" : {"bytes" : _b64str_to_bytes (block ["data" ])},
1229
1271
}
1230
1272
}
@@ -1274,7 +1316,7 @@ def _lc_content_to_bedrock(
1274
1316
bedrock_content .append (
1275
1317
{
1276
1318
"image" : {
1277
- "format" : block ["source" ]["mediaType" ]. split ( "/" )[ 1 ] ,
1319
+ "format" : _mime_type_to_format ( block ["source" ]["mediaType" ]) ,
1278
1320
"source" : {
1279
1321
"bytes" : _b64str_to_bytes (block ["source" ]["data" ])
1280
1322
},
@@ -1295,7 +1337,7 @@ def _lc_content_to_bedrock(
1295
1337
bedrock_content .append (
1296
1338
{
1297
1339
"video" : {
1298
- "format" : block ["source" ]["mediaType" ]. split ( "/" )[ 1 ] ,
1340
+ "format" : _mime_type_to_format ( block ["source" ]["mediaType" ]) ,
1299
1341
"source" : {
1300
1342
"bytes" : _b64str_to_bytes (block ["source" ]["data" ])
1301
1343
},
@@ -1306,7 +1348,7 @@ def _lc_content_to_bedrock(
1306
1348
bedrock_content .append (
1307
1349
{
1308
1350
"video" : {
1309
- "format" : block ["source" ]["mediaType" ]. split ( "/" )[ 1 ] ,
1351
+ "format" : _mime_type_to_format ( block ["source" ]["mediaType" ]) ,
1310
1352
"source" : {"s3Location" : block ["source" ]["data" ]},
1311
1353
}
1312
1354
}
0 commit comments