Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 51 additions & 3 deletions src/strands/models/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
"""

import asyncio
import base64
import binascii
import json
import logging
import os
Expand Down Expand Up @@ -350,6 +352,45 @@ def _should_include_tool_result_status(self) -> bool:
else: # "auto"
return any(model in self.config["model_id"] for model in _MODELS_INCLUDE_STATUS)

def _coerce_to_bytes(self, value: Any, *, expected_fmt: Optional[str] = None) -> bytes:
    """Normalize bytes-like inputs to raw bytes for Bedrock requests.

    Inputs are handled in this order:

    1. Objects exposing a callable ``read``: the result of ``read()`` is returned unchanged when it is
       ``bytes``, UTF-8 encoded when it is ``str``, and passed through ``bytes()`` otherwise.
    2. ``bytes``, ``bytearray`` and ``memoryview``: converted with ``bytes()``.
    3. ``str``: strictly base64-decoded. A ``data:...;base64,`` prefix is stripped first.

    Args:
        value: Input that should represent binary data.
        expected_fmt: Optional file format hint used for error messaging.

    Returns:
        Raw bytes suitable for Bedrock's `source` payloads.

    Raises:
        TypeError: If the provided value cannot be interpreted as bytes.
    """
    # NOTE(review): a reviewer on the originating pull request questioned whether this coercion belongs
    # in the Bedrock provider at all, since ["source"]["bytes"] is expected to already be bytes and other
    # providers would need the same handling; moving it to the A2A executor was suggested instead.

    reader = getattr(value, "read", None)
    if callable(reader):
        data = reader()
        if isinstance(data, bytes):
            return data
        if isinstance(data, str):
            return data.encode("utf-8")
        return bytes(data)

    if isinstance(value, (bytes, bytearray, memoryview)):
        return bytes(value)

    # Base64-encoded strings (optionally data URLs)
    if isinstance(value, str):
        data_str = value
        if data_str.startswith("data:") and ";base64," in data_str:
            # Keep only the payload after the first comma of the data URL.
            data_str = data_str.split(",", 1)[1]

        try:
            return base64.b64decode(data_str, validate=True)
        except ValueError as exc:
            # binascii.Error (bad alphabet/padding) is a ValueError subclass; a str holding non-ASCII
            # characters raises a plain ValueError. Both must surface as the documented TypeError.
            # NOTE(review): this message names "document" although the helper is also used for image
            # and video sources; the text is left unchanged for compatibility.
            raise TypeError(
                f"document.source.bytes must be raw bytes or a base64-encoded string (format={expected_fmt!r})."
            ) from exc

    raise TypeError(f"Unsupported type for bytes conversion: {type(value).__name__}")

def _format_request_message_content(self, content: ContentBlock) -> dict[str, Any]:
"""Format a Bedrock content block.

Expand Down Expand Up @@ -382,7 +423,14 @@ def _format_request_message_content(self, content: ContentBlock) -> dict[str, An

# Handle source
if "source" in document:
result["source"] = {"bytes": document["source"]["bytes"]}
source = document["source"]

if "bytes" in source:
result["source"] = {
"bytes": self._coerce_to_bytes(source["bytes"], expected_fmt=document.get("format"))
}
else:
raise TypeError("document.source must include 'bytes'")

# Handle optional fields
if "citations" in document and document["citations"] is not None:
Expand All @@ -405,7 +453,7 @@ def _format_request_message_content(self, content: ContentBlock) -> dict[str, An
source = image["source"]
formatted_source = {}
if "bytes" in source:
formatted_source = {"bytes": source["bytes"]}
formatted_source = {"bytes": self._coerce_to_bytes(source["bytes"], expected_fmt=image.get("format"))}
result = {"format": image["format"], "source": formatted_source}
return {"image": result}

Expand Down Expand Up @@ -470,7 +518,7 @@ def _format_request_message_content(self, content: ContentBlock) -> dict[str, An
source = video["source"]
formatted_source = {}
if "bytes" in source:
formatted_source = {"bytes": source["bytes"]}
formatted_source = {"bytes": self._coerce_to_bytes(source["bytes"], expected_fmt=video.get("format"))}
result = {"format": video["format"], "source": formatted_source}
return {"video": result}

Expand Down
68 changes: 68 additions & 0 deletions tests/strands/models/test_bedrock.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import os
import sys
import unittest.mock
Expand Down Expand Up @@ -421,6 +422,73 @@ def test_format_request_tool_specs(model, messages, model_id, tool_spec):
assert tru_request == exp_request


def test_format_request_document_base64_bytes(model, model_id):
    """A base64 string given as document source bytes appears as the decoded raw bytes in the request."""
    raw_pdf = b"%PDF-1.4 test pdf"
    document = {
        "name": "testing.pdf",
        "format": "pdf",
        # The payload is supplied as ASCII base64 text rather than raw bytes.
        "source": {"bytes": base64.b64encode(raw_pdf).decode("ascii")},
    }
    user_message = {"role": "user", "content": [{"document": document}]}

    formatted = model.format_request([user_message])

    first_block = formatted["messages"][0]["content"][0]
    assert first_block["document"]["source"]["bytes"] == raw_pdf


def test_format_request_document_plain_text_raises(model):
    """A document whose source bytes is a non-base64 string makes format_request raise TypeError."""
    document = {
        "name": "testing.pdf",
        "format": "pdf",
        "source": {"bytes": "this is not base64"},
    }
    user_message = {"role": "user", "content": [{"document": document}]}

    with pytest.raises(TypeError):
        model.format_request([user_message])


def test_format_request_document_raw_bytes(model):
    """Raw bytes given as document source bytes are carried into the formatted request unchanged."""
    raw_pdf = b"%PDF-1.4 test pdf"
    document = {
        "name": "testing.pdf",
        "format": "pdf",
        "source": {"bytes": raw_pdf},
    }
    user_message = {"role": "user", "content": [{"document": document}]}

    formatted = model.format_request([user_message])

    first_block = formatted["messages"][0]["content"][0]
    assert first_block["document"]["source"]["bytes"] == raw_pdf


def test_format_request_tool_choice_auto(model, messages, model_id, tool_spec):
tool_choice = {"auto": {}}
tru_request = model.format_request(messages, [tool_spec], tool_choice=tool_choice)
Expand Down