10 changes: 9 additions & 1 deletion src/agents/extensions/models/litellm_model.py
@@ -257,7 +257,15 @@ async def _fetch_response(
        stream: bool = False,
        prompt: Any | None = None,
    ) -> litellm.types.utils.ModelResponse | tuple[Response, AsyncStream[ChatCompletionChunk]]:
        converted_messages = Converter.items_to_messages(input)
        # Preserve reasoning messages for tool calls when reasoning is on
        # This is needed for models like Claude 4 Sonnet/Opus which support interleaved thinking
        preserve_thinking_blocks = (
            model_settings.reasoning is not None and model_settings.reasoning.effort is not None
        )

        converted_messages = Converter.items_to_messages(
            input, preserve_thinking_blocks=preserve_thinking_blocks
        )

        if system_instructions:
            converted_messages.insert(
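
For context, this gating keys off ModelSettings.reasoning. A minimal caller-side sketch of turning the new path on (assuming the SDK's public Agent, ModelSettings, and LitellmModel APIs; the agent name and model id are illustrative, not from this PR):

from agents import Agent, ModelSettings
from agents.extensions.models.litellm_model import LitellmModel
from openai.types.shared import Reasoning

# Any non-None reasoning effort flips preserve_thinking_blocks to True in
# _fetch_response above, so thinking blocks survive the round trip.
agent = Agent(
    name="weather-assistant",  # hypothetical name
    model=LitellmModel(model="anthropic/claude-sonnet-4-20250514"),
    model_settings=ModelSettings(reasoning=Reasoning(effort="medium")),
)
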
80 changes: 65 additions & 15 deletions src/agents/models/chatcmpl_converter.py
@@ -39,7 +39,7 @@
    ResponseReasoningItemParam,
)
from openai.types.responses.response_input_param import FunctionCallOutput, ItemReference, Message
from openai.types.responses.response_reasoning_item import Content, Summary

from ..agent_output import AgentOutputSchemaBase
from ..exceptions import AgentsException, UserError
@@ -93,24 +93,38 @@ def convert_response_format(
    def message_to_output_items(cls, message: ChatCompletionMessage) -> list[TResponseOutputItem]:
        items: list[TResponseOutputItem] = []

        # Handle reasoning content if available
        # Check if message is agents.extensions.models.litellm_model.InternalChatCompletionMessage
        # We can't actually import it here because litellm is an optional dependency
        # So we use hasattr to check for reasoning_content and thinking_blocks
        if hasattr(message, "reasoning_content") and message.reasoning_content:
            reasoning_item = ResponseReasoningItem(
                id=FAKE_RESPONSES_ID,
                summary=[Summary(text=message.reasoning_content, type="summary_text")],
                type="reasoning",
            )

            # Store full thinking blocks for Anthropic compatibility
            # Store thinking blocks for Anthropic compatibility
            if hasattr(message, "thinking_blocks") and message.thinking_blocks:
                # Store thinking blocks in the reasoning item's content
                # Convert thinking blocks to Content objects
                from openai.types.responses.response_reasoning_item import Content

                reasoning_item.content = [
                    Content(text=str(block.get("thinking", "")), type="reasoning_text")
                    for block in message.thinking_blocks
                ]
                # Store thinking text in content and signature in encrypted_content
                reasoning_item.content = []
                signature = None
                for block in message.thinking_blocks:
                    if isinstance(block, dict):
                        thinking_text = block.get("thinking", "")
                        if thinking_text:
                            reasoning_item.content.append(
                                Content(text=thinking_text, type="reasoning_text")
                            )
                        # Store the signature if present
                        if block.get("signature"):
                            signature = block.get("signature")

                # Store only the last signature in encrypted_content.
                # If there were multiple signed thinking blocks this could be a problem,
                # but in practice there should only be one signature for the entire
                # reasoning step. Tested with: claude-sonnet-4-20250514.
                if signature:
                    reasoning_item.encrypted_content = signature

            items.append(reasoning_item)

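
In effect, this hunk maps litellm's Anthropic passthrough fields onto a Responses-style reasoning item. An illustrative sketch (shapes assumed from the hasattr checks above; values made up):

# Not code from this PR; InternalChatCompletionMessage is the litellm extension
# type that the hasattr checks above are probing for.
message = InternalChatCompletionMessage(
    role="assistant",
    content="Checking the weather now.",
    reasoning_content="Need to call the weather tool",
    thinking_blocks=[
        {"type": "thinking", "thinking": "Need to call the weather tool", "signature": "sig_abc"}
    ],
)
items = Converter.message_to_output_items(message)
# items[0] is then roughly:
# ResponseReasoningItem(
#     id=FAKE_RESPONSES_ID,
#     summary=[Summary(text="Need to call the weather tool", type="summary_text")],
#     content=[Content(text="Need to call the weather tool", type="reasoning_text")],
#     encrypted_content="sig_abc",
#     type="reasoning",
# )
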
@@ -301,10 +315,18 @@ def extract_all_content(
    def items_to_messages(
        cls,
        items: str | Iterable[TResponseInputItem],
        preserve_thinking_blocks: bool = False,
    ) -> list[ChatCompletionMessageParam]:
        """
        Convert a sequence of 'Item' objects into a list of ChatCompletionMessageParam.

        Args:
            items: A string or iterable of response input items to convert
            preserve_thinking_blocks: Whether to preserve thinking blocks in tool calls
                for reasoning models like Claude 4 Sonnet/Opus which support interleaved
                thinking. When True, thinking blocks are reconstructed and included in
                assistant messages with tool calls.

        Rules:
        - EasyInputMessage or InputMessage (role=user) => ChatCompletionUserMessageParam
        - EasyInputMessage or InputMessage (role=system) => ChatCompletionSystemMessageParam
@@ -325,6 +347,7 @@

        result: list[ChatCompletionMessageParam] = []
        current_assistant_msg: ChatCompletionAssistantMessageParam | None = None
        pending_thinking_blocks: list[dict[str, str]] | None = None

        def flush_assistant_message() -> None:
            nonlocal current_assistant_msg
@@ -336,10 +359,11 @@ def flush_assistant_message() -> None:
            current_assistant_msg = None

        def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
            nonlocal current_assistant_msg
            nonlocal current_assistant_msg, pending_thinking_blocks
            if current_assistant_msg is None:
                current_assistant_msg = ChatCompletionAssistantMessageParam(role="assistant")
                current_assistant_msg["tool_calls"] = []

            return current_assistant_msg

        for item in items:
@@ -455,6 +479,13 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:

            elif func_call := cls.maybe_function_tool_call(item):
                asst = ensure_assistant_message()

                # If we have pending thinking blocks, use them as the content
                # This is required for Anthropic API tool calls with interleaved thinking
                if pending_thinking_blocks:
                    asst["content"] = pending_thinking_blocks  # type: ignore
                    pending_thinking_blocks = None  # Clear after using

                tool_calls = list(asst.get("tool_calls", []))
                arguments = func_call["arguments"] if func_call["arguments"] else "{}"
                new_tool_call = ChatCompletionMessageFunctionToolCallParam(
@@ -483,9 +514,28 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
f"Encountered an item_reference, which is not supported: {item_ref}"
)

# 7) reasoning message => not handled
elif cls.maybe_reasoning_message(item):
pass
# 7) reasoning message => extract thinking blocks if present
elif reasoning_item := cls.maybe_reasoning_message(item):
# Reconstruct thinking blocks from content (text) and encrypted_content (signature)
content_items = reasoning_item.get("content", [])
signature = reasoning_item.get("encrypted_content")

if content_items and preserve_thinking_blocks:
# Reconstruct thinking blocks from content and signature
pending_thinking_blocks = []
for content_item in content_items:
if (
isinstance(content_item, dict)
and content_item.get("type") == "reasoning_text"
):
thinking_block = {
"type": "thinking",
"thinking": content_item.get("text", ""),
}
# Add signature if available
if signature:
thinking_block["signature"] = signature
pending_thinking_blocks.append(thinking_block)

# 8) If we haven't recognized it => fail or ignore
else:
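
Together with the reasoning-message branch above, this lets a reasoning item round-trip back into an Anthropic-style thinking block. A minimal sketch (the input dicts are hypothetical, shaped like the Responses items this converter accepts):

# Hedged sketch, not code from this PR; ids and values are made up.
reasoning_input = {
    "id": "rs_1",
    "type": "reasoning",
    "summary": [],
    "content": [{"type": "reasoning_text", "text": "Need the weather tool."}],
    "encrypted_content": "sig_abc",
}
tool_call_input = {
    "type": "function_call",
    "call_id": "call_1",
    "name": "get_weather",
    "arguments": '{"city": "Tokyo"}',
}
messages = Converter.items_to_messages(
    [reasoning_input, tool_call_input], preserve_thinking_blocks=True
)
# The assistant message now leads with the rebuilt thinking block, as Anthropic's
# extended thinking requires ahead of any tool call:
# {"role": "assistant",
#  "content": [{"type": "thinking", "thinking": "Need the weather tool.",
#               "signature": "sig_abc"}],
#  "tool_calls": [{"id": "call_1", "type": "function", ...}]}
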
30 changes: 29 additions & 1 deletion src/agents/models/chatcmpl_stream_handler.py
@@ -62,6 +62,9 @@ class StreamingState:
    # Fields for real-time function call streaming
    function_call_streaming: dict[int, bool] = field(default_factory=dict)
    function_call_output_idx: dict[int, int] = field(default_factory=dict)
    # Store accumulated thinking text and signature for Anthropic compatibility
    thinking_text: str = ""
    thinking_signature: str | None = None


class SequenceNumber:
@@ -101,6 +104,19 @@ async def handle_stream(

            delta = chunk.choices[0].delta

            # Handle thinking blocks from Anthropic (for preserving signatures)
            if hasattr(delta, "thinking_blocks") and delta.thinking_blocks:
                for block in delta.thinking_blocks:
                    if isinstance(block, dict):
                        # Accumulate thinking text
                        thinking_text = block.get("thinking", "")
                        if thinking_text:
                            state.thinking_text += thinking_text
                        # Store signature if present
                        signature = block.get("signature")
                        if signature:
                            state.thinking_signature = signature

            # Handle reasoning content for reasoning summaries
            if hasattr(delta, "reasoning_content"):
                reasoning_content = delta.reasoning_content
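
For orientation, the thinking deltas this loop consumes look roughly like the following (shape inferred from the hasattr checks above rather than from litellm documentation; values illustrative):

# Text arrives incrementally, and the signature typically lands in a late block
# with empty text -- which is why the handler accumulates text but keeps only
# the most recent signature.
example_thinking_block_deltas = [
    [{"type": "thinking", "thinking": "Let me call the weather tool", "signature": None}],
    [{"type": "thinking", "thinking": " for Tokyo.", "signature": None}],
    [{"type": "thinking", "thinking": "", "signature": "sig_abc123"}],
]
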
@@ -527,7 +543,19 @@ async def handle_stream(

        # include Reasoning item if it exists
        if state.reasoning_content_index_and_output:
            outputs.append(state.reasoning_content_index_and_output[1])
            reasoning_item = state.reasoning_content_index_and_output[1]
            # Store thinking text in content and signature in encrypted_content
            if state.thinking_text:
                # Add thinking text as a Content object
                if not reasoning_item.content:
                    reasoning_item.content = []
                reasoning_item.content.append(
                    Content(text=state.thinking_text, type="reasoning_text")
                )
            # Store signature in encrypted_content
            if state.thinking_signature:
                reasoning_item.encrypted_content = state.thinking_signature
Comment on lines 545 to +557

[P1] Streaming path drops thinking block when Anthropic redacts reasoning text

The streaming handler only appends a reasoning_text content item when state.thinking_text is non-empty. For streams where Anthropic suppresses the thinking text and provides only a signature in thinking_blocks, thinking_text remains empty while thinking_signature is set. The resulting ResponseReasoningItem carries the signature but no content, so downstream conversion still produces an assistant message that begins with normal text and fails Anthropic’s extended-thinking requirement. A redacted thinking block should be emitted whenever a signature is collected even if no text arrives.
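
A minimal sketch of the guard the comment asks for (hypothetical, reusing the state fields and Content type from this diff; not code from the PR):

# Emit a reasoning_text entry whenever a signature was collected, even if the
# thinking text itself was redacted, so downstream items_to_messages still
# reconstructs a signed thinking block.
if state.thinking_text or state.thinking_signature:
    if not reasoning_item.content:
        reasoning_item.content = []
    reasoning_item.content.append(
        Content(text=state.thinking_text, type="reasoning_text")
    )
    if state.thinking_signature:
        reasoning_item.encrypted_content = state.thinking_signature
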

Reply from the PR author:

@codex This is the same as above.

            outputs.append(reasoning_item)

        # include text or refusal content if they exist
        if state.text_content_index_and_output or state.refusal_content_index_and_output:
117 changes: 116 additions & 1 deletion tests/test_anthropic_thinking_blocks.py
@@ -10,7 +10,10 @@

from __future__ import annotations

from typing import Any
from typing import Any, cast

from openai.types.chat import ChatCompletionMessageToolCall
from openai.types.chat.chat_completion_message_tool_call import Function

from agents.extensions.models.litellm_model import InternalChatCompletionMessage
from agents.models.chatcmpl_converter import Converter
@@ -99,3 +102,115 @@ def test_reasoning_items_preserved_in_message_conversion():
    thinking_block = reasoning_item.content[0]
    assert thinking_block.type == "reasoning_text"
    assert thinking_block.text == "I need to call the weather function for Paris"


def test_anthropic_thinking_blocks_with_tool_calls():
    """
    Test for models with extended thinking and interleaved thinking with tool calls.

    This test verifies the Anthropic API's requirement that thinking blocks
    be the first content in assistant messages when reasoning is enabled and
    tool calls are present.
    """
    # Create a message with reasoning, thinking blocks and tool calls
    message = InternalChatCompletionMessage(
        role="assistant",
        content="I'll check the weather for you.",
        reasoning_content="The user wants weather information, I need to call the weather function",
        thinking_blocks=[
            {
                "type": "thinking",
                "thinking": (
                    "The user is asking about weather. "
                    "Let me use the weather tool to get this information."
                ),
                "signature": "TestSignature123",
            }
        ],
        tool_calls=[
            ChatCompletionMessageToolCall(
                id="call_123",
                type="function",
                function=Function(name="get_weather", arguments='{"city": "Tokyo"}'),
            )
        ],
    )

    # Step 1: Convert message to output items
    output_items = Converter.message_to_output_items(message)

    # Verify reasoning item exists and contains thinking blocks
    reasoning_items = [
        item for item in output_items if hasattr(item, "type") and item.type == "reasoning"
    ]
    assert len(reasoning_items) == 1, "Should have exactly one reasoning item"

    reasoning_item = reasoning_items[0]

    # Verify thinking text is stored in content
    assert hasattr(reasoning_item, "content") and reasoning_item.content, (
        "Reasoning item should have content"
    )
    assert reasoning_item.content[0].type == "reasoning_text", (
        "Content should be reasoning_text type"
    )

    # Verify signature is stored in encrypted_content
    assert hasattr(reasoning_item, "encrypted_content"), (
        "Reasoning item should have encrypted_content"
    )
    assert reasoning_item.encrypted_content == "TestSignature123", "Signature should be preserved"

    # Verify tool calls are present
    tool_call_items = [
        item for item in output_items if hasattr(item, "type") and item.type == "function_call"
    ]
    assert len(tool_call_items) == 1, "Should have exactly one tool call"

    # Step 2: Convert output items back to messages
    # Convert items to dicts for the converter (simulating serialization/deserialization)
    items_as_dicts: list[dict[str, Any]] = []
    for item in output_items:
        if hasattr(item, "model_dump"):
            items_as_dicts.append(item.model_dump())
        else:
            items_as_dicts.append(cast(dict[str, Any], item))

    messages = Converter.items_to_messages(items_as_dicts, preserve_thinking_blocks=True)  # type: ignore[arg-type]

    # Find the assistant message with tool calls
    assistant_messages = [
        msg for msg in messages if msg.get("role") == "assistant" and msg.get("tool_calls")
    ]
    assert len(assistant_messages) == 1, "Should have exactly one assistant message with tool calls"

    assistant_msg = assistant_messages[0]

    # Content must start with thinking blocks, not text
    content = assistant_msg.get("content")
    assert content is not None, "Assistant message should have content"

    assert isinstance(content, list) and len(content) > 0, (
        "Assistant message content should be a non-empty list"
    )

    first_content = content[0]
    assert first_content.get("type") == "thinking", (
        f"First content must be 'thinking' type for Anthropic compatibility, "
        f"but got '{first_content.get('type')}'"
    )
    expected_thinking = (
        "The user is asking about weather. Let me use the weather tool to get this information."
    )
    assert first_content.get("thinking") == expected_thinking, (
        "Thinking content should be preserved"
    )
    # Signature should also be preserved
    assert first_content.get("signature") == "TestSignature123", (
        "Signature should be preserved in thinking block"
    )

    # Verify tool calls are preserved
    tool_calls = assistant_msg.get("tool_calls", [])
    assert len(cast(list[Any], tool_calls)) == 1, "Tool calls should be preserved"
    assert cast(list[Any], tool_calls)[0]["function"]["name"] == "get_weather"