Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions src/agents/extensions/models/litellm_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,11 @@

class InternalChatCompletionMessage(ChatCompletionMessage):
"""
An internal subclass to carry reasoning_content without modifying the original model.
"""
An internal subclass to carry reasoning_content and thinking_blocks without modifying the original model.
""" # noqa: E501

reasoning_content: str
thinking_blocks: list[dict[str, Any]] | None = None


class LitellmModel(Model):
Expand Down Expand Up @@ -401,6 +402,26 @@ def convert_message_to_openai(
if hasattr(message, "reasoning_content") and message.reasoning_content:
reasoning_content = message.reasoning_content

# Extract full thinking blocks including signatures (for Anthropic)
thinking_blocks: list[dict[str, Any]] | None = None
if hasattr(message, "thinking_blocks") and message.thinking_blocks:
# Convert thinking blocks to dict format for compatibility
thinking_blocks = []
for block in message.thinking_blocks:
if isinstance(block, dict):
thinking_blocks.append(cast(dict[str, Any], block))
else:
# Convert object to dict by accessing its attributes
block_dict: dict[str, Any] = {}
if hasattr(block, '__dict__'):
block_dict = dict(block.__dict__.items())
elif hasattr(block, 'model_dump'):
block_dict = block.model_dump()
else:
# Last resort: convert to string representation
block_dict = {"thinking": str(block)}
thinking_blocks.append(block_dict)

return InternalChatCompletionMessage(
content=message.content,
refusal=refusal,
Expand All @@ -409,6 +430,7 @@ def convert_message_to_openai(
audio=message.get("audio", None), # litellm deletes audio if not present
tool_calls=tool_calls,
reasoning_content=reasoning_content,
thinking_blocks=thinking_blocks,
)

@classmethod
Expand Down
22 changes: 16 additions & 6 deletions src/agents/models/chatcmpl_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,24 @@ def message_to_output_items(cls, message: ChatCompletionMessage) -> list[TRespon

# Handle reasoning content if available
if hasattr(message, "reasoning_content") and message.reasoning_content:
items.append(
ResponseReasoningItem(
id=FAKE_RESPONSES_ID,
summary=[Summary(text=message.reasoning_content, type="summary_text")],
type="reasoning",
)
reasoning_item = ResponseReasoningItem(
id=FAKE_RESPONSES_ID,
summary=[Summary(text=message.reasoning_content, type="summary_text")],
type="reasoning",
)

# Store full thinking blocks for Anthropic compatibility
if hasattr(message, "thinking_blocks") and message.thinking_blocks:
# Store thinking blocks in the reasoning item's content
# Convert thinking blocks to Content objects
from openai.types.responses.response_reasoning_item import Content
reasoning_item.content = [
Content(text=str(block.get("thinking", "")), type="reasoning_text")
for block in message.thinking_blocks
]

items.append(reasoning_item)

message_item = ResponseOutputMessage(
id=FAKE_RESPONSES_ID,
content=[],
Expand Down
101 changes: 101 additions & 0 deletions tests/test_anthropic_thinking_blocks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""
Test for Anthropic thinking blocks in conversation history.

This test validates the fix for issue #1704:
- Thinking blocks are properly preserved from Anthropic responses
- Reasoning items are stored in session but not sent back in conversation history
- Non-reasoning models are unaffected
- Token usage is not increased for non-reasoning scenarios
"""

from __future__ import annotations

from typing import Any

from agents.extensions.models.litellm_model import InternalChatCompletionMessage
from agents.models.chatcmpl_converter import Converter


def create_mock_anthropic_response_with_thinking() -> InternalChatCompletionMessage:
    """Build a fake Anthropic-style assistant message carrying a thinking block.

    Mirrors the shape of a real LiteLLM/Anthropic response: plain content,
    extracted reasoning text, and the raw thinking block (with signature).
    """
    thinking_block: dict[str, Any] = {
        "type": "thinking",
        "thinking": "I need to call the weather function for Paris",
        "signature": "EqMDCkYIBxgCKkBAFZO8EyZwN1hiLctq0YjZnP0KeKgprr+C0PzgDv4GSggnFwrPQHIZ9A5s+paH+DrQBI1+Vnfq3mLAU5lJnoetEgzUEWx/Cv1022ieAvcaDCXdmg1XkMK0tZ8uCCIwURYAAX0uf2wFdnWt9n8whkhmy8ARQD5G2za4R8X5vTqBq8jpJ15T3c1Jcf3noKMZKooCWFVf0/W5VQqpZTgwDkqyTau7XraS+u48YlmJGSfyWMPO8snFLMZLGaGmVJgHfEI5PILhOEuX/R2cEeLuC715f51LMVuxTNzlOUV/037JV6P2ten7D66FnWU9JJMMJJov+DjMb728yQFHwHz4roBJ5ePHaaFP6mDwpqYuG/hai6pVv2TAK1IdKUui/oXrYtU+0gxb6UF2kS1bspqDuN++R8JdL7CMSU5l28pQ8TsH1TpVF4jZpsFbp1Du4rQIULFsCFFg+Edf9tPgyKZOq6xcskIjT7oylAPO37/jhdNknDq2S82PaSKtke3ViOigtM5uJfG521ZscBJQ1K3kwoI/repIdV9PatjOYdsYAQ==",  # noqa: E501
    }
    return InternalChatCompletionMessage(
        role="assistant",
        content="I'll check the weather in Paris for you.",
        reasoning_content="I need to call the weather function for Paris",
        thinking_blocks=[thinking_block],
    )


def test_converter_skips_reasoning_items():
    """Reasoning items must be dropped when items are converted back to messages.

    Feeds a user message, a reasoning item, and an assistant message through
    ``Converter.items_to_messages`` and verifies the reasoning item (and any
    thinking content) does not survive the round trip.
    """
    history: list[dict[str, Any]] = [
        {"role": "user", "content": "Hello"},
        {
            "id": "reasoning_123",
            "type": "reasoning",
            "summary": [{"text": "User said hello", "type": "summary_text"}],
        },
        {
            "id": "msg_123",
            "type": "message",
            "role": "assistant",
            "content": [{"type": "output_text", "text": "Hi there!"}],
            "status": "completed",
        },
    ]

    converted = Converter.items_to_messages(history)  # type: ignore[arg-type]

    # Only the user and assistant turns remain — the reasoning item is gone.
    assert len(converted) == 2
    assert converted[0]["role"] == "user"
    assert converted[1]["role"] == "assistant"

    # The assistant message must not smuggle thinking blocks back in.
    assistant_content = converted[1].get("content")
    if isinstance(assistant_content, list):
        assert all(part.get("type") != "thinking" for part in assistant_content)


def test_reasoning_items_preserved_in_message_conversion():
    """
    Test that reasoning content and thinking blocks are properly extracted
    from Anthropic responses and stored in reasoning items.
    """
    # Create mock message with thinking blocks
    mock_message = create_mock_anthropic_response_with_thinking()

    # Convert to output items
    output_items = Converter.message_to_output_items(mock_message)

    # Should have reasoning item, message item, and tool call items
    reasoning_items = [
        item for item in output_items if hasattr(item, "type") and item.type == "reasoning"
    ]
    assert len(reasoning_items) == 1

    reasoning_item = reasoning_items[0]
    assert reasoning_item.summary[0].text == "I need to call the weather function for Paris"

    # The mock always carries thinking_blocks, so the converter must populate
    # reasoning_item.content. Assert unconditionally — the original guard made
    # this check pass vacuously whenever content was silently dropped.
    assert reasoning_item.content, "thinking blocks were not preserved in reasoning content"
    thinking_block = reasoning_item.content[0]
    assert thinking_block.type == "reasoning_text"
    assert thinking_block.text == "I need to call the weather function for Paris"