Skip to content

Commit 581111c

Browse files
habema and seratch authored
fix: #1704 Preserve thinking blocks in Anthropic conversations with tool calls (#1706)
Co-authored-by: Kazuhiro Sera <[email protected]>
1 parent 789575f commit 581111c

File tree

3 files changed

+141
-8
lines changed

3 files changed

+141
-8
lines changed

src/agents/extensions/models/litellm_model.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,11 @@
5353

5454
class InternalChatCompletionMessage(ChatCompletionMessage):
    """
    Internal-only subclass that carries provider reasoning data
    (``reasoning_content`` and ``thinking_blocks``) without modifying
    the upstream ``ChatCompletionMessage`` model.
    """  # noqa: E501

    # Plain-text reasoning emitted by the model.
    reasoning_content: str
    # Full Anthropic-style thinking blocks (including signatures), when present.
    thinking_blocks: list[dict[str, Any]] | None = None
6061

6162

6263
class LitellmModel(Model):
@@ -401,6 +402,26 @@ def convert_message_to_openai(
401402
if hasattr(message, "reasoning_content") and message.reasoning_content:
402403
reasoning_content = message.reasoning_content
403404

405+
# Extract full thinking blocks including signatures (for Anthropic)
406+
thinking_blocks: list[dict[str, Any]] | None = None
407+
if hasattr(message, "thinking_blocks") and message.thinking_blocks:
408+
# Convert thinking blocks to dict format for compatibility
409+
thinking_blocks = []
410+
for block in message.thinking_blocks:
411+
if isinstance(block, dict):
412+
thinking_blocks.append(cast(dict[str, Any], block))
413+
else:
414+
# Convert object to dict by accessing its attributes
415+
block_dict: dict[str, Any] = {}
416+
if hasattr(block, '__dict__'):
417+
block_dict = dict(block.__dict__.items())
418+
elif hasattr(block, 'model_dump'):
419+
block_dict = block.model_dump()
420+
else:
421+
# Last resort: convert to string representation
422+
block_dict = {"thinking": str(block)}
423+
thinking_blocks.append(block_dict)
424+
404425
return InternalChatCompletionMessage(
405426
content=message.content,
406427
refusal=refusal,
@@ -409,6 +430,7 @@ def convert_message_to_openai(
409430
audio=message.get("audio", None), # litellm deletes audio if not present
410431
tool_calls=tool_calls,
411432
reasoning_content=reasoning_content,
433+
thinking_blocks=thinking_blocks,
412434
)
413435

414436
@classmethod

src/agents/models/chatcmpl_converter.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,24 @@ def message_to_output_items(cls, message: ChatCompletionMessage) -> list[TRespon
9595

9696
# Handle reasoning content if available
9797
if hasattr(message, "reasoning_content") and message.reasoning_content:
98-
items.append(
99-
ResponseReasoningItem(
100-
id=FAKE_RESPONSES_ID,
101-
summary=[Summary(text=message.reasoning_content, type="summary_text")],
102-
type="reasoning",
103-
)
98+
reasoning_item = ResponseReasoningItem(
99+
id=FAKE_RESPONSES_ID,
100+
summary=[Summary(text=message.reasoning_content, type="summary_text")],
101+
type="reasoning",
104102
)
105103

104+
# Store full thinking blocks for Anthropic compatibility
105+
if hasattr(message, "thinking_blocks") and message.thinking_blocks:
106+
# Store thinking blocks in the reasoning item's content
107+
# Convert thinking blocks to Content objects
108+
from openai.types.responses.response_reasoning_item import Content
109+
reasoning_item.content = [
110+
Content(text=str(block.get("thinking", "")), type="reasoning_text")
111+
for block in message.thinking_blocks
112+
]
113+
114+
items.append(reasoning_item)
115+
106116
message_item = ResponseOutputMessage(
107117
id=FAKE_RESPONSES_ID,
108118
content=[],
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
"""
2+
Test for Anthropic thinking blocks in conversation history.
3+
4+
This test validates the fix for issue #1704:
5+
- Thinking blocks are properly preserved from Anthropic responses
6+
- Reasoning items are stored in session but not sent back in conversation history
7+
- Non-reasoning models are unaffected
8+
- Token usage is not increased for non-reasoning scenarios
9+
"""
10+
11+
from __future__ import annotations
12+
13+
from typing import Any
14+
15+
from agents.extensions.models.litellm_model import InternalChatCompletionMessage
16+
from agents.models.chatcmpl_converter import Converter
17+
18+
19+
def create_mock_anthropic_response_with_thinking() -> InternalChatCompletionMessage:
    """Build a mock message mimicking a real Anthropic response with thinking blocks."""
    # One thinking block with its signature, exactly as Anthropic returns it.
    thinking_block = {
        "type": "thinking",
        "thinking": "I need to call the weather function for Paris",
        "signature": "EqMDCkYIBxgCKkBAFZO8EyZwN1hiLctq0YjZnP0KeKgprr+C0PzgDv4GSggnFwrPQHIZ9A5s+paH+DrQBI1+Vnfq3mLAU5lJnoetEgzUEWx/Cv1022ieAvcaDCXdmg1XkMK0tZ8uCCIwURYAAX0uf2wFdnWt9n8whkhmy8ARQD5G2za4R8X5vTqBq8jpJ15T3c1Jcf3noKMZKooCWFVf0/W5VQqpZTgwDkqyTau7XraS+u48YlmJGSfyWMPO8snFLMZLGaGmVJgHfEI5PILhOEuX/R2cEeLuC715f51LMVuxTNzlOUV/037JV6P2ten7D66FnWU9JJMMJJov+DjMb728yQFHwHz4roBJ5ePHaaFP6mDwpqYuG/hai6pVv2TAK1IdKUui/oXrYtU+0gxb6UF2kS1bspqDuN++R8JdL7CMSU5l28pQ8TsH1TpVF4jZpsFbp1Du4rQIULFsCFFg+Edf9tPgyKZOq6xcskIjT7oylAPO37/jhdNknDq2S82PaSKtke3ViOigtM5uJfG521ZscBJQ1K3kwoI/repIdV9PatjOYdsYAQ==",  # noqa: E501
    }
    return InternalChatCompletionMessage(
        role="assistant",
        content="I'll check the weather in Paris for you.",
        reasoning_content="I need to call the weather function for Paris",
        thinking_blocks=[thinking_block],
    )
34+
35+
36+
def test_converter_skips_reasoning_items():
    """
    Reasoning items must be dropped when converting response items back
    into chat-completion messages.
    """
    # Conversation history containing a reasoning item between two turns.
    history: list[dict[str, Any]] = [
        {"role": "user", "content": "Hello"},
        {
            "id": "reasoning_123",
            "type": "reasoning",
            "summary": [{"text": "User said hello", "type": "summary_text"}],
        },
        {
            "id": "msg_123",
            "type": "message",
            "role": "assistant",
            "content": [{"type": "output_text", "text": "Hi there!"}],
            "status": "completed",
        },
    ]

    messages = Converter.items_to_messages(history)  # type: ignore[arg-type]

    # Only the user and assistant turns survive; the reasoning item is gone.
    assert len(messages) == 2
    assert [m["role"] for m in messages] == ["user", "assistant"]

    # The assistant content must carry no thinking parts.
    content = messages[-1].get("content")
    if isinstance(content, list):
        assert all(part.get("type") != "thinking" for part in content)
71+
72+
73+
def test_reasoning_items_preserved_in_message_conversion():
    """
    Reasoning content and thinking blocks extracted from an Anthropic
    response should surface as a single reasoning output item.
    """
    message = create_mock_anthropic_response_with_thinking()

    output_items = Converter.message_to_output_items(message)

    # Exactly one reasoning item should be produced.
    reasoning_items = [
        item for item in output_items if getattr(item, "type", None) == "reasoning"
    ]
    assert len(reasoning_items) == 1

    reasoning = reasoning_items[0]
    assert reasoning.summary[0].text == "I need to call the weather function for Paris"

    # When thinking blocks are preserved, they appear as reasoning_text content.
    if getattr(reasoning, "content", None):
        first_block = reasoning.content[0]
        assert first_block.type == "reasoning_text"
        assert first_block.text == "I need to call the weather function for Paris"

0 commit comments

Comments
 (0)