|  | 
|  | 1 | +""" | 
|  | 2 | +Test for Anthropic thinking blocks in conversation history. | 
|  | 3 | +
 | 
|  | 4 | +This test validates the fix for issue #1704: | 
|  | 5 | +- Thinking blocks are properly preserved from Anthropic responses | 
|  | 6 | +- Reasoning items are stored in session but not sent back in conversation history | 
|  | 7 | +- Non-reasoning models are unaffected | 
|  | 8 | +- Token usage is not increased for non-reasoning scenarios | 
|  | 9 | +""" | 
|  | 10 | + | 
|  | 11 | +from __future__ import annotations | 
|  | 12 | + | 
|  | 13 | +from typing import Any | 
|  | 14 | + | 
|  | 15 | +from agents.extensions.models.litellm_model import InternalChatCompletionMessage | 
|  | 16 | +from agents.models.chatcmpl_converter import Converter | 
|  | 17 | + | 
|  | 18 | + | 
def create_mock_anthropic_response_with_thinking() -> InternalChatCompletionMessage:
    """Build a canned assistant message that mimics an Anthropic reply with thinking.

    The returned message carries visible text, a ``reasoning_content`` string and
    one ``thinking`` block whose text equals ``reasoning_content`` and which also
    holds a signature string.
    """
    # The same reasoning text is used for both reasoning_content and the block.
    thought = "I need to call the weather function for Paris"
    signature = "EqMDCkYIBxgCKkBAFZO8EyZwN1hiLctq0YjZnP0KeKgprr+C0PzgDv4GSggnFwrPQHIZ9A5s+paH+DrQBI1+Vnfq3mLAU5lJnoetEgzUEWx/Cv1022ieAvcaDCXdmg1XkMK0tZ8uCCIwURYAAX0uf2wFdnWt9n8whkhmy8ARQD5G2za4R8X5vTqBq8jpJ15T3c1Jcf3noKMZKooCWFVf0/W5VQqpZTgwDkqyTau7XraS+u48YlmJGSfyWMPO8snFLMZLGaGmVJgHfEI5PILhOEuX/R2cEeLuC715f51LMVuxTNzlOUV/037JV6P2ten7D66FnWU9JJMMJJov+DjMb728yQFHwHz4roBJ5ePHaaFP6mDwpqYuG/hai6pVv2TAK1IdKUui/oXrYtU+0gxb6UF2kS1bspqDuN++R8JdL7CMSU5l28pQ8TsH1TpVF4jZpsFbp1Du4rQIULFsCFFg+Edf9tPgyKZOq6xcskIjT7oylAPO37/jhdNknDq2S82PaSKtke3ViOigtM5uJfG521ZscBJQ1K3kwoI/repIdV9PatjOYdsYAQ=="  # noqa: E501

    return InternalChatCompletionMessage(
        role="assistant",
        content="I'll check the weather in Paris for you.",
        reasoning_content=thought,
        thinking_blocks=[
            {
                "type": "thinking",
                "thinking": thought,
                "signature": signature,
            }
        ],
    )
|  | 34 | + | 
|  | 35 | + | 
def test_converter_skips_reasoning_items():
    """
    Check that a reasoning item in the input produces no message when items are
    converted to chat messages.
    """
    # Input: a user turn, a reasoning item, then an assistant message.
    user_item: dict[str, Any] = {"role": "user", "content": "Hello"}
    reasoning_item: dict[str, Any] = {
        "id": "reasoning_123",
        "type": "reasoning",
        "summary": [{"text": "User said hello", "type": "summary_text"}],
    }
    assistant_item: dict[str, Any] = {
        "id": "msg_123",
        "type": "message",
        "role": "assistant",
        "content": [{"type": "output_text", "text": "Hi there!"}],
        "status": "completed",
    }
    test_items: list[dict[str, Any]] = [user_item, reasoning_item, assistant_item]

    messages = Converter.items_to_messages(test_items)  # type: ignore[arg-type]

    # Only the user and assistant turns should come out, in that order.
    assert len(messages) == 2
    user_msg, assistant_msg = messages
    assert user_msg["role"] == "user"
    assert assistant_msg["role"] == "assistant"

    # When the assistant content is a list of parts, none may be a thinking part.
    content = assistant_msg.get("content")
    if isinstance(content, list):
        assert all(part.get("type") != "thinking" for part in content)
|  | 71 | + | 
|  | 72 | + | 
def test_reasoning_items_preserved_in_message_conversion():
    """
    Check that converting the mock Anthropic message yields exactly one reasoning
    item whose first summary entry holds the reasoning text. If that item also
    carries content, its first entry must be a matching ``reasoning_text`` block.
    """
    expected_text = "I need to call the weather function for Paris"

    # Convert the mock message (with thinking blocks) into output items.
    output_items = Converter.message_to_output_items(
        create_mock_anthropic_response_with_thinking()
    )

    # Exactly one of the output items should be a reasoning item.
    reasoning_items = [
        item for item in output_items if hasattr(item, "type") and item.type == "reasoning"
    ]
    assert len(reasoning_items) == 1

    (reasoning_item,) = reasoning_items
    assert reasoning_item.summary[0].text == expected_text

    # The thinking text is only checked when the item actually carries content;
    # a missing, None, or empty ``content`` skips this part.
    stored_content = getattr(reasoning_item, "content", None)
    if stored_content:
        first_block = stored_content[0]
        assert first_block.type == "reasoning_text"
        assert first_block.text == expected_text
0 commit comments