Skip to content

Commit 6125b86

Browse files
authored
Supporting thinking for Anthropic models (#3978)
* Supporting thinking for Anthropic models * Drop comments here * Thinking and tool-calling support * fix: properly mock tool use and text block types in Anthropic tests - Updated the test for the Anthropic tool use conversation flow to include type attributes for mocked ToolUseBlock and text blocks, ensuring accurate simulation of tool interactions during testing. * feat: add AnthropicThinkingConfig for enhanced thinking capabilities. This update introduces the AnthropicThinkingConfig class to manage thinking parameters for the Anthropic completion model. The LLM and AnthropicCompletion classes have been updated to utilize this new configuration. Additionally, new test cassettes have been added to validate the functionality of thinking blocks across interactions.
1 parent f2f9946 commit 6125b86

11 files changed

+1605
-73
lines changed

docs/en/concepts/llms.mdx

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,11 +283,54 @@ In this section, you'll find detailed examples that help you select, configure,
283283
)
284284
```
285285

286+
**Extended Thinking (Claude Sonnet 4 and Beyond):**
287+
288+
CrewAI supports Anthropic's Extended Thinking feature, which allows Claude to think through problems in a more human-like way before responding. This is particularly useful for complex reasoning, analysis, and problem-solving tasks.
289+
290+
```python Code
291+
from crewai import LLM
292+
293+
# Enable extended thinking with default settings
294+
llm = LLM(
295+
model="anthropic/claude-sonnet-4",
296+
thinking={"type": "enabled"},
297+
max_tokens=10000
298+
)
299+
300+
# Configure thinking with budget control
301+
llm = LLM(
302+
model="anthropic/claude-sonnet-4",
303+
thinking={
304+
"type": "enabled",
305+
"budget_tokens": 5000 # Limit thinking tokens
306+
},
307+
max_tokens=10000
308+
)
309+
```
310+
311+
**Thinking Configuration Options:**
312+
- `type`: Set to `"enabled"` to activate extended thinking mode
313+
- `budget_tokens` (optional): Maximum tokens to use for thinking (helps control costs)
314+
315+
**Models Supporting Extended Thinking:**
316+
- `claude-sonnet-4` and newer models
317+
- `claude-3-7-sonnet` (with extended thinking capabilities)
318+
319+
**When to Use Extended Thinking:**
320+
- Complex reasoning and multi-step problem solving
321+
- Mathematical calculations and proofs
322+
- Code analysis and debugging
323+
- Strategic planning and decision making
324+
- Research and analytical tasks
325+
326+
**Note:** Extended thinking consumes additional tokens but can significantly improve response quality for complex tasks.
327+
286328
**Supported Environment Variables:**
287329
- `ANTHROPIC_API_KEY`: Your Anthropic API key (required)
288330

289331
**Features:**
290332
- Native tool use support for Claude 3+ models
333+
- Extended Thinking support for Claude Sonnet 4+
291334
- Streaming support for real-time responses
292335
- Automatic system message handling
293336
- Stop sequences for controlled output
@@ -305,6 +348,7 @@ In this section, you'll find detailed examples that help you select, configure,
305348

306349
| Model | Context Window | Best For |
307350
|------------------------------|----------------|-----------------------------------------------|
351+
| claude-sonnet-4 | 200,000 tokens | Latest with extended thinking capabilities |
308352
| claude-3-7-sonnet | 200,000 tokens | Advanced reasoning and agentic tasks |
309353
| claude-3-5-sonnet-20241022 | 200,000 tokens | Latest Sonnet with best performance |
310354
| claude-3-5-haiku | 200,000 tokens | Fast, compact model for quick responses |

lib/crewai/src/crewai/llm.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767

6868
from crewai.agent.core import Agent
6969
from crewai.llms.hooks.base import BaseInterceptor
70+
from crewai.llms.providers.anthropic.completion import AnthropicThinkingConfig
7071
from crewai.task import Task
7172
from crewai.tools.base_tool import BaseTool
7273
from crewai.utilities.types import LLMMessage
@@ -585,6 +586,7 @@ def __init__(
585586
reasoning_effort: Literal["none", "low", "medium", "high"] | None = None,
586587
stream: bool = False,
587588
interceptor: BaseInterceptor[httpx.Request, httpx.Response] | None = None,
589+
thinking: AnthropicThinkingConfig | dict[str, Any] | None = None,
588590
**kwargs: Any,
589591
) -> None:
590592
"""Initialize LLM instance.

lib/crewai/src/crewai/llms/providers/anthropic/completion.py

Lines changed: 119 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
import json
44
import logging
55
import os
6-
from typing import TYPE_CHECKING, Any, cast
6+
from typing import TYPE_CHECKING, Any, Literal, cast
77

8+
from anthropic.types import ThinkingBlock
89
from pydantic import BaseModel
910

1011
from crewai.events.types.llm_events import LLMCallType
@@ -22,15 +23,19 @@
2223

2324
try:
2425
from anthropic import Anthropic, AsyncAnthropic
25-
from anthropic.types import Message
26-
from anthropic.types.tool_use_block import ToolUseBlock
26+
from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
2727
import httpx
2828
except ImportError:
2929
raise ImportError(
3030
'Anthropic native provider not available, to install: uv add "crewai[anthropic]"'
3131
) from None
3232

3333

34+
class AnthropicThinkingConfig(BaseModel):
35+
type: Literal["enabled", "disabled"]
36+
budget_tokens: int | None = None
37+
38+
3439
class AnthropicCompletion(BaseLLM):
3540
"""Anthropic native completion implementation.
3641
@@ -52,6 +57,7 @@ def __init__(
5257
stream: bool = False,
5358
client_params: dict[str, Any] | None = None,
5459
interceptor: BaseInterceptor[httpx.Request, httpx.Response] | None = None,
60+
thinking: AnthropicThinkingConfig | None = None,
5561
**kwargs: Any,
5662
):
5763
"""Initialize Anthropic chat completion client.
@@ -97,6 +103,10 @@ def __init__(
97103
self.top_p = top_p
98104
self.stream = stream
99105
self.stop_sequences = stop_sequences or []
106+
self.thinking = thinking
107+
self.previous_thinking_blocks: list[ThinkingBlock] = []
108+
# Model-specific settings
109+
self.is_claude_3 = "claude-3" in model.lower()
100110
self.supports_tools = True
101111

102112
@property
@@ -326,6 +336,12 @@ def _prepare_completion_params(
326336
if tools and self.supports_tools:
327337
params["tools"] = self._convert_tools_for_interference(tools)
328338

339+
if self.thinking:
340+
if isinstance(self.thinking, AnthropicThinkingConfig):
341+
params["thinking"] = self.thinking.model_dump()
342+
else:
343+
params["thinking"] = self.thinking
344+
329345
return params
330346

331347
def _convert_tools_for_interference(
@@ -365,6 +381,34 @@ def _convert_tools_for_interference(
365381

366382
return anthropic_tools
367383

384+
def _extract_thinking_block(
385+
self, content_block: Any
386+
) -> ThinkingBlock | dict[str, Any] | None:
387+
"""Extract and format thinking block from content block.
388+
389+
Args:
390+
content_block: Content block from Anthropic response
391+
392+
Returns:
393+
Dictionary with thinking block data including signature, or None if not a thinking block
394+
"""
395+
if content_block.type == "thinking":
396+
thinking_block = {
397+
"type": "thinking",
398+
"thinking": content_block.thinking,
399+
}
400+
if hasattr(content_block, "signature"):
401+
thinking_block["signature"] = content_block.signature
402+
return thinking_block
403+
if content_block.type == "redacted_thinking":
404+
redacted_block = {"type": "redacted_thinking"}
405+
if hasattr(content_block, "thinking"):
406+
redacted_block["thinking"] = content_block.thinking
407+
if hasattr(content_block, "signature"):
408+
redacted_block["signature"] = content_block.signature
409+
return redacted_block
410+
return None
411+
368412
def _format_messages_for_anthropic(
369413
self, messages: str | list[LLMMessage]
370414
) -> tuple[list[LLMMessage], str | None]:
@@ -374,6 +418,7 @@ def _format_messages_for_anthropic(
374418
- System messages are separate from conversation messages
375419
- Messages must alternate between user and assistant
376420
- First message must be from user
421+
- When thinking is enabled, assistant messages must start with thinking blocks
377422
378423
Args:
379424
messages: Input messages
@@ -398,8 +443,29 @@ def _format_messages_for_anthropic(
398443
system_message = cast(str, content)
399444
else:
400445
role_str = role if role is not None else "user"
401-
content_str = content if content is not None else ""
402-
formatted_messages.append({"role": role_str, "content": content_str})
446+
447+
if isinstance(content, list):
448+
formatted_messages.append({"role": role_str, "content": content})
449+
elif (
450+
role_str == "assistant"
451+
and self.thinking
452+
and self.previous_thinking_blocks
453+
):
454+
structured_content = cast(
455+
list[dict[str, Any]],
456+
[
457+
*self.previous_thinking_blocks,
458+
{"type": "text", "text": content if content else ""},
459+
],
460+
)
461+
formatted_messages.append(
462+
LLMMessage(role=role_str, content=structured_content)
463+
)
464+
else:
465+
content_str = content if content is not None else ""
466+
formatted_messages.append(
467+
LLMMessage(role=role_str, content=content_str)
468+
)
403469

404470
# Ensure first message is from user (Anthropic requirement)
405471
if not formatted_messages:
@@ -449,7 +515,6 @@ def _handle_completion(
449515
if tool_uses and tool_uses[0].name == "structured_output":
450516
structured_data = tool_uses[0].input
451517
structured_json = json.dumps(structured_data)
452-
453518
self._emit_call_completed_event(
454519
response=structured_json,
455520
call_type=LLMCallType.LLM_CALL,
@@ -477,15 +542,22 @@ def _handle_completion(
477542
from_agent,
478543
)
479544

480-
# Extract text content
481545
content = ""
546+
thinking_blocks: list[ThinkingBlock] = []
547+
482548
if response.content:
483549
for content_block in response.content:
484550
if hasattr(content_block, "text"):
485551
content += content_block.text
552+
else:
553+
thinking_block = self._extract_thinking_block(content_block)
554+
if thinking_block:
555+
thinking_blocks.append(cast(ThinkingBlock, thinking_block))
486556

487-
content = self._apply_stop_words(content)
557+
if thinking_blocks:
558+
self.previous_thinking_blocks = thinking_blocks
488559

560+
content = self._apply_stop_words(content)
489561
self._emit_call_completed_event(
490562
response=content,
491563
call_type=LLMCallType.LLM_CALL,
@@ -540,6 +612,16 @@ def _handle_streaming_completion(
540612

541613
final_message: Message = stream.get_final_message()
542614

615+
thinking_blocks: list[ThinkingBlock] = []
616+
if final_message.content:
617+
for content_block in final_message.content:
618+
thinking_block = self._extract_thinking_block(content_block)
619+
if thinking_block:
620+
thinking_blocks.append(cast(ThinkingBlock, thinking_block))
621+
622+
if thinking_blocks:
623+
self.previous_thinking_blocks = thinking_blocks
624+
543625
usage = self._extract_anthropic_token_usage(final_message)
544626
self._track_token_usage_internal(usage)
545627

@@ -644,7 +726,26 @@ def _handle_tool_use_conversation(
644726
follow_up_params = params.copy()
645727

646728
# Add Claude's tool use response to conversation
647-
assistant_message = {"role": "assistant", "content": initial_response.content}
729+
assistant_content: list[
730+
ThinkingBlock | ToolUseBlock | TextBlock | dict[str, Any]
731+
] = []
732+
for block in initial_response.content:
733+
thinking_block = self._extract_thinking_block(block)
734+
if thinking_block:
735+
assistant_content.append(thinking_block)
736+
elif block.type == "tool_use":
737+
assistant_content.append(
738+
{
739+
"type": "tool_use",
740+
"id": block.id,
741+
"name": block.name,
742+
"input": block.input,
743+
}
744+
)
745+
elif hasattr(block, "text"):
746+
assistant_content.append({"type": "text", "text": block.text})
747+
748+
assistant_message = {"role": "assistant", "content": assistant_content}
648749

649750
# Add user message with tool results
650751
user_message = {"role": "user", "content": tool_results}
@@ -663,12 +764,20 @@ def _handle_tool_use_conversation(
663764
follow_up_usage = self._extract_anthropic_token_usage(final_response)
664765
self._track_token_usage_internal(follow_up_usage)
665766

666-
# Extract final text content
667767
final_content = ""
768+
thinking_blocks: list[ThinkingBlock] = []
769+
668770
if final_response.content:
669771
for content_block in final_response.content:
670772
if hasattr(content_block, "text"):
671773
final_content += content_block.text
774+
else:
775+
thinking_block = self._extract_thinking_block(content_block)
776+
if thinking_block:
777+
thinking_blocks.append(cast(ThinkingBlock, thinking_block))
778+
779+
if thinking_blocks:
780+
self.previous_thinking_blocks = thinking_blocks
672781

673782
final_content = self._apply_stop_words(final_content)
674783

0 commit comments

Comments (0)