Skip to content

Commit 4592255

Browse files
committed
Add anthropic_cache_tools and anthropic_cache_instructions settings
This commit addresses maintainer feedback on the Anthropic prompt caching PR:
- Add anthropic_cache_tools field to cache last tool definition
- Add anthropic_cache_instructions field to cache system prompts
- Rewrite existing CachePoint tests to use snapshot() assertions
- Add comprehensive tests for new caching settings
- Remove standalone example file, add docs section instead
- Move imports to top of test files
- Remove ineffective Google CachePoint test
- Add "Supported by: Anthropic" to CachePoint docstring
- Add Anthropic docs link in cache_control method

Tests are written but snapshots not yet generated (will be done in next commit).
1 parent 4824eeb commit 4592255

File tree

8 files changed

+279
-224
lines changed

8 files changed

+279
-224
lines changed

docs/models/anthropic.md

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,111 @@ model = AnthropicModel(
7777
agent = Agent(model)
7878
...
7979
```
80+
81+
## Prompt Caching
82+
83+
Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. PydanticAI provides three ways to use prompt caching:
84+
85+
### 1. Cache User Messages with `CachePoint`
86+
87+
Insert a [`CachePoint`][pydantic_ai.messages.CachePoint] marker in your user messages to cache everything before it:
88+
89+
```python
90+
from pydantic_ai import Agent, CachePoint
91+
92+
agent = Agent('anthropic:claude-sonnet-4-5')
93+
94+
# Everything before CachePoint will be cached
95+
result = await agent.run([
96+
"Long context that should be cached...",
97+
CachePoint(),
98+
"Your question here"
99+
])
100+
```
101+
102+
### 2. Cache System Instructions
103+
104+
Use `anthropic_cache_instructions=True` to cache your system prompt:
105+
106+
```python
107+
from pydantic_ai import Agent
108+
from pydantic_ai.models.anthropic import AnthropicModelSettings
109+
110+
agent = Agent(
111+
'anthropic:claude-sonnet-4-5',
112+
system_prompt='Long detailed instructions...',
113+
model_settings=AnthropicModelSettings(
114+
anthropic_cache_instructions=True
115+
),
116+
)
117+
118+
result = await agent.run("Your question")
119+
```
120+
121+
### 3. Cache Tool Definitions
122+
123+
Use `anthropic_cache_tools=True` to cache your tool definitions:
124+
125+
```python
126+
from pydantic_ai import Agent
127+
from pydantic_ai.models.anthropic import AnthropicModelSettings
128+
129+
agent = Agent(
130+
'anthropic:claude-sonnet-4-5',
131+
model_settings=AnthropicModelSettings(
132+
anthropic_cache_tools=True
133+
),
134+
)
135+
136+
@agent.tool_plain
137+
def my_tool() -> str:
138+
"""Tool definition will be cached."""
139+
return "result"
140+
141+
result = await agent.run("Use the tool")
142+
```
143+
144+
### Combining Cache Strategies
145+
146+
You can combine all three caching strategies for maximum savings:
147+
148+
```python
149+
from pydantic_ai import Agent, CachePoint
150+
from pydantic_ai.models.anthropic import AnthropicModelSettings
151+
152+
agent = Agent(
153+
'anthropic:claude-sonnet-4-5',
154+
system_prompt='Detailed instructions...',
155+
model_settings=AnthropicModelSettings(
156+
anthropic_cache_instructions=True,
157+
anthropic_cache_tools=True,
158+
),
159+
)
160+
161+
@agent.tool_plain
162+
def search_docs(query: str) -> str:
163+
"""Search documentation."""
164+
return f"Results for {query}"
165+
166+
# First call - writes to cache
167+
result1 = await agent.run([
168+
"Long context from documentation...",
169+
CachePoint(),
170+
"First question"
171+
])
172+
173+
# Subsequent calls - read from cache (cached input tokens cost ~90% less than uncached ones)
174+
result2 = await agent.run([
175+
"Long context from documentation...", # Same content
176+
CachePoint(),
177+
"Second question"
178+
])
179+
```
180+
181+
Access cache usage statistics via `result.usage()`:
182+
183+
```python
184+
usage = result.usage()
185+
print(f"Cache write tokens: {usage.cache_write_tokens}")
186+
print(f"Cache read tokens: {usage.cache_read_tokens}")
187+
```

examples/pydantic_ai_examples/anthropic_prompt_caching.py

Lines changed: 0 additions & 152 deletions
This file was deleted.

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,10 @@ class CachePoint:
618618
619619
Can be inserted into UserPromptPart.content to mark cache boundaries.
620620
Models that don't support caching will filter these out.
621+
622+
Supported by:
623+
624+
- Anthropic
621625
"""
622626

623627
kind: Literal['cache-point'] = 'cache-point'

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,22 @@ class AnthropicModelSettings(ModelSettings, total=False):
150150
See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
151151
"""
152152

153+
anthropic_cache_tools: bool
154+
"""Whether to add cache_control to the last tool definition.
155+
156+
When enabled, the last tool in the tools array will have cache_control set,
157+
allowing Anthropic to cache tool definitions and reduce costs.
158+
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
159+
"""
160+
161+
anthropic_cache_instructions: bool
162+
"""Whether to add cache_control to the last system prompt block.
163+
164+
When enabled, the last system prompt will have cache_control set,
165+
allowing Anthropic to cache system instructions and reduce costs.
166+
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
167+
"""
168+
153169

154170
@dataclass(init=False)
155171
class AnthropicModel(Model):
@@ -291,7 +307,7 @@ async def _messages_create(
291307
model_request_parameters: ModelRequestParameters,
292308
) -> BetaMessage | AsyncStream[BetaRawMessageStreamEvent]:
293309
# standalone function to make it easier to override
294-
tools = self._get_tools(model_request_parameters)
310+
tools = self._get_tools(model_request_parameters, model_settings)
295311
tools, mcp_servers, beta_features = self._add_builtin_tools(tools, model_request_parameters)
296312

297313
tool_choice: BetaToolChoiceParam | None
@@ -307,7 +323,7 @@ async def _messages_create(
307323
if (allow_parallel_tool_calls := model_settings.get('parallel_tool_calls')) is not None:
308324
tool_choice['disable_parallel_tool_use'] = not allow_parallel_tool_calls
309325

310-
system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters)
326+
system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings)
311327

312328
try:
313329
extra_headers = model_settings.get('extra_headers', {})
@@ -413,8 +429,19 @@ async def _process_streamed_response(
413429
_provider_url=self._provider.base_url,
414430
)
415431

416-
def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[BetaToolUnionParam]:
417-
return [self._map_tool_definition(r) for r in model_request_parameters.tool_defs.values()]
432+
def _get_tools(
433+
self, model_request_parameters: ModelRequestParameters, model_settings: AnthropicModelSettings
434+
) -> list[BetaToolUnionParam]:
435+
tools: list[BetaToolUnionParam] = [
436+
self._map_tool_definition(r) for r in model_request_parameters.tool_defs.values()
437+
]
438+
439+
# Add cache_control to the last tool if enabled
440+
if tools and model_settings.get('anthropic_cache_tools'):
441+
last_tool = cast(dict[str, Any], tools[-1])
442+
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
443+
444+
return tools
418445

419446
def _add_builtin_tools(
420447
self, tools: list[BetaToolUnionParam], model_request_parameters: ModelRequestParameters
@@ -466,8 +493,11 @@ def _add_builtin_tools(
466493
return tools, mcp_servers, beta_features
467494

468495
async def _map_message( # noqa: C901
469-
self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
470-
) -> tuple[str, list[BetaMessageParam]]:
496+
self,
497+
messages: list[ModelMessage],
498+
model_request_parameters: ModelRequestParameters,
499+
model_settings: AnthropicModelSettings,
500+
) -> tuple[str | list[BetaTextBlockParam], list[BetaMessageParam]]:
471501
"""Just maps a `pydantic_ai.Message` to a `anthropic.types.MessageParam`."""
472502
system_prompt_parts: list[str] = []
473503
anthropic_messages: list[BetaMessageParam] = []
@@ -642,11 +672,24 @@ async def _map_message( # noqa: C901
642672
if instructions := self._get_instructions(messages, model_request_parameters):
643673
system_prompt_parts.insert(0, instructions)
644674
system_prompt = '\n\n'.join(system_prompt_parts)
675+
676+
# If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control
677+
if system_prompt and model_settings.get('anthropic_cache_instructions'):
678+
system_prompt_blocks = [
679+
BetaTextBlockParam(
680+
type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral')
681+
)
682+
]
683+
return system_prompt_blocks, anthropic_messages
684+
645685
return system_prompt, anthropic_messages
646686

647687
@staticmethod
648688
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
649-
"""Add cache control to the last content block param."""
689+
"""Add cache control to the last content block param.
690+
691+
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
692+
"""
650693
if not params:
651694
raise UserError(
652695
'CachePoint cannot be the first content in a user message - there must be previous content to attach the CachePoint to.'

0 commit comments

Comments
 (0)