
Commit f0dbcf2

feat: add anthropic_cache_all in AnthropicModel for auto caching all messages
1 parent 359c6d2 commit f0dbcf2

File tree

- docs/models/anthropic.md
- pydantic_ai_slim/pydantic_ai/models/anthropic.py
- tests/models/test_anthropic.py

3 files changed: +185 -2 lines changed

docs/models/anthropic.md

Lines changed: 46 additions & 2 deletions
@@ -80,13 +80,14 @@ agent = Agent(model)

 ## Prompt Caching

-Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching:
+Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides four ways to use prompt caching:

 1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it
 2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt
 3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions
+4. **Cache Entire Conversation**: Enable the [`AnthropicModelSettings.anthropic_cache_all`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_all] [model setting](../agents.md#model-run-settings) to automatically cache the entire conversation history by adding `cache_control` to the last message

-You can combine all three strategies for maximum savings:
+You can combine multiple strategies for maximum savings:

 ```python {test="skip"}
 from pydantic_ai import Agent, CachePoint, RunContext
@@ -124,6 +125,49 @@ async def main():
     print(f'Second: {result2.output}')
 ```

+### Cache Entire Conversation with `anthropic_cache_all`
+
+For long conversations where you want to cache the entire conversation history automatically, use the `anthropic_cache_all` setting:
+
+```python {test="skip"}
+from pydantic_ai import Agent
+from pydantic_ai.models.anthropic import AnthropicModelSettings
+
+agent = Agent(
+    'anthropic:claude-sonnet-4-5',
+    system_prompt='You are a helpful assistant.',
+    model_settings=AnthropicModelSettings(
+        anthropic_cache_all=True,
+        anthropic_cache_instructions=True,
+        anthropic_cache_tool_definitions=True,
+    ),
+)
+
+async def main():
+    # First message - writes to cache
+    result1 = await agent.run('What is machine learning?')
+
+    # Subsequent messages reuse cached conversation history
+    result2 = await agent.run(
+        'Can you explain that in simpler terms?',
+        message_history=result1.all_messages()
+    )
+
+    # Each new message benefits from cached history
+    result3 = await agent.run(
+        'Give me an example.',
+        message_history=result2.all_messages()
+    )
+```
+
+This is particularly useful when:
+
+- Building chatbots or conversational agents with long conversations
+- Iterating on queries while reusing the same context
+- Working with large amounts of conversation history that don't change between requests
+
+### Cache Usage Statistics
+
 Access cache usage statistics via `result.usage()`:

 ```python {test="skip"}

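The `result.usage()` example continues past the end of this hunk. As a rough sketch, inspecting cache statistics after a run with `anthropic_cache_all` enabled could look like the following; the `cache_write_tokens` and `cache_read_tokens` attribute names on the returned usage object are assumptions for illustration, not taken from this diff:

```python {test="skip"}
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
    'anthropic:claude-sonnet-4-5',
    system_prompt='You are a helpful assistant.',
    model_settings=AnthropicModelSettings(anthropic_cache_all=True),
)

async def main():
    result = await agent.run('What is machine learning?')
    usage = result.usage()
    # Attribute names below are assumed for illustration; check the usage
    # object in your installed Pydantic AI version for the exact fields.
    print(usage.cache_write_tokens)
    print(usage.cache_read_tokens)
```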
pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 26 additions & 0 deletions
@@ -166,6 +166,15 @@ class AnthropicModelSettings(ModelSettings, total=False):
     See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
     """

+    anthropic_cache_all: bool
+    """Whether to add `cache_control` to the last message in the conversation.
+
+    When enabled, the last message content will have `cache_control` set,
+    allowing Anthropic to cache the entire conversation history and reduce costs.
+    This is particularly useful for long conversations or when reusing context.
+    See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
+    """
+

 @dataclass(init=False)
 class AnthropicModel(Model):
@@ -669,6 +678,23 @@ async def _map_message( # noqa: C901
                 anthropic_messages.append(BetaMessageParam(role='assistant', content=assistant_content_params))
             else:
                 assert_never(m)
+        # Apply cache_control to the last message if anthropic_cache_all is enabled
+        # This allows Anthropic to cache the entire conversation history, reducing costs
+        # for subsequent requests that reuse the same context
+        if anthropic_messages and model_settings.get('anthropic_cache_all'):
+            m = anthropic_messages[-1]
+            content = m['content']
+            if isinstance(content, str):
+                # Convert string content to structured format with cache_control
+                # This typically happens with assistant messages containing plain text
+                m['content'] = [
+                    {'text': content, 'type': 'text', 'cache_control': BetaCacheControlEphemeralParam(type='ephemeral')}
+                ]
+            else:
+                # For structured content (lists), add cache_control to the last block
+                content = cast(list[BetaContentBlockParam], content)
+                self._add_cache_control_to_last_param(content)
+
         if instructions := self._get_instructions(messages, model_request_parameters):
             system_prompt_parts.insert(0, instructions)
         system_prompt = '\n\n'.join(system_prompt_parts)

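As a self-contained illustration of the transformation above, here is a minimal sketch of the same idea on plain dicts (the helper name is hypothetical, not part of the library): string content is wrapped in a single text block carrying `cache_control`, while structured list content gets `cache_control` on its final block.

```python
from typing import Any

def mark_last_message_for_caching(messages: list[dict[str, Any]]) -> None:
    """Hypothetical stand-in for the diff's logic, using plain dicts."""
    if not messages:
        return
    content = messages[-1]['content']
    if isinstance(content, str):
        # Wrap plain string content in a text block so it can carry cache_control
        messages[-1]['content'] = [
            {'type': 'text', 'text': content, 'cache_control': {'type': 'ephemeral'}}
        ]
    else:
        # Structured content: mark the final block
        content[-1]['cache_control'] = {'type': 'ephemeral'}

msgs = [{'role': 'user', 'content': 'test prompt'}]
mark_last_message_for_caching(msgs)
assert msgs[-1]['content'][0]['cache_control'] == {'type': 'ephemeral'}
```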
tests/models/test_anthropic.py

Lines changed: 113 additions & 0 deletions
@@ -549,6 +549,119 @@ def my_tool(value: str) -> str: # pragma: no cover
 )


+async def test_anthropic_cache_all_text_message(allow_model_requests: None):
+    """Test that anthropic_cache_all adds cache_control to last text message."""
+    c = completion_message(
+        [BetaTextBlock(text='Response', type='text')],
+        usage=BetaUsage(input_tokens=10, output_tokens=5),
+    )
+    mock_client = MockAnthropic.create_mock(c)
+    m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
+    agent = Agent(
+        m,
+        system_prompt='System prompt',
+        model_settings=AnthropicModelSettings(anthropic_cache_all=True),
+    )
+
+    await agent.run('test prompt')
+
+    # Verify cache_control was added to the last message content
+    completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
+    messages = completion_kwargs['messages']
+    assert len(messages) == 1
+    assert messages[0]['role'] == 'user'
+    content = messages[0]['content']
+    assert isinstance(content, list)
+    assert len(content) == 1  # pyright: ignore[reportUnknownArgumentType]
+    assert content[0] == snapshot({'type': 'text', 'text': 'test prompt', 'cache_control': {'type': 'ephemeral'}})
+
+
+async def test_anthropic_cache_all_with_multiple_messages(allow_model_requests: None):
+    """Test that anthropic_cache_all only caches the last message in a conversation."""
+    c1 = completion_message(
+        [BetaTextBlock(text='First response', type='text')],
+        usage=BetaUsage(input_tokens=10, output_tokens=5),
+    )
+    c2 = completion_message(
+        [BetaTextBlock(text='Second response', type='text')],
+        usage=BetaUsage(input_tokens=12, output_tokens=6),
+    )
+    mock_client = MockAnthropic.create_mock([c1, c2])
+    m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
+    agent = Agent(
+        m,
+        model_settings=AnthropicModelSettings(anthropic_cache_all=True),
+    )
+
+    # First run
+    result1 = await agent.run('first prompt')
+    assert result1.output == 'First response'
+
+    # Second run with conversation history
+    result2 = await agent.run('second prompt', message_history=result1.all_messages())
+    assert result2.output == 'Second response'
+
+    # Check second call - should have cache_control on last user message only
+    completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[1]
+    messages = completion_kwargs['messages']
+
+    # Should have 3 messages: user1, assistant1, user2
+    assert len(messages) == 3
+
+    # First user message should NOT have cache_control
+    first_user_content = messages[0]['content']
+    assert isinstance(first_user_content, list)
+    assert len(first_user_content) == 1  # pyright: ignore[reportUnknownArgumentType]
+    assert 'cache_control' not in first_user_content[0]
+
+    # Second user message should have cache_control
+    last_user_content = messages[2]['content']
+    assert isinstance(last_user_content, list)
+    assert last_user_content[0]['cache_control'] == {'type': 'ephemeral'}
+
+
+async def test_anthropic_cache_all_combined_with_other_cache_settings(allow_model_requests: None):
+    """Test that anthropic_cache_all works together with other cache settings."""
+    c = completion_message(
+        [BetaTextBlock(text='Response', type='text')],
+        usage=BetaUsage(input_tokens=10, output_tokens=5),
+    )
+    mock_client = MockAnthropic.create_mock(c)
+    m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
+    agent = Agent(
+        m,
+        system_prompt='System instructions to cache.',
+        model_settings=AnthropicModelSettings(
+            anthropic_cache_tool_definitions=True,
+            anthropic_cache_instructions=True,
+            anthropic_cache_all=True,
+        ),
+    )
+
+    @agent.tool_plain
+    def my_tool(value: str) -> str:  # pragma: no cover
+        return f'Result: {value}'
+
+    await agent.run('test prompt')
+
+    # Verify all cache settings are applied
+    completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
+
+    # Check tools have cache_control
+    tools = completion_kwargs['tools']
+    assert tools[0]['cache_control'] == {'type': 'ephemeral'}
+
+    # Check system has cache_control
+    system = completion_kwargs['system']
+    assert system[0]['cache_control'] == {'type': 'ephemeral'}
+
+    # Check last message has cache_control
+    messages = completion_kwargs['messages']
+    last_message_content = messages[-1]['content']
+    assert isinstance(last_message_content, list)
+    assert last_message_content[0]['cache_control'] == {'type': 'ephemeral'}
+
+
 async def test_async_request_text_response(allow_model_requests: None):
     c = completion_message(
         [BetaTextBlock(text='world', type='text')],
