Commits (24 total; diff shows changes from 17 commits)
0ea9f68
Add Anthropic prompt caching support with CachePoint
ronakrm Nov 6, 2025
fd28844
Fix type checking errors for CachePoint
ronakrm Nov 6, 2025
247e936
Add complexity noqa comment to openai._map_user_prompt
ronakrm Nov 6, 2025
a75ed81
Add tests and fix type checking for 100% coverage
ronakrm Nov 7, 2025
54869d6
Add tests to cover CachePoint filtering in all models
ronakrm Nov 7, 2025
4824eeb
linting
ronakrm Nov 7, 2025
4592255
Add anthropic_cache_tools and anthropic_cache_instructions settings
ronakrm Nov 12, 2025
7e02ac4
Generate inline snapshots for CachePoint tests
ronakrm Nov 12, 2025
3a0de37
Fix test_anthropic_empty_content_filtering for new _map_message signa…
ronakrm Nov 12, 2025
2ea2a63
Fix leftover conflict marker in test_google.py
ronakrm Nov 12, 2025
57d051a
Add type ignore comments for protected method calls in tests
ronakrm Nov 12, 2025
92509fe
Fix doc examples: wrap await in async functions and use single quotes
ronakrm Nov 12, 2025
f088447
Add comprehensive test coverage for CachePoint feature
ronakrm Nov 13, 2025
56a8047
Address PR review comments
ronakrm Nov 13, 2025
e70956e
Merge branch 'main' into anthropic-prompt-caching-only
DouweM Nov 13, 2025
6f29370
Small lint in test_openai.py
ronakrm Nov 13, 2025
8bb5370
Fix pyright type checking errors in tests
ronakrm Nov 13, 2025
f274699
Update docs/models/anthropic.md
ronakrm Nov 13, 2025
9408b58
Update pydantic_ai_slim/pydantic_ai/models/bedrock.py
ronakrm Nov 14, 2025
78aa4a3
Address PR review comments
ronakrm Nov 14, 2025
3dc6901
Add pragma: no cover to test helper tool functions
ronakrm Nov 14, 2025
11e7ab7
Merge branch 'main' into anthropic-prompt-caching-only
ronakrm Nov 14, 2025
0c8b9e0
Add cross-reference links to cache settings in docs
ronakrm Nov 14, 2025
5e29917
Simplify prompt caching documentation structure
ronakrm Nov 14, 2025
130 changes: 130 additions & 0 deletions docs/models/anthropic.md
@@ -77,3 +77,133 @@ model = AnthropicModel(
agent = Agent(model)
...
```

## Prompt Caching

Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. PydanticAI provides three ways to use prompt caching:

### 1. Cache User Messages with `CachePoint`

Insert a [`CachePoint`][pydantic_ai.messages.CachePoint] marker in your user messages to cache everything before it:

```python {test="skip"}
from pydantic_ai import Agent, CachePoint

agent = Agent('anthropic:claude-sonnet-4-5')

async def main():
# Everything before CachePoint will be cached
result = await agent.run([
'Long context that should be cached...',
CachePoint(),
'Your question here'
])
print(result.output)
```
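
You can also insert more than one `CachePoint` in a single message; each marker should become its own cache breakpoint (Anthropic currently allows up to four breakpoints per request). A minimal sketch along the same lines as the example above:

```python {test="skip"}
from pydantic_ai import Agent, CachePoint

agent = Agent('anthropic:claude-sonnet-4-5')

async def main():
    result = await agent.run([
        'Long reference material...',  # cached at the first CachePoint
        CachePoint(),
        'Shorter, more recent context...',  # cached at the second CachePoint
        CachePoint(),
        'Your question here',
    ])
    print(result.output)
```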

### 2. Cache System Instructions

Use `anthropic_cache_instructions=True` to cache your system prompt:

```python {test="skip"}
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
'anthropic:claude-sonnet-4-5',
system_prompt='Long detailed instructions...',
model_settings=AnthropicModelSettings(
anthropic_cache_instructions=True
),
)

async def main():
result = await agent.run('Your question')
print(result.output)
```

### 3. Cache Tool Definitions

Use `anthropic_cache_tools=True` to cache your tool definitions:

```python {test="skip"}
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
'anthropic:claude-sonnet-4-5',
model_settings=AnthropicModelSettings(
anthropic_cache_tools=True
),
)

@agent.tool
def my_tool() -> str:
"""Tool definition will be cached."""
return 'result'

async def main():
result = await agent.run('Use the tool')
print(result.output)
```

### Combining Cache Strategies

You can combine all three caching strategies for maximum savings:

```python {test="skip"}
from pydantic_ai import Agent, CachePoint, RunContext
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
'anthropic:claude-sonnet-4-5',
system_prompt='Detailed instructions...',
model_settings=AnthropicModelSettings(
anthropic_cache_instructions=True,
anthropic_cache_tools=True,
),
)

@agent.tool
def search_docs(ctx: RunContext, query: str) -> str:
"""Search documentation."""
return f'Results for {query}'

async def main():
# First call - writes to cache
result1 = await agent.run([
'Long context from documentation...',
CachePoint(),
'First question'
])

# Subsequent calls - read from cache (90% cost reduction)
result2 = await agent.run([
'Long context from documentation...', # Same content
CachePoint(),
'Second question'
])
print(f'First: {result1.output}')
print(f'Second: {result2.output}')
```

Access cache usage statistics via `result.usage()`:

```python {test="skip"}
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
'anthropic:claude-sonnet-4-5',
system_prompt='Instructions...',
model_settings=AnthropicModelSettings(
anthropic_cache_instructions=True
),
)

async def main():
result = await agent.run('Your question')
usage = result.usage()
print(f'Cache write tokens: {usage.cache_write_tokens}')
print(f'Cache read tokens: {usage.cache_read_tokens}')
```
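
To sanity-check that caching is taking effect, you can compare usage across two runs with the same cached prefix: the first run should report cache write tokens, later runs cache read tokens. A rough sketch, mirroring the example above:

```python {test="skip"}
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
    'anthropic:claude-sonnet-4-5',
    system_prompt='Instructions...',
    model_settings=AnthropicModelSettings(
        anthropic_cache_instructions=True
    ),
)

async def main():
    first = await agent.run('Your question')
    second = await agent.run('Your question')
    # The first call writes the cached prefix; later calls should read it back.
    print(f'Cache write tokens: {first.usage().cache_write_tokens}')
    print(f'Cache read tokens: {second.usage().cache_read_tokens}')
```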
2 changes: 2 additions & 0 deletions pydantic_ai_slim/pydantic_ai/__init__.py
@@ -42,6 +42,7 @@
BinaryImage,
BuiltinToolCallPart,
BuiltinToolReturnPart,
CachePoint,
DocumentFormat,
DocumentMediaType,
DocumentUrl,
@@ -141,6 +142,7 @@
'BinaryContent',
'BuiltinToolCallPart',
'BuiltinToolReturnPart',
'CachePoint',
'DocumentFormat',
'DocumentMediaType',
'DocumentUrl',
21 changes: 20 additions & 1 deletion pydantic_ai_slim/pydantic_ai/messages.py
@@ -612,8 +612,24 @@ def __init__(
raise ValueError('`BinaryImage` must have a media type that starts with "image/"') # pragma: no cover


@dataclass
class CachePoint:
"""A cache point marker for prompt caching.

Can be inserted into UserPromptPart.content to mark cache boundaries.
Models that don't support caching will filter these out.

Supported by:

- Anthropic
"""

kind: Literal['cache-point'] = 'cache-point'
"""Type identifier, this is available on all parts as a discriminator."""


MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
UserContent: TypeAlias = str | MultiModalContent
UserContent: TypeAlias = str | MultiModalContent | CachePoint


@dataclass(repr=False)
@@ -730,6 +746,9 @@ def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_me
if settings.include_content and settings.include_binary_content:
converted_part['content'] = base64.b64encode(part.data).decode()
parts.append(converted_part)
elif isinstance(part, CachePoint):
# CachePoint is a marker, not actual content - skip it for otel
pass
else:
parts.append({'type': part.kind}) # pragma: no cover
return parts
86 changes: 78 additions & 8 deletions pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -19,6 +19,7 @@
BinaryContent,
BuiltinToolCallPart,
BuiltinToolReturnPart,
CachePoint,
DocumentUrl,
FilePart,
FinishReason,
@@ -58,6 +59,7 @@
from anthropic.types.beta import (
BetaBase64PDFBlockParam,
BetaBase64PDFSourceParam,
BetaCacheControlEphemeralParam,
BetaCitationsDelta,
BetaCodeExecutionTool20250522Param,
BetaCodeExecutionToolResultBlock,
@@ -148,6 +150,22 @@ class AnthropicModelSettings(ModelSettings, total=False):
See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
"""

anthropic_cache_tools: bool
"""Whether to add cache_control to the last tool definition.
When enabled, the last tool in the tools array will have cache_control set,
allowing Anthropic to cache tool definitions and reduce costs.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""

anthropic_cache_instructions: bool
"""Whether to add cache_control to the last system prompt block.
When enabled, the last system prompt will have cache_control set,
allowing Anthropic to cache system instructions and reduce costs.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""


@dataclass(init=False)
class AnthropicModel(Model):
@@ -289,7 +307,7 @@ async def _messages_create(
model_request_parameters: ModelRequestParameters,
) -> BetaMessage | AsyncStream[BetaRawMessageStreamEvent]:
# standalone function to make it easier to override
tools = self._get_tools(model_request_parameters)
tools = self._get_tools(model_request_parameters, model_settings)
tools, mcp_servers, beta_features = self._add_builtin_tools(tools, model_request_parameters)

tool_choice: BetaToolChoiceParam | None
@@ -305,7 +323,7 @@
if (allow_parallel_tool_calls := model_settings.get('parallel_tool_calls')) is not None:
tool_choice['disable_parallel_tool_use'] = not allow_parallel_tool_calls

system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters)
system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings)

try:
extra_headers = model_settings.get('extra_headers', {})
@@ -411,8 +429,19 @@ async def _process_streamed_response(
_provider_url=self._provider.base_url,
)

def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[BetaToolUnionParam]:
return [self._map_tool_definition(r) for r in model_request_parameters.tool_defs.values()]
def _get_tools(
self, model_request_parameters: ModelRequestParameters, model_settings: AnthropicModelSettings
) -> list[BetaToolUnionParam]:
tools: list[BetaToolUnionParam] = [
self._map_tool_definition(r) for r in model_request_parameters.tool_defs.values()
]

# Add cache_control to the last tool if enabled
if tools and model_settings.get('anthropic_cache_tools'):
last_tool = cast(dict[str, Any], tools[-1])
Collaborator comment:
Why do we have to cast it? I'd rather change the type of BetaToolUnionParam to not be a union, so that we can be sure it's a (typed)dict here.

Author reply (@ronakrm, Nov 14, 2025):
BetaToolUnionParam is an upstream anthropic package type, I think this is the best we can do for now?
(This cast was unneeded, but the one at ~L700 is, comment added)

last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')

return tools

def _add_builtin_tools(
self, tools: list[BetaToolUnionParam], model_request_parameters: ModelRequestParameters
@@ -464,8 +493,11 @@ def _add_builtin_tools(
return tools, mcp_servers, beta_features

async def _map_message( # noqa: C901
self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
) -> tuple[str, list[BetaMessageParam]]:
self,
messages: list[ModelMessage],
model_request_parameters: ModelRequestParameters,
model_settings: AnthropicModelSettings,
) -> tuple[str | list[BetaTextBlockParam], list[BetaMessageParam]]:
"""Just maps a `pydantic_ai.Message` to a `anthropic.types.MessageParam`."""
system_prompt_parts: list[str] = []
anthropic_messages: list[BetaMessageParam] = []
@@ -477,7 +509,10 @@ async def _map_message( # noqa: C901
system_prompt_parts.append(request_part.content)
elif isinstance(request_part, UserPromptPart):
async for content in self._map_user_prompt(request_part):
user_content_params.append(content)
if isinstance(content, CachePoint):
self._add_cache_control_to_last_param(user_content_params)
else:
user_content_params.append(content)
elif isinstance(request_part, ToolReturnPart):
tool_result_block_param = BetaToolResultBlockParam(
tool_use_id=_guard_tool_call_id(t=request_part),
@@ -637,12 +672,43 @@ async def _map_message( # noqa: C901
if instructions := self._get_instructions(messages, model_request_parameters):
system_prompt_parts.insert(0, instructions)
system_prompt = '\n\n'.join(system_prompt_parts)

# If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control
if system_prompt and model_settings.get('anthropic_cache_instructions'):
system_prompt_blocks = [
BetaTextBlockParam(
type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral')
)
]
return system_prompt_blocks, anthropic_messages

return system_prompt, anthropic_messages

@staticmethod
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
"""Add cache control to the last content block param.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""
if not params:
raise UserError(
'CachePoint cannot be the first content in a user message - there must be previous content to attach the CachePoint to.'
Collaborator comment:
Copying in context from https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#what-can-be-cached:

Tools: Tool definitions in the tools array
System messages: Content blocks in the system array
Text messages: Content blocks in the messages.content array, for both user and assistant turns
Images & Documents: Content blocks in the messages.content array, in user turns
Tool use and tool results: Content blocks in the messages.content array, in both user and assistant turns

I think we should support inserting a cache point after tool defs and system messages as well.

In the original PR I suggested doing this by supporting CachePoint as the first content in a user message (by adding it to whatever came before it: the system message, tool definition, or the last message of the assistant output), but that doesn't really feel natural from a code perspective.

What do you think about adding anthropic_cache_tools and anthropic_cache_instructions fields to AnthropicModelSettings, and setting cache_control on the relevant parts when set?

Author reply (@ronakrm):
Seems reasonable, I'll look into it!

Collaborator comment:
Let's update the message here to make it clear that they are likely looking for one of the 2 settings instead.

)

# Only certain types support cache_control
# See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#what-can-be-cached
cacheable_types = {'text', 'tool_use', 'server_tool_use', 'image', 'tool_result'}
last_param = cast(dict[str, Any], params[-1]) # Cast to dict for mutation
Collaborator comment:
This didn't work without the cast?

if last_param['type'] not in cacheable_types:
raise UserError(f'Cache control not supported for param type: {last_param["type"]}')

# Add cache_control to the last param
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')

@staticmethod
async def _map_user_prompt(
part: UserPromptPart,
) -> AsyncGenerator[BetaContentBlockParam]:
) -> AsyncGenerator[BetaContentBlockParam | CachePoint]:
if isinstance(part.content, str):
if part.content: # Only yield non-empty text
yield BetaTextBlockParam(text=part.content, type='text')
@@ -651,6 +717,8 @@ async def _map_user_prompt(
if isinstance(item, str):
if item: # Only yield non-empty text
yield BetaTextBlockParam(text=item, type='text')
elif isinstance(item, CachePoint):
yield item
elif isinstance(item, BinaryContent):
if item.is_image:
yield BetaImageBlockParam(
@@ -717,6 +785,8 @@ def _map_usage(
key: value for key, value in response_usage.model_dump().items() if isinstance(value, int)
}

# Note: genai-prices already extracts cache_creation_input_tokens and cache_read_input_tokens
# from the Anthropic response and maps them to cache_write_tokens and cache_read_tokens
return usage.RequestUsage.extract(
dict(model=model, usage=details),
provider=provider,
4 changes: 4 additions & 0 deletions pydantic_ai_slim/pydantic_ai/models/bedrock.py
@@ -19,6 +19,7 @@
BinaryContent,
BuiltinToolCallPart,
BuiltinToolReturnPart,
CachePoint,
DocumentUrl,
FinishReason,
ImageUrl,
@@ -672,6 +673,9 @@ async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int])
content.append({'video': video})
elif isinstance(item, AudioUrl): # pragma: no cover
raise NotImplementedError('Audio is not supported yet.')
elif isinstance(item, CachePoint):
# Bedrock doesn't support prompt caching via CachePoint in this implementation
pass
else:
assert_never(item)
return [{'role': 'user', 'content': content}]
Expand Down
4 changes: 4 additions & 0 deletions pydantic_ai_slim/pydantic_ai/models/gemini.py
@@ -21,6 +21,7 @@
BinaryContent,
BuiltinToolCallPart,
BuiltinToolReturnPart,
CachePoint,
FilePart,
FileUrl,
ModelMessage,
@@ -391,6 +392,9 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[_GeminiPartUnion]
else: # pragma: lax no cover
file_data = _GeminiFileDataPart(file_data={'file_uri': item.url, 'mime_type': item.media_type})
content.append(file_data)
elif isinstance(item, CachePoint):
# Gemini doesn't support prompt caching via CachePoint
pass
else:
assert_never(item) # pragma: lax no cover
return content