130 changes: 130 additions & 0 deletions docs/models/anthropic.md
@@ -77,3 +77,133 @@ model = AnthropicModel(
agent = Agent(model)
...
```

## Prompt Caching

Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. PydanticAI provides three ways to use prompt caching:

### 1. Cache User Messages with `CachePoint`

Insert a [`CachePoint`][pydantic_ai.messages.CachePoint] marker in your user messages to cache everything before it:

```python {test="skip"}
from pydantic_ai import Agent, CachePoint

agent = Agent('anthropic:claude-sonnet-4-5')

async def main():
    # Everything before CachePoint will be cached
    result = await agent.run([
        'Long context that should be cached...',
        CachePoint(),
        'Your question here'
    ])
    print(result.output)
```
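
`CachePoint` only marks a boundary, so if your prompt has more than one stable prefix you can insert several markers. The sketch below assumes multiple cache points are supported in a single message list; Anthropic currently allows up to four cache breakpoints per request.

```python {test="skip"}
from pydantic_ai import Agent, CachePoint

agent = Agent('anthropic:claude-sonnet-4-5')

async def main():
    # Each CachePoint caches everything that precedes it.
    result = await agent.run([
        'Long, rarely-changing context (e.g. a style guide)...',
        CachePoint(),
        'Semi-stable context (e.g. retrieved documentation)...',
        CachePoint(),
        'Your question here',
    ])
    print(result.output)
```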

### 2. Cache System Instructions

Use `anthropic_cache_instructions=True` to cache your system prompt:

```python {test="skip"}
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
    'anthropic:claude-sonnet-4-5',
    system_prompt='Long detailed instructions...',
    model_settings=AnthropicModelSettings(
        anthropic_cache_instructions=True
    ),
)

async def main():
    result = await agent.run('Your question')
    print(result.output)
```

### 3. Cache Tool Definitions

Use `anthropic_cache_tools=True` to cache your tool definitions:

```python {test="skip"}
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
    'anthropic:claude-sonnet-4-5',
    model_settings=AnthropicModelSettings(
        anthropic_cache_tools=True
    ),
)

@agent.tool_plain
def my_tool() -> str:
    """Tool definition will be cached."""
    return 'result'

async def main():
    result = await agent.run('Use the tool')
    print(result.output)
```

### Combining Cache Strategies

You can combine all three caching strategies for maximum savings:

```python {test="skip"}
from pydantic_ai import Agent, CachePoint, RunContext
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
    'anthropic:claude-sonnet-4-5',
    system_prompt='Detailed instructions...',
    model_settings=AnthropicModelSettings(
        anthropic_cache_instructions=True,
        anthropic_cache_tools=True,
    ),
)

@agent.tool
def search_docs(ctx: RunContext, query: str) -> str:
    """Search documentation."""
    return f'Results for {query}'

async def main():
    # First call - writes to cache
    result1 = await agent.run([
        'Long context from documentation...',
        CachePoint(),
        'First question'
    ])

    # Subsequent calls - read from cache (cached tokens cost ~10% of the base input price)
    result2 = await agent.run([
        'Long context from documentation...',  # Same content
        CachePoint(),
        'Second question'
    ])
    print(f'First: {result1.output}')
    print(f'Second: {result2.output}')
```

Access cache usage statistics via `result.usage()`:

```python {test="skip"}
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
    'anthropic:claude-sonnet-4-5',
    system_prompt='Instructions...',
    model_settings=AnthropicModelSettings(
        anthropic_cache_instructions=True
    ),
)

async def main():
    result = await agent.run('Your question')
    usage = result.usage()
    print(f'Cache write tokens: {usage.cache_write_tokens}')
    print(f'Cache read tokens: {usage.cache_read_tokens}')
```
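
Note that Anthropic only caches prompts above a minimum length (1024 tokens for most Claude models); shorter prefixes are processed normally and no cache entry is written. To confirm a run actually used the cache, compare the usage of consecutive runs, as in this sketch (assuming the instructions are long enough to be cacheable):

```python {test="skip"}
from pydantic_ai import Agent
from pydantic_ai.models.anthropic import AnthropicModelSettings

agent = Agent(
    'anthropic:claude-sonnet-4-5',
    system_prompt='Long detailed instructions...',  # must exceed the minimum cacheable length
    model_settings=AnthropicModelSettings(anthropic_cache_instructions=True),
)

async def main():
    first = await agent.run('First question')
    second = await agent.run('Second question')
    # The first call writes the cache entry; the second should read from it.
    print(first.usage().cache_write_tokens)
    print(second.usage().cache_read_tokens)
```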
2 changes: 2 additions & 0 deletions pydantic_ai_slim/pydantic_ai/__init__.py
@@ -42,6 +42,7 @@
BinaryImage,
BuiltinToolCallPart,
BuiltinToolReturnPart,
CachePoint,
DocumentFormat,
DocumentMediaType,
DocumentUrl,
@@ -141,6 +142,7 @@
'BinaryContent',
'BuiltinToolCallPart',
'BuiltinToolReturnPart',
'CachePoint',
'DocumentFormat',
'DocumentMediaType',
'DocumentUrl',
21 changes: 20 additions & 1 deletion pydantic_ai_slim/pydantic_ai/messages.py
@@ -612,8 +612,24 @@ def __init__(
raise ValueError('`BinaryImage` must have a media type that starts with "image/"') # pragma: no cover


@dataclass
class CachePoint:
"""A cache point marker for prompt caching.

Can be inserted into UserPromptPart.content to mark cache boundaries.
Models that don't support caching will filter these out.

Supported by:

- Anthropic
"""

kind: Literal['cache-point'] = 'cache-point'
"""Type identifier, this is available on all parts as a discriminator."""


MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
UserContent: TypeAlias = str | MultiModalContent
UserContent: TypeAlias = str | MultiModalContent | CachePoint


@dataclass(repr=False)
@@ -730,6 +746,9 @@ def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_me
if settings.include_content and settings.include_binary_content:
converted_part['content'] = base64.b64encode(part.data).decode()
parts.append(converted_part)
elif isinstance(part, CachePoint):
# CachePoint is a marker, not actual content - skip it for otel
pass
else:
parts.append({'type': part.kind}) # pragma: no cover
return parts
86 changes: 78 additions & 8 deletions pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -19,6 +19,7 @@
BinaryContent,
BuiltinToolCallPart,
BuiltinToolReturnPart,
CachePoint,
DocumentUrl,
FilePart,
FinishReason,
@@ -58,6 +59,7 @@
from anthropic.types.beta import (
BetaBase64PDFBlockParam,
BetaBase64PDFSourceParam,
BetaCacheControlEphemeralParam,
BetaCitationsDelta,
BetaCodeExecutionTool20250522Param,
BetaCodeExecutionToolResultBlock,
@@ -148,6 +150,22 @@ class AnthropicModelSettings(ModelSettings, total=False):
See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
"""

anthropic_cache_tools: bool
"""Whether to add cache_control to the last tool definition.

When enabled, the last tool in the tools array will have cache_control set,
allowing Anthropic to cache tool definitions and reduce costs.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""

anthropic_cache_instructions: bool
"""Whether to add cache_control to the last system prompt block.

When enabled, the last system prompt will have cache_control set,
allowing Anthropic to cache system instructions and reduce costs.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""


@dataclass(init=False)
class AnthropicModel(Model):
@@ -289,7 +307,7 @@ async def _messages_create(
model_request_parameters: ModelRequestParameters,
) -> BetaMessage | AsyncStream[BetaRawMessageStreamEvent]:
# standalone function to make it easier to override
tools = self._get_tools(model_request_parameters)
tools = self._get_tools(model_request_parameters, model_settings)
tools, mcp_servers, beta_features = self._add_builtin_tools(tools, model_request_parameters)

tool_choice: BetaToolChoiceParam | None
@@ -305,7 +323,7 @@
if (allow_parallel_tool_calls := model_settings.get('parallel_tool_calls')) is not None:
tool_choice['disable_parallel_tool_use'] = not allow_parallel_tool_calls

system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters)
system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings)

try:
extra_headers = model_settings.get('extra_headers', {})
@@ -411,8 +429,19 @@ async def _process_streamed_response(
_provider_url=self._provider.base_url,
)

def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[BetaToolUnionParam]:
return [self._map_tool_definition(r) for r in model_request_parameters.tool_defs.values()]
def _get_tools(
self, model_request_parameters: ModelRequestParameters, model_settings: AnthropicModelSettings
) -> list[BetaToolUnionParam]:
tools: list[BetaToolUnionParam] = [
self._map_tool_definition(r) for r in model_request_parameters.tool_defs.values()
]

# Add cache_control to the last tool if enabled
if tools and model_settings.get('anthropic_cache_tools'):
last_tool = cast(dict[str, Any], tools[-1])
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')

return tools

def _add_builtin_tools(
self, tools: list[BetaToolUnionParam], model_request_parameters: ModelRequestParameters
@@ -464,8 +493,11 @@ def _add_builtin_tools(
return tools, mcp_servers, beta_features

async def _map_message( # noqa: C901
self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
) -> tuple[str, list[BetaMessageParam]]:
self,
messages: list[ModelMessage],
model_request_parameters: ModelRequestParameters,
model_settings: AnthropicModelSettings,
) -> tuple[str | list[BetaTextBlockParam], list[BetaMessageParam]]:
"""Just maps a `pydantic_ai.Message` to a `anthropic.types.MessageParam`."""
system_prompt_parts: list[str] = []
anthropic_messages: list[BetaMessageParam] = []
@@ -477,7 +509,10 @@ async def _map_message( # noqa: C901
system_prompt_parts.append(request_part.content)
elif isinstance(request_part, UserPromptPart):
async for content in self._map_user_prompt(request_part):
user_content_params.append(content)
if isinstance(content, CachePoint):
self._add_cache_control_to_last_param(user_content_params)
else:
user_content_params.append(content)
elif isinstance(request_part, ToolReturnPart):
tool_result_block_param = BetaToolResultBlockParam(
tool_use_id=_guard_tool_call_id(t=request_part),
@@ -637,12 +672,43 @@ async def _map_message( # noqa: C901
if instructions := self._get_instructions(messages, model_request_parameters):
system_prompt_parts.insert(0, instructions)
system_prompt = '\n\n'.join(system_prompt_parts)

# If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control
if system_prompt and model_settings.get('anthropic_cache_instructions'):
system_prompt_blocks = [
BetaTextBlockParam(
type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral')
)
]
return system_prompt_blocks, anthropic_messages

return system_prompt, anthropic_messages

@staticmethod
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
"""Add cache control to the last content block param.

See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""
if not params:
raise UserError(
'CachePoint cannot be the first content in a user message - there must be previous content to attach the CachePoint to.'
Collaborator:
Copying in context from https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#what-can-be-cached:

- Tools: Tool definitions in the `tools` array
- System messages: Content blocks in the `system` array
- Text messages: Content blocks in the `messages.content` array, for both user and assistant turns
- Images & Documents: Content blocks in the `messages.content` array, in user turns
- Tool use and tool results: Content blocks in the `messages.content` array, in both user and assistant turns

I think we should support inserting a cache point after tool defs and system messages as well.

In the original PR I suggested doing this by supporting CachePoint as the first content in a user message (by adding it to whatever came before it: the system message, tool definition, or the last message of the assistant output), but that doesn't really feel natural from a code perspective.

What do you think about adding anthropic_cache_tools and anthropic_cache_instructions fields to AnthropicModelSettings, and setting cache_control on the relevant parts when set?
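
For illustration, a rough sketch of the Messages API payload shape those settings would aim for (based on the prompt-caching docs rather than this PR's exact output; names and values are only placeholders):

```python
request = {
    'model': 'claude-sonnet-4-5',
    'system': [
        {
            'type': 'text',
            'text': 'Long detailed instructions...',
            # anthropic_cache_instructions: breakpoint on the last system block
            'cache_control': {'type': 'ephemeral'},
        },
    ],
    'tools': [
        {'name': 'search_docs', 'description': '...', 'input_schema': {'type': 'object'}},
        {
            'name': 'last_tool',
            'description': '...',
            'input_schema': {'type': 'object'},
            # anthropic_cache_tools: breakpoint on the last tool caches all tool definitions before it
            'cache_control': {'type': 'ephemeral'},
        },
    ],
    'messages': [{'role': 'user', 'content': 'Your question'}],
}
```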

Author:
Seems reasonable, I'll look into it!

)

# Only certain types support cache_control
# See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#what-can-be-cached
cacheable_types = {'text', 'tool_use', 'server_tool_use', 'image', 'tool_result'}
Collaborator:
Can you please link to the doc this came from?

last_param = cast(dict[str, Any], params[-1]) # Cast to dict for mutation
if last_param['type'] not in cacheable_types:
raise UserError(f'Cache control not supported for param type: {last_param["type"]}')

# Add cache_control to the last param
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')

@staticmethod
async def _map_user_prompt(
part: UserPromptPart,
) -> AsyncGenerator[BetaContentBlockParam]:
) -> AsyncGenerator[BetaContentBlockParam | CachePoint]:
if isinstance(part.content, str):
if part.content: # Only yield non-empty text
yield BetaTextBlockParam(text=part.content, type='text')
@@ -651,6 +717,8 @@ async def _map_user_prompt(
if isinstance(item, str):
if item: # Only yield non-empty text
yield BetaTextBlockParam(text=item, type='text')
elif isinstance(item, CachePoint):
yield item
elif isinstance(item, BinaryContent):
if item.is_image:
yield BetaImageBlockParam(
@@ -717,6 +785,8 @@ def _map_usage(
key: value for key, value in response_usage.model_dump().items() if isinstance(value, int)
}

# Note: genai-prices already extracts cache_creation_input_tokens and cache_read_input_tokens
# from the Anthropic response and maps them to cache_write_tokens and cache_read_tokens
return usage.RequestUsage.extract(
dict(model=model, usage=details),
provider=provider,
4 changes: 4 additions & 0 deletions pydantic_ai_slim/pydantic_ai/models/bedrock.py
@@ -19,6 +19,7 @@
BinaryContent,
BuiltinToolCallPart,
BuiltinToolReturnPart,
CachePoint,
DocumentUrl,
FinishReason,
ImageUrl,
@@ -672,6 +673,9 @@ async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int])
content.append({'video': video})
elif isinstance(item, AudioUrl): # pragma: no cover
raise NotImplementedError('Audio is not supported yet.')
elif isinstance(item, CachePoint):
# Bedrock doesn't support prompt caching via CachePoint in this implementation
pass
else:
assert_never(item)
return [{'role': 'user', 'content': content}]
4 changes: 4 additions & 0 deletions pydantic_ai_slim/pydantic_ai/models/gemini.py
@@ -21,6 +21,7 @@
BinaryContent,
BuiltinToolCallPart,
BuiltinToolReturnPart,
CachePoint,
FilePart,
FileUrl,
ModelMessage,
@@ -391,6 +392,9 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[_GeminiPartUnion]:
else: # pragma: lax no cover
file_data = _GeminiFileDataPart(file_data={'file_uri': item.url, 'mime_type': item.media_type})
content.append(file_data)
elif isinstance(item, CachePoint):
# Gemini doesn't support prompt caching via CachePoint
pass
else:
assert_never(item) # pragma: lax no cover
return content
Expand Down