From fdf81661df6cfd3063b9484c132f89ecad1a5a4b Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Mon, 17 Nov 2025 16:52:28 +0800 Subject: [PATCH 1/6] feat: support ttl in CachePoint --- pydantic_ai_slim/pydantic_ai/messages.py | 3 +++ pydantic_ai_slim/pydantic_ai/models/anthropic.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index 988430d12a..7cdccbf3c0 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -627,6 +627,9 @@ class CachePoint: kind: Literal['cache-point'] = 'cache-point' """Type identifier, this is available on all parts as a discriminator.""" + ttl: Literal['5m', '1h'] = '5m' + """The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour). Check https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information.""" + MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent UserContent: TypeAlias = str | MultiModalContent | CachePoint diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py index c636ba9cfc..195c78e841 100644 --- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py +++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py @@ -510,7 +510,7 @@ async def _map_message( # noqa: C901 elif isinstance(request_part, UserPromptPart): async for content in self._map_user_prompt(request_part): if isinstance(content, CachePoint): - self._add_cache_control_to_last_param(user_content_params) + self._add_cache_control_to_last_param(user_content_params, ttl=content.ttl) else: user_content_params.append(content) elif isinstance(request_part, ToolReturnPart): @@ -685,7 +685,7 @@ async def _map_message( # noqa: C901 return system_prompt, anthropic_messages @staticmethod - def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None: + def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None: """Add cache control to the last content block param. See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. @@ -706,7 +706,7 @@ def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> Non raise UserError(f'Cache control not supported for param type: {last_param["type"]}') # Add cache_control to the last param - last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral') + last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl) @staticmethod async def _map_user_prompt( From 9a01a733043fb32b9a7014ad8011f35200fe3396 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Mon, 17 Nov 2025 17:09:12 +0800 Subject: [PATCH 2/6] add ttl for tool and system --- .../pydantic_ai/models/anthropic.py | 28 +++++++++++++++++-- tests/models/test_anthropic.py | 20 +++++++------ 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py index 195c78e841..31cebc360c 100644 --- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py +++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py @@ -158,12 +158,28 @@ class AnthropicModelSettings(ModelSettings, total=False): See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. """ + anthropic_cache_tool_definitions_ttl: Literal['5m', '1h'] + """The TTL for tool definitions cache control. + + When enabled, the last tool in the `tools` array will have `cache_control` set, + allowing Anthropic to cache tool definitions and reduce costs. + See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. + """ + anthropic_cache_instructions: bool """Whether to add `cache_control` to the last system prompt block. When enabled, the last system prompt will have `cache_control` set, allowing Anthropic to cache system instructions and reduce costs. - See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. + See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information. + """ + + anthropic_cache_instructions_ttl: Literal['5m', '1h'] + """The TTL for system instructions cache control. + + When enabled, the last system prompt will have `cache_control` set, + allowing Anthropic to cache system instructions and reduce costs. + See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information. """ @@ -439,7 +455,9 @@ def _get_tools( # Add cache_control to the last tool if enabled if tools and model_settings.get('anthropic_cache_tool_definitions'): last_tool = tools[-1] - last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral') + last_tool['cache_control'] = BetaCacheControlEphemeralParam( + type='ephemeral', ttl=model_settings.get('anthropic_cache_tool_definitions_ttl', '5m') + ) return tools @@ -677,7 +695,11 @@ async def _map_message( # noqa: C901 if system_prompt and model_settings.get('anthropic_cache_instructions'): system_prompt_blocks = [ BetaTextBlockParam( - type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral') + type='text', + text=system_prompt, + cache_control=BetaCacheControlEphemeralParam( + type='ephemeral', ttl=model_settings.get('anthropic_cache_instructions_ttl', '5m') + ), ) ] return system_prompt_blocks, anthropic_messages diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py index 1170483879..a7114bcc83 100644 --- a/tests/models/test_anthropic.py +++ b/tests/models/test_anthropic.py @@ -314,7 +314,11 @@ async def test_cache_point_adds_cache_control(allow_model_requests: None): { 'role': 'user', 'content': [ - {'text': 'Some context to cache', 'type': 'text', 'cache_control': {'type': 'ephemeral'}}, + { + 'text': 'Some context to cache', + 'type': 'text', + 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}, + }, {'text': 'Now the question', 'type': 'text'}, ], } @@ -339,8 +343,8 @@ async def test_cache_point_multiple_markers(allow_model_requests: None): assert content == snapshot( [ - {'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}}, - {'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}}, + {'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}, + {'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}, {'text': 'Question', 'type': 'text'}, ] ) @@ -389,7 +393,7 @@ async def test_cache_point_with_image_content(allow_model_requests: None): { 'source': {'type': 'url', 'url': 'https://example.com/image.jpg'}, 'type': 'image', - 'cache_control': {'type': 'ephemeral'}, + 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}, }, {'text': 'What is in this image?', 'type': 'text'}, ] @@ -466,7 +470,7 @@ def tool_two() -> str: # pragma: no cover 'name': 'tool_two', 'description': '', 'input_schema': {'additionalProperties': False, 'properties': {}, 'type': 'object'}, - 'cache_control': {'type': 'ephemeral'}, + 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}, }, ] ) @@ -496,7 +500,7 @@ async def test_anthropic_cache_instructions(allow_model_requests: None): { 'type': 'text', 'text': 'This is a test system prompt with instructions.', - 'cache_control': {'type': 'ephemeral'}, + 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}, } ] ) @@ -540,12 +544,12 @@ def my_tool(value: str) -> str: # pragma: no cover 'required': ['value'], 'type': 'object', }, - 'cache_control': {'type': 'ephemeral'}, + 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}, } ] ) assert system == snapshot( - [{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral'}}] + [{'type': 'text', 'text': 'System instructions to cache.', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}] ) From 4a19caecfcdd108a57243401576d5bfb28ec5845 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Mon, 17 Nov 2025 17:22:22 +0800 Subject: [PATCH 3/6] fix docstring --- pydantic_ai_slim/pydantic_ai/messages.py | 2 +- pydantic_ai_slim/pydantic_ai/models/anthropic.py | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index 7cdccbf3c0..a3e3a1776e 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -628,7 +628,7 @@ class CachePoint: """Type identifier, this is available on all parts as a discriminator.""" ttl: Literal['5m', '1h'] = '5m' - """The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour). Check https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information.""" + """The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour). See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information.""" MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py index 31cebc360c..d53d9dd231 100644 --- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py +++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py @@ -161,9 +161,7 @@ class AnthropicModelSettings(ModelSettings, total=False): anthropic_cache_tool_definitions_ttl: Literal['5m', '1h'] """The TTL for tool definitions cache control. - When enabled, the last tool in the `tools` array will have `cache_control` set, - allowing Anthropic to cache tool definitions and reduce costs. - See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. + See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information. """ anthropic_cache_instructions: bool @@ -171,14 +169,12 @@ class AnthropicModelSettings(ModelSettings, total=False): When enabled, the last system prompt will have `cache_control` set, allowing Anthropic to cache system instructions and reduce costs. - See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information. + See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. """ anthropic_cache_instructions_ttl: Literal['5m', '1h'] """The TTL for system instructions cache control. - When enabled, the last system prompt will have `cache_control` set, - allowing Anthropic to cache system instructions and reduce costs. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information. """ From d2ce83823fee79ccc43a75efc6623db96186413a Mon Sep 17 00:00:00 2001 From: Zhongsheng Ji <9573586@qq.com> Date: Tue, 18 Nov 2025 10:06:35 +0800 Subject: [PATCH 4/6] Update pydantic_ai_slim/pydantic_ai/messages.py Co-authored-by: Douwe Maan --- pydantic_ai_slim/pydantic_ai/messages.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index a3e3a1776e..62fc42799c 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -628,7 +628,11 @@ class CachePoint: """Type identifier, this is available on all parts as a discriminator.""" ttl: Literal['5m', '1h'] = '5m' - """The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour). See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information.""" + """The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour). + + Supported by: + + * Anthropic. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information.""" MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent From 08bebd28c7bc112340def5d8f1d6d4c3e0adbcd4 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Tue, 18 Nov 2025 10:19:40 +0800 Subject: [PATCH 5/6] feat: update caching settings for Anthropic model to support TTL values --- docs/models/anthropic.md | 9 ++--- .../pydantic_ai/models/anthropic.py | 34 +++++++------------ tests/models/test_anthropic.py | 34 +++++++++++++++++++ 3 files changed, 51 insertions(+), 26 deletions(-) diff --git a/docs/models/anthropic.md b/docs/models/anthropic.md index 586084ace0..96aa6207c1 100644 --- a/docs/models/anthropic.md +++ b/docs/models/anthropic.md @@ -83,8 +83,8 @@ agent = Agent(model) Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching: 1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it -2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt -3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions +2. **Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly +3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly You can combine all three strategies for maximum savings: @@ -96,8 +96,9 @@ agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Detailed instructions...', model_settings=AnthropicModelSettings( + # Use True for default 5m TTL, or specify '5m' / '1h' directly anthropic_cache_instructions=True, - anthropic_cache_tool_definitions=True, + anthropic_cache_tool_definitions='1h', # Longer cache for tool definitions ), ) @@ -134,7 +135,7 @@ agent = Agent( 'anthropic:claude-sonnet-4-5', system_prompt='Instructions...', model_settings=AnthropicModelSettings( - anthropic_cache_instructions=True + anthropic_cache_instructions=True # Default 5m TTL ), ) diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py index d53d9dd231..8951496d18 100644 --- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py +++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py @@ -150,34 +150,24 @@ class AnthropicModelSettings(ModelSettings, total=False): See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information. """ - anthropic_cache_tool_definitions: bool + anthropic_cache_tool_definitions: bool | Literal['5m', '1h'] """Whether to add `cache_control` to the last tool definition. When enabled, the last tool in the `tools` array will have `cache_control` set, allowing Anthropic to cache tool definitions and reduce costs. + If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly. See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. """ - anthropic_cache_tool_definitions_ttl: Literal['5m', '1h'] - """The TTL for tool definitions cache control. - - See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information. - """ - - anthropic_cache_instructions: bool + anthropic_cache_instructions: bool | Literal['5m', '1h'] """Whether to add `cache_control` to the last system prompt block. When enabled, the last system prompt will have `cache_control` set, allowing Anthropic to cache system instructions and reduce costs. + If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly. See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information. """ - anthropic_cache_instructions_ttl: Literal['5m', '1h'] - """The TTL for system instructions cache control. - - See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information. - """ - @dataclass(init=False) class AnthropicModel(Model): @@ -449,11 +439,11 @@ def _get_tools( ] # Add cache_control to the last tool if enabled - if tools and model_settings.get('anthropic_cache_tool_definitions'): + if tools and (cache_tool_defs := model_settings.get('anthropic_cache_tool_definitions')): + # If True, use '5m'; otherwise use the specified ttl value + ttl: Literal['5m', '1h'] = '5m' if cache_tool_defs is True else cache_tool_defs last_tool = tools[-1] - last_tool['cache_control'] = BetaCacheControlEphemeralParam( - type='ephemeral', ttl=model_settings.get('anthropic_cache_tool_definitions_ttl', '5m') - ) + last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl) return tools @@ -688,14 +678,14 @@ async def _map_message( # noqa: C901 system_prompt = '\n\n'.join(system_prompt_parts) # If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control - if system_prompt and model_settings.get('anthropic_cache_instructions'): + if system_prompt and (cache_instructions := model_settings.get('anthropic_cache_instructions')): + # If True, use '5m'; otherwise use the specified ttl value + ttl: Literal['5m', '1h'] = '5m' if cache_instructions is True else cache_instructions system_prompt_blocks = [ BetaTextBlockParam( type='text', text=system_prompt, - cache_control=BetaCacheControlEphemeralParam( - type='ephemeral', ttl=model_settings.get('anthropic_cache_instructions_ttl', '5m') - ), + cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl), ) ] return system_prompt_blocks, anthropic_messages diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py index a7114bcc83..4ffe7e3d1e 100644 --- a/tests/models/test_anthropic.py +++ b/tests/models/test_anthropic.py @@ -553,6 +553,40 @@ def my_tool(value: str) -> str: # pragma: no cover ) +async def test_anthropic_cache_with_custom_ttl(allow_model_requests: None): + """Test that cache settings support custom TTL values ('5m' or '1h').""" + c = completion_message( + [BetaTextBlock(text='Response', type='text')], + usage=BetaUsage(input_tokens=10, output_tokens=5), + ) + mock_client = MockAnthropic.create_mock(c) + m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client)) + agent = Agent( + m, + system_prompt='System instructions to cache.', + model_settings=AnthropicModelSettings( + anthropic_cache_tool_definitions='1h', # Custom 1h TTL + anthropic_cache_instructions='5m', # Explicit 5m TTL + ), + ) + + @agent.tool_plain + def my_tool(value: str) -> str: # pragma: no cover + return f'Result: {value}' + + await agent.run('test prompt') + + # Verify custom TTL values are applied + completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0] + tools = completion_kwargs['tools'] + system = completion_kwargs['system'] + + # Tool definitions should have 1h TTL + assert tools[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '1h'}) + # System instructions should have 5m TTL + assert system[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '5m'}) + + async def test_async_request_text_response(allow_model_requests: None): c = completion_message( [BetaTextBlock(text='world', type='text')], From c3e243ddf16aa13cd1b5aa6046044191a697b535 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Tue, 18 Nov 2025 10:33:38 +0800 Subject: [PATCH 6/6] fix lint issues --- pydantic_ai_slim/pydantic_ai/messages.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index 62fc42799c..5d2c534a3a 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -628,10 +628,10 @@ class CachePoint: """Type identifier, this is available on all parts as a discriminator.""" ttl: Literal['5m', '1h'] = '5m' - """The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour). - + """The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour). + Supported by: - + * Anthropic. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information."""