Skip to content

Commit ec1dcb5

Browse files
committed
add anthropic_cache_all_ttl
1 parent 4b7e8b6 commit ec1dcb5

File tree

2 files changed

+29
-13
lines changed

2 files changed

+29
-13
lines changed

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,13 @@ class AnthropicModelSettings(ModelSettings, total=False):
175175
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
176176
"""
177177

178+
anthropic_cache_all_ttl: Literal['5m', '1h']
179+
"""The TTL for the last message in the conversation when `anthropic_cache_all` is enabled.
180+
181+
When `anthropic_cache_all` is enabled, the last message content will have `cache_control` set with this TTL. Defaults to `'5m'` when not provided.
182+
See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information.
183+
"""
184+
178185

179186
@dataclass(init=False)
180187
class AnthropicModel(Model):
@@ -683,19 +690,22 @@ async def _map_message( # noqa: C901
683690
# for subsequent requests that reuse the same context
684691
if anthropic_messages and model_settings.get('anthropic_cache_all'):
685692
m = anthropic_messages[-1]
693+
ttl = model_settings.get('anthropic_cache_all_ttl', '5m')
686694
content = m['content']
687695
if isinstance(content, str):
688696
# Convert string content to structured format with cache_control
689697
# This typically happens with tool responses
690698
m['content'] = [ # pragma: no cover
691699
BetaTextBlockParam(
692-
text=content, type='text', cache_control=BetaCacheControlEphemeralParam(type='ephemeral')
700+
text=content,
701+
type='text',
702+
cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl),
693703
)
694704
]
695705
else:
696706
# For structured content (lists), add cache_control to the last block
697707
content = cast(list[BetaContentBlockParam], content)
698-
self._add_cache_control_to_last_param(content)
708+
self._add_cache_control_to_last_param(content, ttl=ttl)
699709

700710
if instructions := self._get_instructions(messages, model_request_parameters):
701711
system_prompt_parts.insert(0, instructions)
@@ -713,7 +723,7 @@ async def _map_message( # noqa: C901
713723
return system_prompt, anthropic_messages
714724

715725
@staticmethod
716-
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
726+
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None:
717727
"""Add cache control to the last content block param.
718728
719729
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
@@ -734,7 +744,7 @@ def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> Non
734744
raise UserError(f'Cache control not supported for param type: {last_param["type"]}')
735745

736746
# Add cache_control to the last param
737-
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
747+
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
738748

739749
@staticmethod
740750
async def _map_user_prompt(

tests/models/test_anthropic.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,11 @@ async def test_cache_point_adds_cache_control(allow_model_requests: None):
314314
{
315315
'role': 'user',
316316
'content': [
317-
{'text': 'Some context to cache', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
317+
{
318+
'text': 'Some context to cache',
319+
'type': 'text',
320+
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
321+
},
318322
{'text': 'Now the question', 'type': 'text'},
319323
],
320324
}
@@ -339,8 +343,8 @@ async def test_cache_point_multiple_markers(allow_model_requests: None):
339343

340344
assert content == snapshot(
341345
[
342-
{'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
343-
{'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral'}},
346+
{'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}},
347+
{'text': 'Second chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}},
344348
{'text': 'Question', 'type': 'text'},
345349
]
346350
)
@@ -389,7 +393,7 @@ async def test_cache_point_with_image_content(allow_model_requests: None):
389393
{
390394
'source': {'type': 'url', 'url': 'https://example.com/image.jpg'},
391395
'type': 'image',
392-
'cache_control': {'type': 'ephemeral'},
396+
'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
393397
},
394398
{'text': 'What is in this image?', 'type': 'text'},
395399
]
@@ -573,7 +577,9 @@ async def test_anthropic_cache_all_text_message(allow_model_requests: None):
573577
content = messages[0]['content']
574578
assert isinstance(content, list)
575579
assert len(content) == 1 # pyright: ignore[reportUnknownArgumentType]
576-
assert content[0] == snapshot({'type': 'text', 'text': 'test prompt', 'cache_control': {'type': 'ephemeral'}})
580+
assert content[0] == snapshot(
581+
{'type': 'text', 'text': 'test prompt', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}}
582+
)
577583

578584

579585
async def test_anthropic_cache_all_with_multiple_messages(allow_model_requests: None):
@@ -617,7 +623,7 @@ async def test_anthropic_cache_all_with_multiple_messages(allow_model_requests:
617623
# Second user message should have cache_control
618624
last_user_content = messages[2]['content']
619625
assert isinstance(last_user_content, list)
620-
assert last_user_content[0]['cache_control'] == {'type': 'ephemeral'}
626+
assert last_user_content[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '5m'})
621627

622628

623629
async def test_anthropic_cache_all_combined_with_other_cache_settings(allow_model_requests: None):
@@ -649,17 +655,17 @@ def my_tool(value: str) -> str: # pragma: no cover
649655

650656
# Check tools have cache_control
651657
tools = completion_kwargs['tools']
652-
assert tools[0]['cache_control'] == {'type': 'ephemeral'}
658+
assert tools[0]['cache_control'] == snapshot({'type': 'ephemeral'})
653659

654660
# Check system has cache_control
655661
system = completion_kwargs['system']
656-
assert system[0]['cache_control'] == {'type': 'ephemeral'}
662+
assert system[0]['cache_control'] == snapshot({'type': 'ephemeral'})
657663

658664
# Check last message has cache_control
659665
messages = completion_kwargs['messages']
660666
last_message_content = messages[-1]['content']
661667
assert isinstance(last_message_content, list)
662-
assert last_message_content[0]['cache_control'] == {'type': 'ephemeral'}
668+
assert last_message_content[0]['cache_control'] == snapshot({'type': 'ephemeral', 'ttl': '5m'})
663669

664670

665671
async def test_async_request_text_response(allow_model_requests: None):

0 commit comments

Comments
 (0)