Skip to content

Commit 756bdc4

Browse files
committed
Merged main and resolved conflict
2 parents 27192da + 1b576dd commit 756bdc4

39 files changed

+2738
-327
lines changed

docs/durable_execution/temporal.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ As workflows and activities run in separate processes, any values passed between
172172

173173
To account for these limitations, tool functions and the [event stream handler](#streaming) running inside activities receive a limited version of the agent's [`RunContext`][pydantic_ai.tools.RunContext], and it's your responsibility to make sure that the [dependencies](../dependencies.md) object provided to [`TemporalAgent.run()`][pydantic_ai.durable_exec.temporal.TemporalAgent.run] can be serialized using Pydantic.
174174

175-
Specifically, only the `deps`, `run_id`, `retries`, `tool_call_id`, `tool_name`, `tool_call_approved`, `retry`, `max_retries`, `run_step` and `partial_output` fields are available by default, and trying to access `model`, `usage`, `prompt`, `messages`, or `tracer` will raise an error.
175+
Specifically, only the `deps`, `run_id`, `retries`, `tool_call_id`, `tool_name`, `tool_call_approved`, `retry`, `max_retries`, `run_step`, `usage`, and `partial_output` fields are available by default, and trying to access `model`, `prompt`, `messages`, or `tracer` will raise an error.
176176
If you need one or more of these attributes to be available inside activities, you can create a [`TemporalRunContext`][pydantic_ai.durable_exec.temporal.TemporalRunContext] subclass with custom `serialize_run_context` and `deserialize_run_context` class methods and pass it to [`TemporalAgent`][pydantic_ai.durable_exec.temporal.TemporalAgent] as `run_context_type`.
177177

178178
### Streaming

docs/models/anthropic.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ agent = Agent(model)
8383
Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching:
8484

8585
1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it
86-
2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt
87-
3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions
86+
2. **Cache System Instructions**: Cache your system prompt by setting [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (which uses the default 5-minute TTL) or to `'5m'` / `'1h'` to choose the TTL explicitly
87+
3. **Cache Tool Definitions**: Cache your tool definitions by setting [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (which uses the default 5-minute TTL) or to `'5m'` / `'1h'` to choose the TTL explicitly
8888

8989
You can combine all three strategies for maximum savings:
9090

@@ -96,8 +96,9 @@ agent = Agent(
9696
'anthropic:claude-sonnet-4-5',
9797
system_prompt='Detailed instructions...',
9898
model_settings=AnthropicModelSettings(
99+
# Use True for default 5m TTL, or specify '5m' / '1h' directly
99100
anthropic_cache_instructions=True,
100-
anthropic_cache_tool_definitions=True,
101+
anthropic_cache_tool_definitions='1h', # Longer cache for tool definitions
101102
),
102103
)
103104

@@ -134,7 +135,7 @@ agent = Agent(
134135
'anthropic:claude-sonnet-4-5',
135136
system_prompt='Instructions...',
136137
model_settings=AnthropicModelSettings(
137-
anthropic_cache_instructions=True
138+
anthropic_cache_instructions=True # Default 5m TTL
138139
),
139140
)
140141

docs/models/google.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -214,22 +214,22 @@ from pydantic_ai.models.google import GoogleModel, GoogleModelSettings
214214
settings = GoogleModelSettings(
215215
temperature=0.2,
216216
max_tokens=1024,
217-
google_thinking_config={'thinking_budget': 2048},
217+
google_thinking_config={'thinking_level': 'low'},
218218
google_safety_settings=[
219219
{
220220
'category': HarmCategory.HARM_CATEGORY_HATE_SPEECH,
221221
'threshold': HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
222222
}
223223
]
224224
)
225-
model = GoogleModel('gemini-2.5-flash')
225+
model = GoogleModel('gemini-2.5-pro')
226226
agent = Agent(model, model_settings=settings)
227227
...
228228
```
229229

230230
### Disable thinking
231231

232-
You can disable thinking by setting the `thinking_budget` to `0` on the `google_thinking_config`:
232+
On models older than Gemini 2.5 Pro, you can disable thinking by setting the `thinking_budget` to `0` on the `google_thinking_config`:
233233

234234
```python
235235
from pydantic_ai import Agent

pydantic_ai_slim/pydantic_ai/_json_schema.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def __init__(
2727
*,
2828
strict: bool | None = None,
2929
prefer_inlined_defs: bool = False,
30-
simplify_nullable_unions: bool = False,
30+
simplify_nullable_unions: bool = False, # TODO (v2): Remove this, no longer used
3131
flatten_allof: bool = False,
3232
):
3333
self.schema = schema
@@ -156,10 +156,9 @@ def _handle_union(self, schema: JsonSchema, union_kind: Literal['anyOf', 'oneOf'
156156

157157
handled = [self._handle(member) for member in members]
158158

159-
# convert nullable unions to nullable types
159+
# TODO (v2): Remove this feature, no longer used
160160
if self.simplify_nullable_unions:
161161
handled = self._simplify_nullable_union(handled)
162-
163162
if len(handled) == 1:
164163
# In this case, no need to retain the union
165164
return handled[0] | schema
@@ -171,7 +170,7 @@ def _handle_union(self, schema: JsonSchema, union_kind: Literal['anyOf', 'oneOf'
171170

172171
@staticmethod
173172
def _simplify_nullable_union(cases: list[JsonSchema]) -> list[JsonSchema]:
174-
# TODO: Should we move this to relevant subclasses? Or is it worth keeping here to make reuse easier?
173+
# TODO (v2): Remove this method, no longer used
175174
if len(cases) == 2 and {'type': 'null'} in cases:
176175
# Find the non-null schema
177176
non_null_schema = next(

pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_run_context.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
class TemporalRunContext(RunContext[AgentDepsT]):
1515
"""The [`RunContext`][pydantic_ai.tools.RunContext] subclass to use to serialize and deserialize the run context for use inside a Temporal activity.
1616
17-
By default, only the `deps`, `run_id`, `retries`, `tool_call_id`, `tool_name`, `tool_call_approved`, `retry`, `max_retries`, `run_step` and `partial_output` attributes will be available.
17+
By default, only the `deps`, `run_id`, `retries`, `tool_call_id`, `tool_name`, `tool_call_approved`, `retry`, `max_retries`, `run_step`, `usage`, and `partial_output` attributes will be available.
1818
To make another attribute available, create a `TemporalRunContext` subclass with a custom `serialize_run_context` class method that returns a dictionary that includes the attribute and pass it to [`TemporalAgent`][pydantic_ai.durable_exec.temporal.TemporalAgent].
1919
"""
2020

@@ -51,6 +51,7 @@ def serialize_run_context(cls, ctx: RunContext[Any]) -> dict[str, Any]:
5151
'max_retries': ctx.max_retries,
5252
'run_step': ctx.run_step,
5353
'partial_output': ctx.partial_output,
54+
'usage': ctx.usage,
5455
}
5556

5657
@classmethod

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,13 @@ class CachePoint:
627627
kind: Literal['cache-point'] = 'cache-point'
628628
"""Type identifier, this is available on all parts as a discriminator."""
629629

630+
ttl: Literal['5m', '1h'] = '5m'
631+
"""The cache time-to-live, either "5m" (5 minutes) or "1h" (1 hour).
632+
633+
Supported by:
634+
635+
* Anthropic. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information."""
636+
630637

631638
MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
632639
UserContent: TypeAlias = str | MultiModalContent | CachePoint
@@ -970,6 +977,9 @@ class ModelRequest:
970977
run_id: str | None = None
971978
"""The unique identifier of the agent run in which this message originated."""
972979

980+
metadata: dict[str, Any] | None = None
981+
"""Additional data that can be accessed programmatically by the application but is not sent to the LLM."""
982+
973983
@classmethod
974984
def user_text_prompt(cls, user_prompt: str, *, instructions: str | None = None) -> ModelRequest:
975985
"""Create a `ModelRequest` with a single user prompt as text."""
@@ -1060,7 +1070,7 @@ class FilePart:
10601070

10611071
def has_content(self) -> bool:
10621072
"""Return `True` if the file content is non-empty."""
1063-
return bool(self.content) # pragma: no cover
1073+
return bool(self.content.data)
10641074

10651075
__repr__ = _utils.dataclasses_no_defaults_repr
10661076

@@ -1214,6 +1224,9 @@ class ModelResponse:
12141224
run_id: str | None = None
12151225
"""The unique identifier of the agent run in which this message originated."""
12161226

1227+
metadata: dict[str, Any] | None = None
1228+
"""Additional data that can be accessed programmatically by the application but is not sent to the LLM."""
1229+
12171230
@property
12181231
def text(self) -> str | None:
12191232
"""Get the text in the response."""

pydantic_ai_slim/pydantic_ai/models/__init__.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -145,24 +145,28 @@
145145
'cohere:command-r7b-12-2024',
146146
'deepseek:deepseek-chat',
147147
'deepseek:deepseek-reasoner',
148+
'google-gla:gemini-flash-latest',
149+
'google-gla:gemini-flash-lite-latest',
148150
'google-gla:gemini-2.0-flash',
149151
'google-gla:gemini-2.0-flash-lite',
150152
'google-gla:gemini-2.5-flash',
153+
'google-gla:gemini-2.5-flash-preview-09-2025',
154+
'google-gla:gemini-2.5-flash-image',
151155
'google-gla:gemini-2.5-flash-lite',
152156
'google-gla:gemini-2.5-flash-lite-preview-09-2025',
153-
'google-gla:gemini-2.5-flash-preview-09-2025',
154157
'google-gla:gemini-2.5-pro',
155-
'google-gla:gemini-flash-latest',
156-
'google-gla:gemini-flash-lite-latest',
158+
'google-gla:gemini-3-pro-preview',
159+
'google-vertex:gemini-flash-latest',
160+
'google-vertex:gemini-flash-lite-latest',
157161
'google-vertex:gemini-2.0-flash',
158162
'google-vertex:gemini-2.0-flash-lite',
159163
'google-vertex:gemini-2.5-flash',
164+
'google-vertex:gemini-2.5-flash-preview-09-2025',
165+
'google-vertex:gemini-2.5-flash-image',
160166
'google-vertex:gemini-2.5-flash-lite',
161167
'google-vertex:gemini-2.5-flash-lite-preview-09-2025',
162-
'google-vertex:gemini-2.5-flash-preview-09-2025',
163168
'google-vertex:gemini-2.5-pro',
164-
'google-vertex:gemini-flash-latest',
165-
'google-vertex:gemini-flash-lite-latest',
169+
'google-vertex:gemini-3-pro-preview',
166170
'grok:grok-2-image-1212',
167171
'grok:grok-2-vision-1212',
168172
'grok:grok-3',

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -151,19 +151,21 @@ class AnthropicModelSettings(ModelSettings, total=False):
151151
See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
152152
"""
153153

154-
anthropic_cache_tool_definitions: bool
154+
anthropic_cache_tool_definitions: bool | Literal['5m', '1h']
155155
"""Whether to add `cache_control` to the last tool definition.
156156
157157
When enabled, the last tool in the `tools` array will have `cache_control` set,
158158
allowing Anthropic to cache tool definitions and reduce costs.
159+
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
159160
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
160161
"""
161162

162-
anthropic_cache_instructions: bool
163+
anthropic_cache_instructions: bool | Literal['5m', '1h']
163164
"""Whether to add `cache_control` to the last system prompt block.
164165
165166
When enabled, the last system prompt will have `cache_control` set,
166167
allowing Anthropic to cache system instructions and reduce costs.
168+
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
167169
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
168170
"""
169171

@@ -476,9 +478,11 @@ def _get_tools(
476478
]
477479

478480
# Add cache_control to the last tool if enabled
479-
if tools and model_settings.get('anthropic_cache_tool_definitions'):
481+
if tools and (cache_tool_defs := model_settings.get('anthropic_cache_tool_definitions')):
482+
# If True, use '5m'; otherwise use the specified ttl value
483+
ttl: Literal['5m', '1h'] = '5m' if cache_tool_defs is True else cache_tool_defs
480484
last_tool = tools[-1]
481-
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
485+
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
482486

483487
return tools
484488

@@ -580,7 +584,7 @@ async def _map_message( # noqa: C901
580584
elif isinstance(request_part, UserPromptPart):
581585
async for content in self._map_user_prompt(request_part):
582586
if isinstance(content, CachePoint):
583-
self._add_cache_control_to_last_param(user_content_params)
587+
self._add_cache_control_to_last_param(user_content_params, ttl=content.ttl)
584588
else:
585589
user_content_params.append(content)
586590
elif isinstance(request_part, ToolReturnPart):
@@ -744,18 +748,22 @@ async def _map_message( # noqa: C901
744748
system_prompt = '\n\n'.join(system_prompt_parts)
745749

746750
# If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control
747-
if system_prompt and model_settings.get('anthropic_cache_instructions'):
751+
if system_prompt and (cache_instructions := model_settings.get('anthropic_cache_instructions')):
752+
# If True, use '5m'; otherwise use the specified ttl value
753+
ttl: Literal['5m', '1h'] = '5m' if cache_instructions is True else cache_instructions
748754
system_prompt_blocks = [
749755
BetaTextBlockParam(
750-
type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral')
756+
type='text',
757+
text=system_prompt,
758+
cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl),
751759
)
752760
]
753761
return system_prompt_blocks, anthropic_messages
754762

755763
return system_prompt, anthropic_messages
756764

757765
@staticmethod
758-
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
766+
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None:
759767
"""Add cache control to the last content block param.
760768
761769
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
@@ -776,7 +784,7 @@ def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> Non
776784
raise UserError(f'Cache control not supported for param type: {last_param["type"]}')
777785

778786
# Add cache_control to the last param
779-
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
787+
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
780788

781789
@staticmethod
782790
async def _map_user_prompt(

0 commit comments

Comments
 (0)