Skip to content

Commit 3116b2d

Browse files
authored
Merge branch 'main' into add-file-search-tools-support
2 parents 4376b96 + aa1fe33 commit 3116b2d

File tree

9 files changed

+174
-21
lines changed

9 files changed

+174
-21
lines changed

Makefile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,16 @@ typecheck-both: typecheck-pyright typecheck-mypy
5353
.PHONY: test
5454
test: ## Run tests and collect coverage data
5555
@# To test using a specific version of python, run 'make install-all-python' then set environment variable PYTEST_PYTHON=3.10 or similar
56-
$(if $(PYTEST_PYTHON),UV_PROJECT_ENVIRONMENT=.venv$(subst .,,$(PYTEST_PYTHON))) uv run $(if $(PYTEST_PYTHON),--python $(PYTEST_PYTHON)) coverage run -m pytest -n auto --dist=loadgroup --durations=20
56+
COLUMNS=150 $(if $(PYTEST_PYTHON),UV_PROJECT_ENVIRONMENT=.venv$(subst .,,$(PYTEST_PYTHON))) uv run $(if $(PYTEST_PYTHON),--python $(PYTEST_PYTHON)) coverage run -m pytest -n auto --dist=loadgroup --durations=20
5757
@uv run coverage combine
5858
@uv run coverage report
5959

6060
.PHONY: test-all-python
6161
test-all-python: ## Run tests on Python 3.10 to 3.13
62-
UV_PROJECT_ENVIRONMENT=.venv310 uv run --python 3.10 --all-extras --all-packages coverage run -p -m pytest
63-
UV_PROJECT_ENVIRONMENT=.venv311 uv run --python 3.11 --all-extras --all-packages coverage run -p -m pytest
64-
UV_PROJECT_ENVIRONMENT=.venv312 uv run --python 3.12 --all-extras --all-packages coverage run -p -m pytest
65-
UV_PROJECT_ENVIRONMENT=.venv313 uv run --python 3.13 --all-extras --all-packages coverage run -p -m pytest
62+
COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv310 uv run --python 3.10 --all-extras --all-packages coverage run -p -m pytest
63+
COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv311 uv run --python 3.11 --all-extras --all-packages coverage run -p -m pytest
64+
COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv312 uv run --python 3.12 --all-extras --all-packages coverage run -p -m pytest
65+
COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv313 uv run --python 3.13 --all-extras --all-packages coverage run -p -m pytest
6666
@uv run coverage combine
6767
@uv run coverage report
6868

docs/examples/ag-ui.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Agent User Interaction (AG-UI)
22

3-
Example of using Pydantic AI agents with the [AG-UI Dojo](https://github.com/ag-ui-protocol/ag-ui/tree/main/typescript-sdk/apps/dojo) example app.
3+
Example of using Pydantic AI agents with the [AG-UI Dojo](https://github.com/ag-ui-protocol/ag-ui/tree/main/apps/dojo) example app.
44

55
See the [AG-UI docs](../ui/ag-ui.md) for more information about the AG-UI integration.
66

@@ -48,7 +48,7 @@ Next run the AG-UI Dojo example frontend.
4848
cd ag-ui/sdks/typescript
4949
```
5050

51-
3. Run the Dojo app following the [official instructions](https://github.com/ag-ui-protocol/ag-ui/tree/main/typescript-sdk/apps/dojo#development-setup)
51+
3. Run the Dojo app following the [official instructions](https://github.com/ag-ui-protocol/ag-ui/tree/main/apps/dojo#development-setup)
5252
4. Visit <http://localhost:3000/pydantic-ai>
5353
5. Select View `Pydantic AI` from the sidebar
5454

docs/models/bedrock.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,47 @@ model = BedrockConverseModel(
114114
agent = Agent(model)
115115
...
116116
```
117+
118+
## Configuring Retries
119+
120+
Bedrock uses boto3's built-in retry mechanisms. You can configure retry behavior by passing a custom boto3 client with retry settings:
121+
122+
```python
123+
import boto3
124+
from botocore.config import Config
125+
126+
from pydantic_ai import Agent
127+
from pydantic_ai.models.bedrock import BedrockConverseModel
128+
from pydantic_ai.providers.bedrock import BedrockProvider
129+
130+
# Configure retry settings
131+
config = Config(
132+
retries={
133+
'max_attempts': 5,
134+
'mode': 'adaptive' # Recommended for rate limiting
135+
}
136+
)
137+
138+
bedrock_client = boto3.client(
139+
'bedrock-runtime',
140+
region_name='us-east-1',
141+
config=config
142+
)
143+
144+
model = BedrockConverseModel(
145+
'us.amazon.nova-micro-v1:0',
146+
provider=BedrockProvider(bedrock_client=bedrock_client),
147+
)
148+
agent = Agent(model)
149+
```
150+
151+
### Retry Modes
152+
153+
- `'legacy'` (default): up to 5 attempts by default, basic retry behavior
154+
- `'standard'`: up to 3 attempts by default, more comprehensive error coverage
155+
- `'adaptive'`: up to 3 attempts by default, with client-side rate limiting (recommended for handling `ThrottlingException`)
156+
157+
For more details on boto3 retry configuration, see the [AWS boto3 documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html).
158+
159+
!!! note
160+
Unlike other providers that use httpx for HTTP requests, Bedrock uses boto3's native retry mechanisms. The retry strategies described in [HTTP Request Retries](../retries.md) do not apply to Bedrock.

docs/retries.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,17 @@ agent = Agent(model)
339339
- Use async transports for better concurrency when handling multiple requests
340340

341341
For more advanced retry configurations, refer to the [tenacity documentation](https://tenacity.readthedocs.io/).
342+
343+
## Provider-Specific Retry Behavior
344+
345+
### AWS Bedrock
346+
347+
The AWS Bedrock provider uses boto3's built-in retry mechanisms instead of httpx. To configure retries for Bedrock, use boto3's `Config`:
348+
349+
```python
350+
from botocore.config import Config
351+
352+
config = Config(retries={'max_attempts': 5, 'mode': 'adaptive'})
353+
```
354+
355+
See [Bedrock: Configuring Retries](models/bedrock.md#configuring-retries) for complete examples.

pydantic_ai_slim/pydantic_ai/models/__init__.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import base64
1010
import warnings
1111
from abc import ABC, abstractmethod
12-
from collections.abc import AsyncIterator, Iterator
12+
from collections.abc import AsyncIterator, Callable, Iterator
1313
from contextlib import asynccontextmanager, contextmanager
1414
from dataclasses import dataclass, field, replace
1515
from datetime import datetime
@@ -47,7 +47,7 @@
4747
)
4848
from ..output import OutputMode
4949
from ..profiles import DEFAULT_PROFILE, ModelProfile, ModelProfileSpec
50-
from ..providers import infer_provider
50+
from ..providers import Provider, infer_provider
5151
from ..settings import ModelSettings, merge_model_settings
5252
from ..tools import ToolDefinition
5353
from ..usage import RequestUsage
@@ -724,8 +724,17 @@ def override_allow_model_requests(allow_model_requests: bool) -> Iterator[None]:
724724
ALLOW_MODEL_REQUESTS = old_value # pyright: ignore[reportConstantRedefinition]
725725

726726

727-
def infer_model(model: Model | KnownModelName | str) -> Model: # noqa: C901
728-
"""Infer the model from the name."""
727+
def infer_model( # noqa: C901
728+
model: Model | KnownModelName | str, provider_factory: Callable[[str], Provider[Any]] = infer_provider
729+
) -> Model:
730+
"""Infer the model from the name.
731+
732+
Args:
733+
model:
734+
Model name to instantiate, in the format of `provider:model`. Use the string "test" to instantiate TestModel.
735+
provider_factory:
736+
Function that instantiates a provider object. It is called with the provider name as its only argument. Defaults to `providers.infer_provider`.
737+
"""
729738
if isinstance(model, Model):
730739
return model
731740
elif model == 'test':
@@ -760,7 +769,7 @@ def infer_model(model: Model | KnownModelName | str) -> Model: # noqa: C901
760769
)
761770
provider_name = 'google-vertex'
762771

763-
provider = infer_provider(provider_name)
772+
provider: Provider[Any] = provider_factory(provider_name)
764773

765774
model_kind = provider_name
766775
if model_kind.startswith('gateway/'):

pydantic_ai_slim/pydantic_ai/models/google.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -684,13 +684,18 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
684684
provider_name=self.provider_name,
685685
)
686686

687-
if part.text:
688-
if part.thought:
689-
yield self._parts_manager.handle_thinking_delta(vendor_part_id='thinking', content=part.text)
690-
else:
691-
maybe_event = self._parts_manager.handle_text_delta(vendor_part_id='content', content=part.text)
692-
if maybe_event is not None: # pragma: no branch
693-
yield maybe_event
687+
if part.text is not None:
688+
if len(part.text) > 0:
689+
if part.thought:
690+
yield self._parts_manager.handle_thinking_delta(
691+
vendor_part_id='thinking', content=part.text
692+
)
693+
else:
694+
maybe_event = self._parts_manager.handle_text_delta(
695+
vendor_part_id='content', content=part.text
696+
)
697+
if maybe_event is not None: # pragma: no branch
698+
yield maybe_event
694699
elif part.function_call:
695700
maybe_event = self._parts_manager.handle_tool_call_delta(
696701
vendor_part_id=uuid4(),
@@ -829,7 +834,10 @@ def _process_response_from_parts(
829834
elif part.code_execution_result is not None:
830835
assert code_execution_tool_call_id is not None
831836
item = _map_code_execution_result(part.code_execution_result, provider_name, code_execution_tool_call_id)
832-
elif part.text:
837+
elif part.text is not None:
838+
# Google sometimes sends empty text parts; we don't want to add them to the response.
839+
if len(part.text) == 0:
840+
continue
833841
if part.thought:
834842
item = ThinkingPart(content=part.text)
835843
else:

pydantic_ai_slim/pydantic_ai/ui/_adapter.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from pydantic_ai import DeferredToolRequests, DeferredToolResults
2121
from pydantic_ai.agent import AbstractAgent
22+
from pydantic_ai.agent.abstract import Instructions
2223
from pydantic_ai.builtin_tools import AbstractBuiltinTool
2324
from pydantic_ai.exceptions import UserError
2425
from pydantic_ai.messages import ModelMessage
@@ -200,6 +201,7 @@ def run_stream_native(
200201
message_history: Sequence[ModelMessage] | None = None,
201202
deferred_tool_results: DeferredToolResults | None = None,
202203
model: Model | KnownModelName | str | None = None,
204+
instructions: Instructions[AgentDepsT] = None,
203205
deps: AgentDepsT = None,
204206
model_settings: ModelSettings | None = None,
205207
usage_limits: UsageLimits | None = None,
@@ -216,6 +218,7 @@ def run_stream_native(
216218
message_history: History of the conversation so far.
217219
deferred_tool_results: Optional results for deferred tool calls in the message history.
218220
model: Optional model to use for this run, required if `model` was not set when creating the agent.
221+
instructions: Optional additional instructions to use for this run.
219222
deps: Optional dependencies to use for this run.
220223
model_settings: Optional settings to use for this model's request.
221224
usage_limits: Optional limits on model request count or token usage.
@@ -251,6 +254,7 @@ def run_stream_native(
251254
model=model,
252255
deps=deps,
253256
model_settings=model_settings,
257+
instructions=instructions,
254258
usage_limits=usage_limits,
255259
usage=usage,
256260
infer_name=infer_name,
@@ -265,6 +269,7 @@ def run_stream(
265269
message_history: Sequence[ModelMessage] | None = None,
266270
deferred_tool_results: DeferredToolResults | None = None,
267271
model: Model | KnownModelName | str | None = None,
272+
instructions: Instructions[AgentDepsT] = None,
268273
deps: AgentDepsT = None,
269274
model_settings: ModelSettings | None = None,
270275
usage_limits: UsageLimits | None = None,
@@ -282,6 +287,7 @@ def run_stream(
282287
message_history: History of the conversation so far.
283288
deferred_tool_results: Optional results for deferred tool calls in the message history.
284289
model: Optional model to use for this run, required if `model` was not set when creating the agent.
290+
instructions: Optional additional instructions to use for this run.
285291
deps: Optional dependencies to use for this run.
286292
model_settings: Optional settings to use for this model's request.
287293
usage_limits: Optional limits on model request count or token usage.
@@ -298,6 +304,7 @@ def run_stream(
298304
message_history=message_history,
299305
deferred_tool_results=deferred_tool_results,
300306
model=model,
307+
instructions=instructions,
301308
deps=deps,
302309
model_settings=model_settings,
303310
usage_limits=usage_limits,
@@ -318,6 +325,7 @@ async def dispatch_request(
318325
message_history: Sequence[ModelMessage] | None = None,
319326
deferred_tool_results: DeferredToolResults | None = None,
320327
model: Model | KnownModelName | str | None = None,
328+
instructions: Instructions[AgentDepsT] = None,
321329
deps: AgentDepsT = None,
322330
output_type: OutputSpec[Any] | None = None,
323331
model_settings: ModelSettings | None = None,
@@ -338,6 +346,7 @@ async def dispatch_request(
338346
message_history: History of the conversation so far.
339347
deferred_tool_results: Optional results for deferred tool calls in the message history.
340348
model: Optional model to use for this run, required if `model` was not set when creating the agent.
349+
instructions: Optional additional instructions to use for this run.
341350
deps: Optional dependencies to use for this run.
342351
model_settings: Optional settings to use for this model's request.
343352
usage_limits: Optional limits on model request count or token usage.
@@ -375,6 +384,7 @@ async def dispatch_request(
375384
deps=deps,
376385
output_type=output_type,
377386
model=model,
387+
instructions=instructions,
378388
model_settings=model_settings,
379389
usage_limits=usage_limits,
380390
usage=usage,

tests/models/test_google.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import datetime
44
import os
55
import re
6+
from collections.abc import AsyncIterator
67
from typing import Any
78

89
import pytest
@@ -47,6 +48,7 @@
4748
BuiltinToolCallEvent, # pyright: ignore[reportDeprecated]
4849
BuiltinToolResultEvent, # pyright: ignore[reportDeprecated]
4950
)
51+
from pydantic_ai.models import ModelRequestParameters
5052
from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput
5153
from pydantic_ai.settings import ModelSettings
5254
from pydantic_ai.usage import RequestUsage, RunUsage, UsageLimits
@@ -56,6 +58,7 @@
5658

5759
with try_import() as imports_successful:
5860
from google.genai.types import (
61+
FinishReason as GoogleFinishReason,
5962
GenerateContentResponse,
6063
GenerateContentResponseUsageMetadata,
6164
HarmBlockThreshold,
@@ -64,7 +67,12 @@
6467
ModalityTokenCount,
6568
)
6669

67-
from pydantic_ai.models.google import GoogleModel, GoogleModelSettings, _metadata_as_usage # type: ignore
70+
from pydantic_ai.models.google import (
71+
GeminiStreamedResponse,
72+
GoogleModel,
73+
GoogleModelSettings,
74+
_metadata_as_usage, # pyright: ignore[reportPrivateUsage]
75+
)
6876
from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings
6977
from pydantic_ai.providers.google import GoogleProvider
7078
from pydantic_ai.providers.openai import OpenAIProvider
@@ -3063,3 +3071,52 @@ async def test_google_httpx_client_is_not_closed(allow_model_requests: None, gem
30633071
agent = Agent(GoogleModel('gemini-2.5-flash-lite', provider=GoogleProvider(api_key=gemini_api_key)))
30643072
result = await agent.run('What is the capital of Mexico?')
30653073
assert result.output == snapshot('The capital of Mexico is **Mexico City**.')
3074+
3075+
3076+
def test_google_process_response_filters_empty_text_parts(google_provider: GoogleProvider):
3077+
model = GoogleModel('gemini-2.5-pro', provider=google_provider)
3078+
response = _generate_response_with_texts(response_id='resp-123', texts=['', 'first', '', 'second'])
3079+
3080+
result = model._process_response(response) # pyright: ignore[reportPrivateUsage]
3081+
3082+
assert result.parts == snapshot([TextPart(content='first'), TextPart(content='second')])
3083+
3084+
3085+
async def test_gemini_streamed_response_emits_text_events_for_non_empty_parts():
3086+
chunk = _generate_response_with_texts('stream-1', ['', 'streamed text'])
3087+
3088+
async def response_iterator() -> AsyncIterator[GenerateContentResponse]:
3089+
yield chunk
3090+
3091+
streamed_response = GeminiStreamedResponse(
3092+
model_request_parameters=ModelRequestParameters(),
3093+
_model_name='gemini-test',
3094+
_response=response_iterator(),
3095+
_timestamp=datetime.datetime.now(datetime.timezone.utc),
3096+
_provider_name='test-provider',
3097+
)
3098+
3099+
events = [event async for event in streamed_response._get_event_iterator()] # pyright: ignore[reportPrivateUsage]
3100+
assert events == snapshot([PartStartEvent(index=0, part=TextPart(content='streamed text'))])
3101+
3102+
3103+
def _generate_response_with_texts(response_id: str, texts: list[str]) -> GenerateContentResponse:
3104+
return GenerateContentResponse.model_validate(
3105+
{
3106+
'response_id': response_id,
3107+
'model_version': 'gemini-test',
3108+
'usage_metadata': GenerateContentResponseUsageMetadata(
3109+
prompt_token_count=0,
3110+
candidates_token_count=0,
3111+
),
3112+
'candidates': [
3113+
{
3114+
'finish_reason': GoogleFinishReason.STOP,
3115+
'content': {
3116+
'role': 'model',
3117+
'parts': [{'text': text} for text in texts],
3118+
},
3119+
}
3120+
],
3121+
}
3122+
)

tests/models/test_model.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,17 @@ def test_infer_model(
242242
assert m2 is m
243243

244244

245+
def test_infer_model_with_provider():
246+
from pydantic_ai.providers import openai
247+
248+
provider_class = openai.OpenAIProvider(api_key='1234', base_url='http://test')
249+
m = infer_model('openai:gpt-5', lambda x: provider_class)
250+
251+
assert isinstance(m, OpenAIChatModel)
252+
assert m._provider is provider_class # type: ignore
253+
assert m._provider.base_url == 'http://test' # type: ignore
254+
255+
245256
def test_infer_str_unknown():
246257
with pytest.raises(UserError, match='Unknown model: foobar'):
247258
infer_model('foobar')

0 commit comments

Comments
 (0)