Merge branch 'main' into add-file-search-tools-support

gorkachea · web-flow · commit 3116b2d02f78 · 2025-11-11T10:01:44.000+01:00
diff --git a/Makefile b/Makefile
@@ -53,16 +53,16 @@ typecheck-both: typecheck-pyright typecheck-mypy
 .PHONY: test
 test: ## Run tests and collect coverage data
 	@# To test using a specific version of python, run 'make install-all-python' then set environment variable PYTEST_PYTHON=3.10 or similar
-	$(if $(PYTEST_PYTHON),UV_PROJECT_ENVIRONMENT=.venv$(subst .,,$(PYTEST_PYTHON))) uv run $(if $(PYTEST_PYTHON),--python $(PYTEST_PYTHON)) coverage run -m pytest -n auto --dist=loadgroup --durations=20
+	COLUMNS=150 $(if $(PYTEST_PYTHON),UV_PROJECT_ENVIRONMENT=.venv$(subst .,,$(PYTEST_PYTHON))) uv run $(if $(PYTEST_PYTHON),--python $(PYTEST_PYTHON)) coverage run -m pytest -n auto --dist=loadgroup --durations=20
 	@uv run coverage combine
 	@uv run coverage report
 
 .PHONY: test-all-python
 test-all-python: ## Run tests on Python 3.10 to 3.13
-	UV_PROJECT_ENVIRONMENT=.venv310 uv run --python 3.10 --all-extras --all-packages coverage run -p -m pytest
-	UV_PROJECT_ENVIRONMENT=.venv311 uv run --python 3.11 --all-extras --all-packages coverage run -p -m pytest
-	UV_PROJECT_ENVIRONMENT=.venv312 uv run --python 3.12 --all-extras --all-packages coverage run -p -m pytest
-	UV_PROJECT_ENVIRONMENT=.venv313 uv run --python 3.13 --all-extras --all-packages coverage run -p -m pytest
+	COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv310 uv run --python 3.10 --all-extras --all-packages coverage run -p -m pytest
+	COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv311 uv run --python 3.11 --all-extras --all-packages coverage run -p -m pytest
+	COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv312 uv run --python 3.12 --all-extras --all-packages coverage run -p -m pytest
+	COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv313 uv run --python 3.13 --all-extras --all-packages coverage run -p -m pytest
 	@uv run coverage combine
 	@uv run coverage report
 
diff --git a/docs/examples/ag-ui.md b/docs/examples/ag-ui.md
@@ -1,6 +1,6 @@
 # Agent User Interaction (AG-UI)
 
-Example of using Pydantic AI agents with the [AG-UI Dojo](https://github.com/ag-ui-protocol/ag-ui/tree/main/typescript-sdk/apps/dojo) example app.
+Example of using Pydantic AI agents with the [AG-UI Dojo](https://github.com/ag-ui-protocol/ag-ui/tree/main/apps/dojo) example app.
 
 See the [AG-UI docs](../ui/ag-ui.md) for more information about the AG-UI integration.
 
@@ -48,7 +48,7 @@ Next run the AG-UI Dojo example frontend.
     cd ag-ui/sdks/typescript
     ```
 
-3. Run the Dojo app following the [official instructions](https://github.com/ag-ui-protocol/ag-ui/tree/main/typescript-sdk/apps/dojo#development-setup)
+3. Run the Dojo app following the [official instructions](https://github.com/ag-ui-protocol/ag-ui/tree/main/apps/dojo#development-setup)
 4. Visit <http://localhost:3000/pydantic-ai>
 5. Select View `Pydantic AI` from the sidebar
 
diff --git a/docs/models/bedrock.md b/docs/models/bedrock.md
@@ -114,3 +114,47 @@ model = BedrockConverseModel(
 agent = Agent(model)
 ...
 ```
+
+## Configuring Retries
+
+Bedrock uses boto3's built-in retry mechanisms. You can configure retry behavior by passing a custom boto3 client with retry settings:
+
+```python
+import boto3
+from botocore.config import Config
+
+from pydantic_ai import Agent
+from pydantic_ai.models.bedrock import BedrockConverseModel
+from pydantic_ai.providers.bedrock import BedrockProvider
+
+# Configure retry settings
+config = Config(
+    retries={
+        'max_attempts': 5,
+        'mode': 'adaptive'  # Recommended for rate limiting
+    }
+)
+
+bedrock_client = boto3.client(
+    'bedrock-runtime',
+    region_name='us-east-1',
+    config=config
+)
+
+model = BedrockConverseModel(
+    'us.amazon.nova-micro-v1:0',
+    provider=BedrockProvider(bedrock_client=bedrock_client),
+)
+agent = Agent(model)
+```
+
+### Retry Modes
+
+- `'legacy'` (default): up to 5 attempts by default, basic retry behavior
+- `'standard'`: up to 3 attempts by default, more comprehensive error coverage
+- `'adaptive'`: up to 3 attempts by default, plus client-side rate limiting (recommended for handling `ThrottlingException`)
+
+For more details on boto3 retry configuration, see the [AWS boto3 documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html).
+
+!!! note
+    Unlike other providers that use httpx for HTTP requests, Bedrock uses boto3's native retry mechanisms. The retry strategies described in [HTTP Request Retries](../retries.md) do not apply to Bedrock.
diff --git a/docs/retries.md b/docs/retries.md
@@ -339,3 +339,17 @@ agent = Agent(model)
 - Use async transports for better concurrency when handling multiple requests
 
 For more advanced retry configurations, refer to the [tenacity documentation](https://tenacity.readthedocs.io/).
+
+## Provider-Specific Retry Behavior
+
+### AWS Bedrock
+
+The AWS Bedrock provider relies on boto3's built-in retry mechanisms rather than httpx-based retries. To configure retries for Bedrock, pass a botocore `Config` to the boto3 client:
+
+```python
+from botocore.config import Config
+
+config = Config(retries={'max_attempts': 5, 'mode': 'adaptive'})
+```
+
+See [Bedrock: Configuring Retries](models/bedrock.md#configuring-retries) for complete examples.
diff --git a/pydantic_ai_slim/pydantic_ai/models/__init__.py b/pydantic_ai_slim/pydantic_ai/models/__init__.py
@@ -9,7 +9,7 @@
 import base64
 import warnings
 from abc import ABC, abstractmethod
-from collections.abc import AsyncIterator, Iterator
+from collections.abc import AsyncIterator, Callable, Iterator
 from contextlib import asynccontextmanager, contextmanager
 from dataclasses import dataclass, field, replace
 from datetime import datetime
@@ -47,7 +47,7 @@
 )
 from ..output import OutputMode
 from ..profiles import DEFAULT_PROFILE, ModelProfile, ModelProfileSpec
-from ..providers import infer_provider
+from ..providers import Provider, infer_provider
 from ..settings import ModelSettings, merge_model_settings
 from ..tools import ToolDefinition
 from ..usage import RequestUsage
@@ -724,8 +724,17 @@ def override_allow_model_requests(allow_model_requests: bool) -> Iterator[None]:
         ALLOW_MODEL_REQUESTS = old_value  # pyright: ignore[reportConstantRedefinition]
 
 
-def infer_model(model: Model | KnownModelName | str) -> Model:  # noqa: C901
-    """Infer the model from the name."""
+def infer_model(  # noqa: C901
+    model: Model | KnownModelName | str, provider_factory: Callable[[str], Provider[Any]] = infer_provider
+) -> Model:
+    """Infer the model from the name.
+
+    Args:
+        model:
+            Model name to instantiate, in the format of `provider:model`. Use the string "test" to instantiate TestModel.
+        provider_factory:
+            Function that instantiates a provider object. It is called with the provider name as its only argument. Defaults to `providers.infer_provider`.
+    """
     if isinstance(model, Model):
         return model
     elif model == 'test':
@@ -760,7 +769,7 @@ def infer_model(model: Model | KnownModelName | str) -> Model:  # noqa: C901
         )
         provider_name = 'google-vertex'
 
-    provider = infer_provider(provider_name)
+    provider: Provider[Any] = provider_factory(provider_name)
 
     model_kind = provider_name
     if model_kind.startswith('gateway/'):
diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py
@@ -684,13 +684,18 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
                         provider_name=self.provider_name,
                     )
 
-                if part.text:
-                    if part.thought:
-                        yield self._parts_manager.handle_thinking_delta(vendor_part_id='thinking', content=part.text)
-                    else:
-                        maybe_event = self._parts_manager.handle_text_delta(vendor_part_id='content', content=part.text)
-                        if maybe_event is not None:  # pragma: no branch
-                            yield maybe_event
+                if part.text is not None:
+                    if len(part.text) > 0:
+                        if part.thought:
+                            yield self._parts_manager.handle_thinking_delta(
+                                vendor_part_id='thinking', content=part.text
+                            )
+                        else:
+                            maybe_event = self._parts_manager.handle_text_delta(
+                                vendor_part_id='content', content=part.text
+                            )
+                            if maybe_event is not None:  # pragma: no branch
+                                yield maybe_event
                 elif part.function_call:
                     maybe_event = self._parts_manager.handle_tool_call_delta(
                         vendor_part_id=uuid4(),
@@ -829,7 +834,10 @@ def _process_response_from_parts(
         elif part.code_execution_result is not None:
             assert code_execution_tool_call_id is not None
             item = _map_code_execution_result(part.code_execution_result, provider_name, code_execution_tool_call_id)
-        elif part.text:
+        elif part.text is not None:
+            # Google sometimes sends empty text parts, we don't want to add them to the response
+            if len(part.text) == 0:
+                continue
             if part.thought:
                 item = ThinkingPart(content=part.text)
             else:
diff --git a/pydantic_ai_slim/pydantic_ai/ui/_adapter.py b/pydantic_ai_slim/pydantic_ai/ui/_adapter.py
@@ -19,6 +19,7 @@
 
 from pydantic_ai import DeferredToolRequests, DeferredToolResults
 from pydantic_ai.agent import AbstractAgent
+from pydantic_ai.agent.abstract import Instructions
 from pydantic_ai.builtin_tools import AbstractBuiltinTool
 from pydantic_ai.exceptions import UserError
 from pydantic_ai.messages import ModelMessage
@@ -200,6 +201,7 @@ def run_stream_native(
         message_history: Sequence[ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: Model | KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: UsageLimits | None = None,
@@ -216,6 +218,7 @@ def run_stream_native(
             message_history: History of the conversation so far.
             deferred_tool_results: Optional results for deferred tool calls in the message history.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
+            instructions: Optional additional instructions to use for this run.
             deps: Optional dependencies to use for this run.
             model_settings: Optional settings to use for this model's request.
             usage_limits: Optional limits on model request count or token usage.
@@ -251,6 +254,7 @@ def run_stream_native(
             model=model,
             deps=deps,
             model_settings=model_settings,
+            instructions=instructions,
             usage_limits=usage_limits,
             usage=usage,
             infer_name=infer_name,
@@ -265,6 +269,7 @@ def run_stream(
         message_history: Sequence[ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: Model | KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: UsageLimits | None = None,
@@ -282,6 +287,7 @@ def run_stream(
             message_history: History of the conversation so far.
             deferred_tool_results: Optional results for deferred tool calls in the message history.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
+            instructions: Optional additional instructions to use for this run.
             deps: Optional dependencies to use for this run.
             model_settings: Optional settings to use for this model's request.
             usage_limits: Optional limits on model request count or token usage.
@@ -298,6 +304,7 @@ def run_stream(
                 message_history=message_history,
                 deferred_tool_results=deferred_tool_results,
                 model=model,
+                instructions=instructions,
                 deps=deps,
                 model_settings=model_settings,
                 usage_limits=usage_limits,
@@ -318,6 +325,7 @@ async def dispatch_request(
         message_history: Sequence[ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: Model | KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         output_type: OutputSpec[Any] | None = None,
         model_settings: ModelSettings | None = None,
@@ -338,6 +346,7 @@ async def dispatch_request(
             message_history: History of the conversation so far.
             deferred_tool_results: Optional results for deferred tool calls in the message history.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
+            instructions: Optional additional instructions to use for this run.
             deps: Optional dependencies to use for this run.
             model_settings: Optional settings to use for this model's request.
             usage_limits: Optional limits on model request count or token usage.
@@ -375,6 +384,7 @@ async def dispatch_request(
                 deps=deps,
                 output_type=output_type,
                 model=model,
+                instructions=instructions,
                 model_settings=model_settings,
                 usage_limits=usage_limits,
                 usage=usage,
diff --git a/tests/models/test_google.py b/tests/models/test_google.py
@@ -3,6 +3,7 @@
 import datetime
 import os
 import re
+from collections.abc import AsyncIterator
 from typing import Any
 
 import pytest
@@ -47,6 +48,7 @@
     BuiltinToolCallEvent,  # pyright: ignore[reportDeprecated]
     BuiltinToolResultEvent,  # pyright: ignore[reportDeprecated]
 )
+from pydantic_ai.models import ModelRequestParameters
 from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput
 from pydantic_ai.settings import ModelSettings
 from pydantic_ai.usage import RequestUsage, RunUsage, UsageLimits
@@ -56,6 +58,7 @@
 
 with try_import() as imports_successful:
     from google.genai.types import (
+        FinishReason as GoogleFinishReason,
         GenerateContentResponse,
         GenerateContentResponseUsageMetadata,
         HarmBlockThreshold,
@@ -64,7 +67,12 @@
         ModalityTokenCount,
     )
 
-    from pydantic_ai.models.google import GoogleModel, GoogleModelSettings, _metadata_as_usage  # type: ignore
+    from pydantic_ai.models.google import (
+        GeminiStreamedResponse,
+        GoogleModel,
+        GoogleModelSettings,
+        _metadata_as_usage,  # pyright: ignore[reportPrivateUsage]
+    )
     from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings
     from pydantic_ai.providers.google import GoogleProvider
     from pydantic_ai.providers.openai import OpenAIProvider
@@ -3063,3 +3071,52 @@ async def test_google_httpx_client_is_not_closed(allow_model_requests: None, gem
     agent = Agent(GoogleModel('gemini-2.5-flash-lite', provider=GoogleProvider(api_key=gemini_api_key)))
     result = await agent.run('What is the capital of Mexico?')
     assert result.output == snapshot('The capital of Mexico is **Mexico City**.')
+
+
+def test_google_process_response_filters_empty_text_parts(google_provider: GoogleProvider):
+    model = GoogleModel('gemini-2.5-pro', provider=google_provider)
+    response = _generate_response_with_texts(response_id='resp-123', texts=['', 'first', '', 'second'])
+
+    result = model._process_response(response)  # pyright: ignore[reportPrivateUsage]
+
+    assert result.parts == snapshot([TextPart(content='first'), TextPart(content='second')])
+
+
+async def test_gemini_streamed_response_emits_text_events_for_non_empty_parts():
+    chunk = _generate_response_with_texts('stream-1', ['', 'streamed text'])
+
+    async def response_iterator() -> AsyncIterator[GenerateContentResponse]:
+        yield chunk
+
+    streamed_response = GeminiStreamedResponse(
+        model_request_parameters=ModelRequestParameters(),
+        _model_name='gemini-test',
+        _response=response_iterator(),
+        _timestamp=datetime.datetime.now(datetime.timezone.utc),
+        _provider_name='test-provider',
+    )
+
+    events = [event async for event in streamed_response._get_event_iterator()]  # pyright: ignore[reportPrivateUsage]
+    assert events == snapshot([PartStartEvent(index=0, part=TextPart(content='streamed text'))])
+
+
+def _generate_response_with_texts(response_id: str, texts: list[str]) -> GenerateContentResponse:
+    return GenerateContentResponse.model_validate(
+        {
+            'response_id': response_id,
+            'model_version': 'gemini-test',
+            'usage_metadata': GenerateContentResponseUsageMetadata(
+                prompt_token_count=0,
+                candidates_token_count=0,
+            ),
+            'candidates': [
+                {
+                    'finish_reason': GoogleFinishReason.STOP,
+                    'content': {
+                        'role': 'model',
+                        'parts': [{'text': text} for text in texts],
+                    },
+                }
+            ],
+        }
+    )
diff --git a/tests/models/test_model.py b/tests/models/test_model.py
@@ -242,6 +242,17 @@ def test_infer_model(
         assert m2 is m
 
 
+def test_infer_model_with_provider():
+    from pydantic_ai.providers import openai
+
+    provider_class = openai.OpenAIProvider(api_key='1234', base_url='http://test')
+    m = infer_model('openai:gpt-5', lambda x: provider_class)
+
+    assert isinstance(m, OpenAIChatModel)
+    assert m._provider is provider_class  # type: ignore
+    assert m._provider.base_url == 'http://test'  # type: ignore
+
+
 def test_infer_str_unknown():
     with pytest.raises(UserError, match='Unknown model: foobar'):
         infer_model('foobar')