pydantic
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/Makefile b/Makefile
Lines changed: 5 additions & 5 deletions
diff --git a/docs/examples/ag-ui.md b/docs/examples/ag-ui.md
Lines changed: 2 additions & 2 deletions
diff --git a/docs/models/bedrock.md b/docs/models/bedrock.md
Lines changed: 44 additions & 0 deletions
diff --git a/docs/retries.md b/docs/retries.md
Lines changed: 14 additions & 0 deletions
diff --git a/pydantic_ai_slim/pydantic_ai/exceptions.py b/pydantic_ai_slim/pydantic_ai/exceptions.py
Lines changed: 3 additions & 0 deletions
diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py
Lines changed: 4 additions & 3 deletions
diff --git a/pydantic_ai_slim/pydantic_ai/models/__init__.py b/pydantic_ai_slim/pydantic_ai/models/__init__.py
Lines changed: 23 additions & 10 deletions
diff --git a/pydantic_ai_slim/pydantic_ai/models/bedrock.py b/pydantic_ai_slim/pydantic_ai/models/bedrock.py
Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ jobs:
       - run: make docs
 
       - run: make docs-insiders
-        if: github.event.pull_request.head.repo.full_name == github.repository || github.ref == 'refs/heads/main'
+        if: (github.event.pull_request.head.repo.full_name == github.repository || github.ref == 'refs/heads/main') && github.repository == 'pydantic/pydantic-ai'
         env:
           PPPR_TOKEN: ${{ secrets.PPPR_TOKEN }}
 
@@ -103,7 +103,7 @@ jobs:
   test-live:
     runs-on: ubuntu-latest
     timeout-minutes: 5
-    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
+    if: (github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push') && github.repository == 'pydantic/pydantic-ai'
     steps:
       - uses: actions/checkout@v4
 
 
@@ -21,3 +21,5 @@ node_modules/
 /test_tmp/
 .mcp.json
 .claude/
+/.cursor/
+/.devcontainer/
@@ -53,16 +53,16 @@ typecheck-both: typecheck-pyright typecheck-mypy
 .PHONY: test
 test: ## Run tests and collect coverage data
 	@# To test using a specific version of python, run 'make install-all-python' then set environment variable PYTEST_PYTHON=3.10 or similar
-	$(if $(PYTEST_PYTHON),UV_PROJECT_ENVIRONMENT=.venv$(subst .,,$(PYTEST_PYTHON))) uv run $(if $(PYTEST_PYTHON),--python $(PYTEST_PYTHON)) coverage run -m pytest -n auto --dist=loadgroup --durations=20
+	COLUMNS=150 $(if $(PYTEST_PYTHON),UV_PROJECT_ENVIRONMENT=.venv$(subst .,,$(PYTEST_PYTHON))) uv run $(if $(PYTEST_PYTHON),--python $(PYTEST_PYTHON)) coverage run -m pytest -n auto --dist=loadgroup --durations=20
 	@uv run coverage combine
 	@uv run coverage report
 
 .PHONY: test-all-python
 test-all-python: ## Run tests on Python 3.10 to 3.13
-	UV_PROJECT_ENVIRONMENT=.venv310 uv run --python 3.10 --all-extras --all-packages coverage run -p -m pytest
-	UV_PROJECT_ENVIRONMENT=.venv311 uv run --python 3.11 --all-extras --all-packages coverage run -p -m pytest
-	UV_PROJECT_ENVIRONMENT=.venv312 uv run --python 3.12 --all-extras --all-packages coverage run -p -m pytest
-	UV_PROJECT_ENVIRONMENT=.venv313 uv run --python 3.13 --all-extras --all-packages coverage run -p -m pytest
+	COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv310 uv run --python 3.10 --all-extras --all-packages coverage run -p -m pytest
+	COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv311 uv run --python 3.11 --all-extras --all-packages coverage run -p -m pytest
+	COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv312 uv run --python 3.12 --all-extras --all-packages coverage run -p -m pytest
+	COLUMNS=150 UV_PROJECT_ENVIRONMENT=.venv313 uv run --python 3.13 --all-extras --all-packages coverage run -p -m pytest
 	@uv run coverage combine
 	@uv run coverage report
 
 
@@ -1,6 +1,6 @@
 # Agent User Interaction (AG-UI)
 
-Example of using Pydantic AI agents with the [AG-UI Dojo](https://github.com/ag-ui-protocol/ag-ui/tree/main/typescript-sdk/apps/dojo) example app.
+Example of using Pydantic AI agents with the [AG-UI Dojo](https://github.com/ag-ui-protocol/ag-ui/tree/main/apps/dojo) example app.
 
 See the [AG-UI docs](../ui/ag-ui.md) for more information about the AG-UI integration.
 
@@ -48,7 +48,7 @@ Next run the AG-UI Dojo example frontend.
     cd ag-ui/sdks/typescript
     ```
 
-3. Run the Dojo app following the [official instructions](https://github.com/ag-ui-protocol/ag-ui/tree/main/typescript-sdk/apps/dojo#development-setup)
+3. Run the Dojo app following the [official instructions](https://github.com/ag-ui-protocol/ag-ui/tree/main/apps/dojo#development-setup)
 4. Visit <http://localhost:3000/pydantic-ai>
 5. Select View `Pydantic AI` from the sidebar
 
 
@@ -114,3 +114,47 @@ model = BedrockConverseModel(
 agent = Agent(model)
 ...
 ```
+
+## Configuring Retries
+
+Bedrock uses boto3's built-in retry mechanisms. You can configure retry behavior by passing a custom boto3 client with retry settings:
+
+```python
+import boto3
+from botocore.config import Config
+
+from pydantic_ai import Agent
+from pydantic_ai.models.bedrock import BedrockConverseModel
+from pydantic_ai.providers.bedrock import BedrockProvider
+
+# Configure retry settings
+config = Config(
+    retries={
+        'max_attempts': 5,
+        'mode': 'adaptive'  # Recommended for rate limiting
+    }
+)
+
+bedrock_client = boto3.client(
+    'bedrock-runtime',
+    region_name='us-east-1',
+    config=config
+)
+
+model = BedrockConverseModel(
+    'us.amazon.nova-micro-v1:0',
+    provider=BedrockProvider(bedrock_client=bedrock_client),
+)
+agent = Agent(model)
+```
+
+### Retry Modes
+
+- `'legacy'` (default mode): up to 5 attempts by default, basic retry behavior
+- `'standard'`: up to 3 attempts by default, more comprehensive error coverage
+- `'adaptive'`: up to 3 attempts by default, with client-side rate limiting (recommended for handling `ThrottlingException`)
+
+For more details on boto3 retry configuration, see the [AWS boto3 documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html).
+
+!!! note
+    Unlike other providers that use httpx for HTTP requests, Bedrock uses boto3's native retry mechanisms. The retry strategies described in [HTTP Request Retries](../retries.md) do not apply to Bedrock.
@@ -339,3 +339,17 @@ agent = Agent(model)
 - Use async transports for better concurrency when handling multiple requests
 
 For more advanced retry configurations, refer to the [tenacity documentation](https://tenacity.readthedocs.io/).
+
+## Provider-Specific Retry Behavior
+
+### AWS Bedrock
+
+The AWS Bedrock provider sends requests through boto3 rather than httpx, so it relies on boto3's built-in retry mechanisms. To configure retries for Bedrock, pass a botocore `Config`:
+
+```python
+from botocore.config import Config
+
+config = Config(retries={'max_attempts': 5, 'mode': 'adaptive'})
+```
+
+See [Bedrock: Configuring Retries](models/bedrock.md#configuring-retries) for complete examples.
@@ -44,6 +44,9 @@ def __init__(self, message: str):
     def __eq__(self, other: Any) -> bool:
         return isinstance(other, self.__class__) and other.message == self.message
 
+    def __hash__(self) -> int:
+        return hash((self.__class__, self.message))
+
     @classmethod
     def __get_pydantic_core_schema__(cls, _: Any, __: Any) -> core_schema.CoreSchema:
         """Pydantic core schema to allow `ModelRetry` to be (de)serialized."""
 
@@ -776,10 +776,11 @@ def model_response_str(self) -> str:
     def model_response_object(self) -> dict[str, Any]:
         """Return a dictionary representation of the content, wrapping non-dict types appropriately."""
         # gemini supports JSON dict return values, but no other JSON types, hence we wrap anything else in a dict
-        if isinstance(self.content, dict):
-            return tool_return_ta.dump_python(self.content, mode='json')  # pyright: ignore[reportUnknownMemberType]
+        json_content = tool_return_ta.dump_python(self.content, mode='json')
+        if isinstance(json_content, dict):
+            return json_content  # type: ignore[reportUnknownReturn]
         else:
-            return {'return_value': tool_return_ta.dump_python(self.content, mode='json')}
+            return {'return_value': json_content}
 
     def otel_event(self, settings: InstrumentationSettings) -> Event:
         return Event(
 
@@ -9,7 +9,7 @@
 import base64
 import warnings
 from abc import ABC, abstractmethod
-from collections.abc import AsyncIterator, Iterator
+from collections.abc import AsyncIterator, Callable, Iterator
 from contextlib import asynccontextmanager, contextmanager
 from dataclasses import dataclass, field, replace
 from datetime import datetime
@@ -47,7 +47,7 @@
 )
 from ..output import OutputMode
 from ..profiles import DEFAULT_PROFILE, ModelProfile, ModelProfileSpec
-from ..providers import infer_provider
+from ..providers import Provider, infer_provider
 from ..settings import ModelSettings, merge_model_settings
 from ..tools import ToolDefinition
 from ..usage import RequestUsage
@@ -126,18 +126,16 @@
         'cerebras:gpt-oss-120b',
         'cerebras:llama3.1-8b',
         'cerebras:llama-3.3-70b',
-        'cerebras:llama-4-scout-17b-16e-instruct',
-        'cerebras:llama-4-maverick-17b-128e-instruct',
         'cerebras:qwen-3-235b-a22b-instruct-2507',
         'cerebras:qwen-3-32b',
-        'cerebras:qwen-3-coder-480b',
         'cerebras:qwen-3-235b-a22b-thinking-2507',
         'cohere:c4ai-aya-expanse-32b',
         'cohere:c4ai-aya-expanse-8b',
         'cohere:command-nightly',
         'cohere:command-r-08-2024',
         'cohere:command-r-plus-08-2024',
         'cohere:command-r7b-12-2024',
+        'cerebras:zai-glm-4.6',
         'deepseek:deepseek-chat',
         'deepseek:deepseek-reasoner',
         'google-gla:gemini-2.0-flash',
@@ -189,11 +187,15 @@
         'groq:llama-3.2-3b-preview',
         'groq:llama-3.2-11b-vision-preview',
         'groq:llama-3.2-90b-vision-preview',
+        'heroku:amazon-rerank-1-0',
         'heroku:claude-3-5-haiku',
         'heroku:claude-3-5-sonnet-latest',
         'heroku:claude-3-7-sonnet',
-        'heroku:claude-4-sonnet',
         'heroku:claude-3-haiku',
+        'heroku:claude-4-5-haiku',
+        'heroku:claude-4-5-sonnet',
+        'heroku:claude-4-sonnet',
+        'heroku:cohere-rerank-3-5',
         'heroku:gpt-oss-120b',
         'heroku:nova-lite',
         'heroku:nova-pro',
@@ -722,8 +724,17 @@ def override_allow_model_requests(allow_model_requests: bool) -> Iterator[None]:
         ALLOW_MODEL_REQUESTS = old_value  # pyright: ignore[reportConstantRedefinition]
 
 
-def infer_model(model: Model | KnownModelName | str) -> Model:  # noqa: C901
-    """Infer the model from the name."""
+def infer_model(  # noqa: C901
+    model: Model | KnownModelName | str, provider_factory: Callable[[str], Provider[Any]] = infer_provider
+) -> Model:
+    """Infer the model from the name.
+
+    Args:
+        model:
+            Model name to instantiate, in the format of `provider:model`. Use the string "test" to instantiate TestModel.
+        provider_factory:
+            Function that instantiates a provider object; it is called with the provider name. Defaults to `providers.infer_provider`.
+    """
     if isinstance(model, Model):
         return model
     elif model == 'test':
@@ -758,11 +769,13 @@ def infer_model(model: Model | KnownModelName | str) -> Model:  # noqa: C901
         )
         provider_name = 'google-vertex'
 
-    provider = infer_provider(provider_name)
+    provider: Provider[Any] = provider_factory(provider_name)
 
     model_kind = provider_name
     if model_kind.startswith('gateway/'):
-        model_kind = provider_name.removeprefix('gateway/')
+        from ..providers.gateway import infer_gateway_model
+
+        return infer_gateway_model(model_kind.removeprefix('gateway/'), model_name=model_name)
     if model_kind in (
         'openai',
         'azure',
 
@@ -226,7 +226,7 @@ def __init__(
         self._model_name = model_name
 
         if isinstance(provider, str):
-            provider = infer_provider('gateway/bedrock' if provider == 'gateway' else provider)
+            provider = infer_provider('gateway/converse' if provider == 'gateway' else provider)
         self._provider = provider
         self.client = cast('BedrockRuntimeClient', provider.client)