Merged
9 changes: 5 additions & 4 deletions docs/models/overview.md
@@ -180,7 +180,7 @@ contains all the exceptions encountered during the `run` execution.
=== "Python >=3.11"

```python {title="fallback_model_failure.py" py="3.11"}
from pydantic_ai import Agent, ModelHTTPError
from pydantic_ai import Agent, ModelAPIError
from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.models.fallback import FallbackModel
from pydantic_ai.models.openai import OpenAIChatModel
@@ -192,7 +192,7 @@ contains all the exceptions encountered during the `run` execution.
agent = Agent(fallback_model)
try:
response = agent.run_sync('What is the capital of France?')
except* ModelHTTPError as exc_group:
except* ModelAPIError as exc_group:
for exc in exc_group.exceptions:
print(exc)
```
@@ -206,7 +206,7 @@ contains all the exceptions encountered during the `run` execution.
```python {title="fallback_model_failure.py" noqa="F821" test="skip"}
from exceptiongroup import catch

from pydantic_ai import Agent, ModelHTTPError
from pydantic_ai import Agent, ModelAPIError
from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.models.fallback import FallbackModel
from pydantic_ai.models.openai import OpenAIChatModel
@@ -222,10 +222,11 @@ contains all the exceptions encountered during the `run` execution.
fallback_model = FallbackModel(openai_model, anthropic_model)

agent = Agent(fallback_model)
with catch({ModelHTTPError: model_status_error_handler}):
with catch({ModelAPIError: model_status_error_handler}):
response = agent.run_sync('What is the capital of France?')
```

By default, the `FallbackModel` only moves on to the next model if the current model raises a
[`ModelAPIError`][pydantic_ai.exceptions.ModelAPIError], which includes
[`ModelHTTPError`][pydantic_ai.exceptions.ModelHTTPError]. You can customize this behavior by
passing a custom `fallback_on` argument to the `FallbackModel` constructor.
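
For example, `fallback_on` can be a predicate that restricts fallback to server-side failures. A minimal sketch, assuming the `provider:model` string shorthand for model names (the model names themselves are illustrative); the callable form matches the `Callable[[Exception], bool]` signature in the constructor diff further below:

```python
from pydantic_ai import Agent, ModelHTTPError
from pydantic_ai.models.fallback import FallbackModel


def fallback_on_server_errors(exc: Exception) -> bool:
    # Fall back only on 5xx server errors; surface 4xx client errors
    # (e.g. a malformed request) instead of silently trying another model.
    return isinstance(exc, ModelHTTPError) and exc.status_code >= 500


fallback_model = FallbackModel(
    'openai:gpt-4o',  # illustrative model names
    'anthropic:claude-sonnet-4-5',
    fallback_on=fallback_on_server_errors,
)
agent = Agent(fallback_model)
result = agent.run_sync('What is the capital of France?')
```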
2 changes: 2 additions & 0 deletions pydantic_ai_slim/pydantic_ai/__init__.py
@@ -24,6 +24,7 @@
CallDeferred,
FallbackExceptionGroup,
IncompleteToolCall,
ModelAPIError,
ModelHTTPError,
ModelRetry,
UnexpectedModelBehavior,
@@ -126,6 +127,7 @@
'CallDeferred',
'ApprovalRequired',
'ModelRetry',
'ModelAPIError',
'ModelHTTPError',
'FallbackExceptionGroup',
'IncompleteToolCall',
23 changes: 14 additions & 9 deletions pydantic_ai_slim/pydantic_ai/exceptions.py
@@ -22,6 +22,7 @@
'AgentRunError',
'UnexpectedModelBehavior',
'UsageLimitExceeded',
'ModelAPIError',
'ModelHTTPError',
'IncompleteToolCall',
'FallbackExceptionGroup',
@@ -151,27 +152,31 @@ def __str__(self) -> str:
return self.message


class ModelHTTPError(AgentRunError):
class ModelAPIError(AgentRunError):
"""Raised when a model provider API request fails."""

model_name: str
"""The name of the model associated with the error."""

def __init__(self, model_name: str, message: str):
self.model_name = model_name
super().__init__(message)


class ModelHTTPError(ModelAPIError):
"""Raised when an model provider response has a status code of 4xx or 5xx."""

status_code: int
"""The HTTP status code returned by the API."""

model_name: str
"""The name of the model associated with the error."""

body: object | None
"""The body of the response, if available."""

message: str
"""The error message with the status code and response body, if available."""

def __init__(self, status_code: int, model_name: str, body: object | None = None):
self.status_code = status_code
self.model_name = model_name
self.body = body
message = f'status_code: {status_code}, model_name: {model_name}, body: {body}'
super().__init__(message)
super().__init__(model_name=model_name, message=message)


class FallbackExceptionGroup(ExceptionGroup[Any]):
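
Taken together, the new hierarchy means a single `except ModelAPIError` handler catches both connection-level failures and HTTP status errors, while `except ModelHTTPError` still narrows to status-code failures. A minimal sketch, with an illustrative agent setup (both exception classes and their attributes are as defined in the diff above):

```python
from pydantic_ai import Agent, ModelAPIError, ModelHTTPError

agent = Agent('openai:gpt-4o')  # illustrative model name

try:
    result = agent.run_sync('What is the capital of France?')
except ModelHTTPError as exc:
    # The subclass carries the HTTP status code and response body.
    print(f'{exc.model_name} returned {exc.status_code}: {exc.body}')
except ModelAPIError as exc:
    # The base class covers failures with no HTTP status, e.g. connection errors.
    # Note it must be caught after ModelHTTPError, its subclass.
    print(f'{exc.model_name} request failed: {exc.message}')
```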
19 changes: 15 additions & 4 deletions pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -14,7 +14,7 @@
from .._run_context import RunContext
from .._utils import guard_tool_call_id as _guard_tool_call_id
from ..builtin_tools import CodeExecutionTool, MCPServerTool, MemoryTool, WebSearchTool
from ..exceptions import UserError
from ..exceptions import ModelAPIError, UserError
from ..messages import (
BinaryContent,
BuiltinToolCallPart,
@@ -55,7 +55,14 @@


try:
from anthropic import NOT_GIVEN, APIStatusError, AsyncAnthropicBedrock, AsyncStream, omit as OMIT
from anthropic import (
NOT_GIVEN,
APIConnectionError,
APIStatusError,
AsyncAnthropicBedrock,
AsyncStream,
omit as OMIT,
)
from anthropic.types.beta import (
BetaBase64PDFBlockParam,
BetaBase64PDFSourceParam,
@@ -358,7 +365,9 @@ async def _messages_create(
except APIStatusError as e:
if (status_code := e.status_code) >= 400:
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e
raise # pragma: lax no cover
raise ModelAPIError(model_name=self.model_name, message=e.message) from e # pragma: lax no cover
except APIConnectionError as e:
raise ModelAPIError(model_name=self.model_name, message=e.message) from e

async def _messages_count_tokens(
self,
@@ -395,7 +404,9 @@ async def _messages_count_tokens(
except APIStatusError as e:
if (status_code := e.status_code) >= 400:
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e
raise # pragma: lax no cover
raise ModelAPIError(model_name=self.model_name, message=e.message) from e # pragma: lax no cover
except APIConnectionError as e:
raise ModelAPIError(model_name=self.model_name, message=e.message) from e

def _process_response(self, response: BetaMessage) -> ModelResponse:
"""Process a non-streamed response, and prepare a message to return."""
14 changes: 9 additions & 5 deletions pydantic_ai_slim/pydantic_ai/models/bedrock.py
@@ -41,7 +41,7 @@
usage,
)
from pydantic_ai._run_context import RunContext
from pydantic_ai.exceptions import ModelHTTPError, UserError
from pydantic_ai.exceptions import ModelAPIError, ModelHTTPError, UserError
from pydantic_ai.models import Model, ModelRequestParameters, StreamedResponse, download_item
from pydantic_ai.providers import Provider, infer_provider
from pydantic_ai.providers.bedrock import BedrockModelProfile
@@ -312,8 +312,10 @@ async def count_tokens(
try:
response = await anyio.to_thread.run_sync(functools.partial(self.client.count_tokens, **params))
except ClientError as e:
status_code = e.response.get('ResponseMetadata', {}).get('HTTPStatusCode', 500)
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.response) from e
status_code = e.response.get('ResponseMetadata', {}).get('HTTPStatusCode')
if isinstance(status_code, int):
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.response) from e
raise ModelAPIError(model_name=self.model_name, message=str(e)) from e
return usage.RequestUsage(input_tokens=response['inputTokens'])

@asynccontextmanager
@@ -459,8 +461,10 @@ async def _messages_create(
else:
model_response = await anyio.to_thread.run_sync(functools.partial(self.client.converse, **params))
except ClientError as e:
status_code = e.response.get('ResponseMetadata', {}).get('HTTPStatusCode', 500)
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.response) from e
status_code = e.response.get('ResponseMetadata', {}).get('HTTPStatusCode')
if isinstance(status_code, int):
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.response) from e
raise ModelAPIError(model_name=self.model_name, message=str(e)) from e
return model_response

@staticmethod
4 changes: 2 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/cohere.py
@@ -6,7 +6,7 @@

from typing_extensions import assert_never

from pydantic_ai.exceptions import UserError
from pydantic_ai.exceptions import ModelAPIError, UserError

from .. import ModelHTTPError, usage
from .._utils import generate_tool_call_id as _generate_tool_call_id, guard_tool_call_id as _guard_tool_call_id
@@ -195,7 +195,7 @@ async def _chat(
except ApiError as e:
if (status_code := e.status_code) and status_code >= 400:
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e
raise # pragma: lax no cover
raise ModelAPIError(model_name=self.model_name, message=str(e)) from e

def _process_response(self, response: V2ChatResponse) -> ModelResponse:
"""Process a non-streamed response, and prepare a message to return."""
4 changes: 2 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/fallback.py
@@ -11,7 +11,7 @@
from pydantic_ai._run_context import RunContext
from pydantic_ai.models.instrumented import InstrumentedModel

from ..exceptions import FallbackExceptionGroup, ModelHTTPError
from ..exceptions import FallbackExceptionGroup, ModelAPIError
from ..profiles import ModelProfile
from . import KnownModelName, Model, ModelRequestParameters, StreamedResponse, infer_model

@@ -36,7 +36,7 @@ def __init__(
self,
default_model: Model | KnownModelName | str,
*fallback_models: Model | KnownModelName | str,
fallback_on: Callable[[Exception], bool] | tuple[type[Exception], ...] = (ModelHTTPError,),
fallback_on: Callable[[Exception], bool] | tuple[type[Exception], ...] = (ModelAPIError,),
):
"""Initialize a fallback model instance.

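
The tuple form of `fallback_on` is the simpler alternative to a predicate: any raised exception that is an instance of one of the listed types triggers fallback. A hedged sketch, with illustrative model names, showing how to restore the old, narrower default that the diff above replaces:

```python
from pydantic_ai import Agent, ModelHTTPError
from pydantic_ai.models.fallback import FallbackModel

# Restore the pre-PR behavior: fall back only on HTTP status errors,
# not on connection-level ModelAPIError failures.
fallback_model = FallbackModel(
    'openai:gpt-4o',  # illustrative model names
    'anthropic:claude-sonnet-4-5',
    fallback_on=(ModelHTTPError,),
)
agent = Agent(fallback_model)
```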
4 changes: 2 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/google.py
@@ -14,7 +14,7 @@
from .._output import OutputObjectDefinition
from .._run_context import RunContext
from ..builtin_tools import CodeExecutionTool, ImageGenerationTool, UrlContextTool, WebSearchTool
from ..exceptions import ModelHTTPError, UserError
from ..exceptions import ModelAPIError, ModelHTTPError, UserError
from ..messages import (
BinaryContent,
BuiltinToolCallPart,
@@ -410,7 +410,7 @@ async def _generate_content(
model_name=self._model_name,
body=cast(Any, e.details), # pyright: ignore[reportUnknownMemberType]
) from e
raise # pragma: lax no cover
raise ModelAPIError(model_name=self._model_name, message=str(e)) from e

async def _build_content_and_config(
self,
8 changes: 5 additions & 3 deletions pydantic_ai_slim/pydantic_ai/models/groq.py
@@ -16,7 +16,7 @@
from .._thinking_part import split_content_into_text_and_thinking
from .._utils import generate_tool_call_id, guard_tool_call_id as _guard_tool_call_id, number_to_datetime
from ..builtin_tools import WebSearchTool
from ..exceptions import UserError
from ..exceptions import ModelAPIError, UserError
from ..messages import (
BinaryContent,
BuiltinToolCallPart,
@@ -52,7 +52,7 @@
)

try:
from groq import NOT_GIVEN, APIError, APIStatusError, AsyncGroq, AsyncStream
from groq import NOT_GIVEN, APIConnectionError, APIError, APIStatusError, AsyncGroq, AsyncStream
from groq.types import chat
from groq.types.chat.chat_completion_content_part_image_param import ImageURL
from groq.types.chat.chat_completion_message import ExecutedTool
@@ -314,7 +314,9 @@ async def _completions_create(
except APIStatusError as e:
if (status_code := e.status_code) >= 400:
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e
raise # pragma: lax no cover
raise ModelAPIError(model_name=self.model_name, message=e.message) from e # pragma: no cover
except APIConnectionError as e:
raise ModelAPIError(model_name=self.model_name, message=e.message) from e

def _process_response(self, response: chat.ChatCompletion) -> ModelResponse:
"""Process a non-streamed response, and prepare a message to return."""
4 changes: 2 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/mistral.py
@@ -13,7 +13,7 @@
from .. import ModelHTTPError, UnexpectedModelBehavior, _utils
from .._run_context import RunContext
from .._utils import generate_tool_call_id as _generate_tool_call_id, now_utc as _now_utc, number_to_datetime
from ..exceptions import UserError
from ..exceptions import ModelAPIError, UserError
from ..messages import (
BinaryContent,
BuiltinToolCallPart,
@@ -246,7 +246,7 @@ async def _completions_create(
except SDKError as e:
if (status_code := e.status_code) >= 400:
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e
raise # pragma: lax no cover
raise ModelAPIError(model_name=self.model_name, message=e.message) from e

assert response, 'An unexpected empty response from Mistral.'
return response
8 changes: 6 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -14,7 +14,7 @@
from pydantic_core import to_json
from typing_extensions import assert_never, deprecated

from .. import ModelHTTPError, UnexpectedModelBehavior, _utils, usage
from .. import ModelAPIError, ModelHTTPError, UnexpectedModelBehavior, _utils, usage
from .._output import DEFAULT_OUTPUT_TOOL_NAME, OutputObjectDefinition
from .._run_context import RunContext
from .._thinking_part import split_content_into_text_and_thinking
@@ -55,7 +55,7 @@
from . import Model, ModelRequestParameters, StreamedResponse, check_allow_model_requests, download_item, get_user_agent

try:
from openai import NOT_GIVEN, APIStatusError, AsyncOpenAI, AsyncStream
from openai import NOT_GIVEN, APIConnectionError, APIStatusError, AsyncOpenAI, AsyncStream
from openai.types import AllModels, chat, responses
from openai.types.chat import (
ChatCompletionChunk,
@@ -549,6 +549,8 @@ async def _completions_create(
if (status_code := e.status_code) >= 400:
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e
raise # pragma: lax no cover
except APIConnectionError as e:
Review thread:

Collaborator: Why not the top-level APIError?

Collaborator: Please also update all the other models!

Contributor (author): Will do. I only looked at Google, but I didn't see any logic handling it, and I remembered there's another PR adding FallbackModel support for the Google models. Is that still open?

Contributor (author): Looking closer at this, there are three exceptions that inherit from APIError:

class APIResponseValidationError(APIError):
class APIStatusError(APIError):
class APIConnectionError(APIError):

We were already handling APIStatusError, and this PR now handles APIConnectionError. In my opinion, APIResponseValidationError doesn't belong with the other two: in the case of bad validation, the model should retry instead of falling back.

To be clearer, the two we're handling are issues that likely won't fix themselves, like a bad credential, a bad request, or connection and server issues, while a validation error should be rare and should fix itself. When it doesn't fix itself, it gives the user information: "this model doesn't handle your use case very well, apparently."

Does that make sense?

Collaborator: Yep, makes sense.

(A sketch of this exception-mapping pattern follows this file's diff.)

raise ModelAPIError(model_name=self.model_name, message=e.message) from e

def _validate_completion(self, response: chat.ChatCompletion) -> chat.ChatCompletion:
"""Hook that validates chat completions before processing.
Expand Down Expand Up @@ -1351,6 +1353,8 @@ async def _responses_create(
if (status_code := e.status_code) >= 400:
raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e
raise # pragma: lax no cover
except APIConnectionError as e:
raise ModelAPIError(model_name=self.model_name, message=e.message) from e

def _get_reasoning(self, model_settings: OpenAIResponsesModelSettings) -> Reasoning | Omit:
reasoning_effort = model_settings.get('openai_reasoning_effort', None)
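
As discussed in the review thread above, only two of the three `APIError` subclasses are mapped. A standalone sketch of that pattern, assuming the OpenAI SDK; the wrapper function itself is illustrative and not part of the PR:

```python
from openai import APIConnectionError, APIStatusError, AsyncOpenAI

from pydantic_ai import ModelAPIError, ModelHTTPError


async def create_completion(client: AsyncOpenAI, model_name: str, **params):
    """Illustrative wrapper mirroring the PR's error-mapping pattern."""
    try:
        return await client.chat.completions.create(model=model_name, **params)
    except APIStatusError as e:
        if (status_code := e.status_code) >= 400:
            # HTTP failures carry a status code and a response body.
            raise ModelHTTPError(status_code=status_code, model_name=model_name, body=e.body) from e
        raise
    except APIConnectionError as e:
        # Connection failures have no HTTP status, so map to the base class.
        raise ModelAPIError(model_name=model_name, message=e.message) from e
    # APIResponseValidationError, the third APIError subclass, is deliberately
    # not caught: validation errors should surface (and be retried) rather
    # than trigger a fallback to another model.
```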
40 changes: 38 additions & 2 deletions tests/models/test_anthropic.py
@@ -24,6 +24,7 @@
DocumentUrl,
FinalResultEvent,
ImageUrl,
ModelAPIError,
ModelHTTPError,
ModelMessage,
ModelRequest,
@@ -61,7 +62,7 @@
from .mock_async_stream import MockAsyncStream

with try_import() as imports_successful:
from anthropic import NOT_GIVEN, APIStatusError, AsyncAnthropic
from anthropic import NOT_GIVEN, APIConnectionError, APIStatusError, AsyncAnthropic
from anthropic.lib.tools import BetaAbstractMemoryTool
from anthropic.resources.beta import AsyncBeta
from anthropic.types.beta import (
@@ -144,7 +145,7 @@ def beta(self) -> AsyncBeta:

@cached_property
def messages(self) -> Any:
return type('Messages', (), {'create': self.messages_create})
return type('Messages', (), {'create': self.messages_create, 'count_tokens': self.messages_count_tokens})

@classmethod
def create_mock(cls, messages_: MockAnthropicMessage | Sequence[MockAnthropicMessage]) -> AsyncAnthropic:
@@ -180,6 +181,11 @@ async def messages_create(
self.index += 1
return response

async def messages_count_tokens(self, *_args: Any, **_kwargs: Any) -> Any:
if self.messages_ is not None:
raise_if_exception(self.messages_ if not isinstance(self.messages_, Sequence) else self.messages_[0])
return None # pragma: no cover


def completion_message(content: list[BetaContentBlock], usage: BetaUsage) -> BetaMessage:
return BetaMessage(
@@ -1205,6 +1211,36 @@ def test_model_status_error(allow_model_requests: None) -> None:
)


def test_model_connection_error(allow_model_requests: None) -> None:
mock_client = MockAnthropic.create_mock(
APIConnectionError(
message='Connection to https://api.anthropic.com timed out',
request=httpx.Request('POST', 'https://api.anthropic.com/v1/messages'),
)
)
m = AnthropicModel('claude-sonnet-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
agent = Agent(m)
with pytest.raises(ModelAPIError) as exc_info:
agent.run_sync('hello')
assert exc_info.value.model_name == 'claude-sonnet-4-5'
assert 'Connection to https://api.anthropic.com timed out' in str(exc_info.value.message)


async def test_count_tokens_connection_error(allow_model_requests: None) -> None:
mock_client = MockAnthropic.create_mock(
APIConnectionError(
message='Connection to https://api.anthropic.com timed out',
request=httpx.Request('POST', 'https://api.anthropic.com/v1/messages'),
)
)
m = AnthropicModel('claude-sonnet-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
agent = Agent(m)
with pytest.raises(ModelAPIError) as exc_info:
await agent.run('hello', usage_limits=UsageLimits(input_tokens_limit=20, count_tokens_before_request=True))
assert exc_info.value.model_name == 'claude-sonnet-4-5'
assert 'Connection to https://api.anthropic.com timed out' in str(exc_info.value.message)


async def test_document_binary_content_input(
allow_model_requests: None, anthropic_api_key: str, document_content: BinaryContent
):