
Input and output validation of tool calls #2928

@yarnabrina

Question

I am developing an agent with pydantic-ai as part of my work. It will use multiple tools whose response models are deeply nested Pydantic BaseModel classes (generated by a custom non-agentic SDK; generating them is not the agent's responsibility). When I explored something similar earlier with Agno v1, wrong formats were sometimes generated, causing downstream issues. So I wanted to understand what kind of type validation pydantic-ai applies to agents/tools, and I introduced an intentional mistake; as far as I can see, nothing failed.
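For context, the real tool response models are shaped roughly like this (a hypothetical, simplified sketch; the actual classes come from the SDK):

from pydantic import BaseModel


class Address(BaseModel):
    city: str
    country: str


class Employee(BaseModel):
    name: str
    address: Address


class Department(BaseModel):
    # nested models several levels deep, as in the SDK-generated classes
    head: Employee
    members: list[Employee]

To isolate the behaviour, the reproduction below uses a deliberately simple tool instead: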

import asyncio

from devtools import pprint
from openai import AsyncOpenAI
from pydantic_ai import Agent
from pydantic_ai.messages import ModelMessage
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.openai import OpenAIProvider
from pydantic_ai.settings import ModelSettings

CLIENT = AsyncOpenAI(
    api_key="placeholder_api_key",
    base_url="placeholder_base_url",
    default_headers={"placeholder_header_key": "placeholder_header_value"},
)
MODEL = OpenAIChatModel(
    "gpt-4o-mini",
    provider=OpenAIProvider(openai_client=CLIENT),
    settings=ModelSettings(max_tokens=4096, temperature=0, top_p=1, timeout=300),
)

AGENT = Agent(
    model=MODEL, system_prompt="You are a personal assistant. You help to perform basic addition."
)


@AGENT.tool_plain
def add(left_addend: float, right_addend: float) -> float:
    # intentionally returning str instead of float
    return f"result is {left_addend + right_addend}."


async def main(prompt: str) -> list[ModelMessage]:
    response = await AGENT.run(prompt)

    return response.all_messages()


if __name__ == "__main__":
    pprint(asyncio.run(main("What happens if you add one and two?")))
    # [
    #     ModelRequest(
    #         parts=[
    #             SystemPromptPart(
    #                 content="You are a personal assistant. You help to perform basic addition.",
    #                 timestamp=datetime.datetime(
    #                     2025, 9, 17, 3, 49, 7, 93770, tzinfo=datetime.timezone.utc
    #                 ),
    #                 dynamic_ref=None,
    #                 part_kind="system-prompt",
    #             ),
    #             UserPromptPart(
    #                 content="What happens if you add one and two?",
    #                 timestamp=datetime.datetime(
    #                     2025, 9, 17, 3, 49, 7, 93778, tzinfo=datetime.timezone.utc
    #                 ),
    #                 part_kind="user-prompt",
    #             ),
    #         ],
    #         instructions=None,
    #         kind="request",
    #     ),
    #     ModelResponse(
    #         parts=[
    #             ToolCallPart(
    #                 tool_name="add",
    #                 args='{"left_addend":1,"right_addend":2}',
    #                 tool_call_id="call_Ten5yXQ2u7r9PE1cjavlQK8i",
    #                 part_kind="tool-call",
    #             )
    #         ],
    #         usage=RequestUsage(
    #             input_tokens=66,
    #             cache_write_tokens=0,
    #             cache_read_tokens=0,
    #             output_tokens=21,
    #             input_audio_tokens=0,
    #             cache_audio_read_tokens=0,
    #             output_audio_tokens=0,
    #             details={
    #                 "accepted_prediction_tokens": 0,
    #                 "audio_tokens": 0,
    #                 "reasoning_tokens": 0,
    #                 "rejected_prediction_tokens": 0,
    #             },
    #         ),
    #         model_name="gpt-4o-mini-2024-07-18",
    #         timestamp=datetime.datetime(2025, 9, 17, 3, 49, 8, tzinfo=TzInfo(UTC)),
    #         kind="response",
    #         provider_name="openai",
    #         provider_details={"finish_reason": "tool_calls"},
    #         provider_response_id="chatcmpl-CGdSGOnySDWoL5QLQu3yaSScyYXBq",
    #         finish_reason="tool_call",
    #     ),
    #     ModelRequest(
    #         parts=[
    #             ToolReturnPart(
    #                 tool_name="add",
    #                 content="result is 3.0.",
    #                 tool_call_id="call_Ten5yXQ2u7r9PE1cjavlQK8i",
    #                 metadata=None,
    #                 timestamp=datetime.datetime(
    #                     2025, 9, 17, 3, 49, 9, 479763, tzinfo=datetime.timezone.utc
    #                 ),
    #                 part_kind="tool-return",
    #             )
    #         ],
    #         instructions=None,
    #         kind="request",
    #     ),
    #     ModelResponse(
    #         parts=[
    #             TextPart(
    #                 content="If you add one and two, the result is three.",
    #                 id=None,
    #                 part_kind="text",
    #             )
    #         ],
    #         usage=RequestUsage(
    #             input_tokens=101,
    #             cache_write_tokens=0,
    #             cache_read_tokens=0,
    #             output_tokens=13,
    #             input_audio_tokens=0,
    #             cache_audio_read_tokens=0,
    #             output_audio_tokens=0,
    #             details={
    #                 "accepted_prediction_tokens": 0,
    #                 "audio_tokens": 0,
    #                 "reasoning_tokens": 0,
    #                 "rejected_prediction_tokens": 0,
    #             },
    #         ),
    #         model_name="gpt-4o-mini-2024-07-18",
    #         timestamp=datetime.datetime(2025, 9, 17, 3, 49, 9, tzinfo=TzInfo(UTC)),
    #         kind="response",
    #         provider_name="openai",
    #         provider_details={"finish_reason": "stop"},
    #         provider_response_id="chatcmpl-CGdSHf6jJPAAK6CJRMZX8Rgyj6tgD",
    #         finish_reason="stop",
    #     ),
    # ]

I want to understand whether this non-failure is a bug or by design. Are tool calls not expected to be type-validated, e.g. similar to how pydantic.validate_call works?
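For comparison, the failure I expected can be reproduced outside pydantic-ai with pydantic.validate_call (a minimal sketch of the kind of validation I had in mind, not a claim about how pydantic-ai works internally):

from pydantic import validate_call


@validate_call(validate_return=True)
def add(left_addend: float, right_addend: float) -> float:
    # same intentional mistake: returning a str where the annotation says float
    return f"result is {left_addend + right_addend}."


add(1, 2)  # raises pydantic.ValidationError: the return value cannot be coerced to float

Here the string "result is 3.0." is not parseable as a float, so the return-value validation fails loudly instead of the string being passed through to the model.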

Additional Context

macOS 15.7
Python 3.12.8
pydantic-ai-slim 1.0.6
