Skip to content

Parallel output tool calls with end_strategy='exhaustive' should call all output functions #3485

@Danipulok

Description

@Danipulok

Question

Hey there!

I was testing how Pydantic AI tool calls are handled, and noticed that when the LLM responds with 2 tool calls (both of which are output types), only one tool is called. Is this intended, or is there a way to configure it?

I understand this is likely intended, but it seems a little non-intuitive IMHO (though I can see the logic behind it).

Additional Context

MRE

import asyncio
import os

from pydantic import BaseModel
from pydantic_ai import Agent, ModelSettings, ToolOutput
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.openai import OpenAIProvider


class TextMessage(BaseModel):
    """Payload for a plain text message sent back to the user."""

    # Optional message body; None when the model supplies no text.
    text: str | None = None


async def send_text_message(
    message: TextMessage,
) -> None:
    """Render and print a plain text message to stdout."""
    rendered = f"\nText Message: {message}"
    print(rendered)


class QuickRepliesMessage(BaseModel):
    """Payload for a message that offers the user quick-reply choices."""

    # Optional message body shown above the quick replies.
    text: str | None = None
    # Optional list of quick-reply button labels.
    quick_replies: list[str] | None = None


async def send_quick_replies_message(
    message: QuickRepliesMessage,
) -> None:
    """Render and print a quick-replies message to stdout."""
    rendered = f"\nQuick Replies Message: {message}"
    print(rendered)


async def main() -> None:
    """Run the MRE: one agent with two tool-based output types.

    Streams the model's responses and prints each one together with the
    flag indicating whether it is the final message.
    """
    # Fail fast with a KeyError if the API key is not configured.
    api_key = os.environ["OPENAI_API_KEY"]

    model = OpenAIChatModel(
        "gpt-4o",
        provider=OpenAIProvider(api_key=api_key),
        settings=ModelSettings(temperature=0.1),
    )

    # Both output tools are registered; the issue is about whether the
    # agent invokes both when the model calls them in parallel.
    agent = Agent(
        model,
        output_type=[
            ToolOutput(send_text_message, name="send_text_message"),
            ToolOutput(send_quick_replies_message, name="send_quick_replies_message"),
        ],
        instructions=(
            "For response, call both `send_text_message` and "
            "`send_quick_replies_message` tools in parallel"
        ),
    )

    async with agent.run_stream("Tell me about Python") as run:
        # stream_responses() yields (ModelResponse, is_last_message) pairs.
        async for model_response, is_last_message in run.stream_responses():
            print(model_response, is_last_message, end="\n\n")


if __name__ == "__main__":
    asyncio.run(main())

Console:

ModelResponse(parts=[ToolCallPart(tool_name='send_text_message', args='{"text": "Python is a high-level, interpreted programming language known for its easy-to-read syntax and broad applicability. Created in the late 1980s by Guido van Rossum and released in 1991, Python emphasizes code readability and simplicity. It supports various programming paradigms, including procedural, object-oriented, and functional programming."}', tool_call_id='call_RcMJNbdMVPfB3jedSL8j4ioE'), ToolCallPart(tool_name='send_quick_replies_message', args='{"text": "Would you like to know about something specific in Python?", "quick_replies": ["Python Syntax", "Libraries", "Use Cases", "History", "Installation"]}', tool_call_id='call_xVHAJyJ6HDq8QzBM0cr7VC6j')], usage=RequestUsage(input_tokens=110, output_tokens=139, details={'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}), model_name='gpt-4o-2024-08-06', timestamp=datetime.datetime(2025, 11, 20, 4, 36, 45, tzinfo=TzInfo(0)), provider_name='openai', provider_details={'finish_reason': 'tool_calls'}, provider_response_id='chatcmpl-CdqhRR4PuMcDZLezcHUd2x00iOSh9', finish_reason='tool_call') True

Text Message: text='Python is a high-level, interpreted programming language known for its easy-to-read syntax and broad applicability. Created in the late 1980s by Guido van Rossum and released in 1991, Python emphasizes code readability and simplicity. It supports various programming paradigms, including procedural, object-oriented, and functional programming.'

Process finished with exit code 0

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions