From 610893e988ffb5d55608b3b7f2428b633560b0a1 Mon Sep 17 00:00:00 2001 From: AbdulKabirs Date: Wed, 17 Sep 2025 15:24:42 +0500 Subject: [PATCH 1/2] docs: improve core components documentation Core Components: - Enhanced agent.py base classes documentation - Added comprehensive Computer interface docs - Improved guardrail system documentation Model & Provider: - Enhanced model interfaces documentation - Improved provider management docs - Added chat completion utility documentation MCP: - Added module documentation - Improved server implementation docs Memory: - Enhanced OpenAI conversation session docs Realtime: - Improved realtime agent and event system docs Voice: - Enhanced TTS model settings documentation --- src/agents/agent.py | 32 ++++++++++++++-- src/agents/computer.py | 15 +++++++- src/agents/guardrail.py | 13 ++++++- src/agents/mcp/__init__.py | 6 +++ src/agents/mcp/server.py | 23 +++++++++++- .../memory/openai_conversations_session.py | 21 +++++++++++ src/agents/models/chatcmpl_helpers.py | 19 +++++++++- src/agents/models/interface.py | 37 +++++++++++++++++-- src/agents/models/multi_provider.py | 28 +++++++++++++- src/agents/realtime/agent.py | 35 ++++++++++++------ src/agents/realtime/events.py | 20 ++++++++-- src/agents/realtime/model_events.py | 28 +++++++++++++- src/agents/voice/model.py | 12 +++++- 13 files changed, 256 insertions(+), 33 deletions(-) diff --git a/src/agents/agent.py b/src/agents/agent.py index b64a6ea1d..0a00c7667 100644 --- a/src/agents/agent.py +++ b/src/agents/agent.py @@ -37,6 +37,19 @@ @dataclass class ToolsToFinalOutputResult: + """Result type for processing tool outputs into final agent outputs. + + This class helps manage the transition between tool execution results + and the final output of an agent run. It determines whether more LLM + processing is needed or if we have reached the final output state. + + Attributes: + is_final_output: Whether this is the final output. 
If False, the LLM + will run again and receive the tool call output. + final_output: The final output value. Can be None if `is_final_output` + is False, otherwise must match the `output_type` of the agent. + """ + is_final_output: bool """Whether this is the final output. If False, the LLM will run again and receive the tool call output. @@ -73,10 +86,23 @@ class MCPConfig(TypedDict): @dataclass class AgentBase(Generic[TContext]): - """Base class for `Agent` and `RealtimeAgent`.""" + """Base class for all agent implementations in the OpenAI Agents SDK. + + This class provides the core functionality shared between standard agents + and realtime agents. It manages tools, model settings, and agent configuration. + + Generic Args: + TContext: The type of context maintained during agent execution. + + Key Features: + - Tool management and execution + - Model configuration + - Handoff support for agent collaboration + - Context management across runs + """ name: str - """The name of the agent.""" + """The name of the agent, used for identification and logging.""" handoff_description: str | None = None """A description of the agent. This is used when the agent is used as a handoff, so that an @@ -84,7 +110,7 @@ class AgentBase(Generic[TContext]): """ tools: list[Tool] = field(default_factory=list) - """A list of tools that the agent can use.""" + """A list of tools that the agent has access to and can use during execution.""" mcp_servers: list[MCPServer] = field(default_factory=list) """A list of [Model Context Protocol](https://modelcontextprotocol.io/) servers that diff --git a/src/agents/computer.py b/src/agents/computer.py index 1b9224d59..d1ebd6771 100644 --- a/src/agents/computer.py +++ b/src/agents/computer.py @@ -6,8 +6,19 @@ class Computer(abc.ABC): - """A computer implemented with sync operations. The Computer interface abstracts the - operations needed to control a computer or browser.""" + """Abstract interface for computer and browser control operations. 
+ + This interface defines the standard operations that can be performed on a computer + or browser environment, such as: + - Mouse movements and clicks + - Keyboard input + - Screenshot capture + - Window/viewport dimensions + - Environment detection + + Implementations should provide synchronous operations for each method to ensure + reliable control across different platforms and environments. + """ @property @abc.abstractmethod diff --git a/src/agents/guardrail.py b/src/agents/guardrail.py index 99e287675..c796ce37c 100644 --- a/src/agents/guardrail.py +++ b/src/agents/guardrail.py @@ -18,7 +18,17 @@ @dataclass class GuardrailFunctionOutput: - """The output of a guardrail function.""" + """Output from a guardrail function's validation check. + + This class represents the result of executing a guardrail's validation logic. + It includes both the validation result and optional detailed information about + what was checked and why the validation succeeded or failed. + + Use this to: + - Track guardrail validation results + - Provide detailed feedback about validation checks + - Control agent execution flow based on validation + """ output_info: Any """ @@ -29,6 +39,7 @@ class GuardrailFunctionOutput: tripwire_triggered: bool """ Whether the tripwire was triggered. If triggered, the agent's execution will be halted. + Set to True to stop agent execution when validation fails. """ diff --git a/src/agents/mcp/__init__.py b/src/agents/mcp/__init__.py index da5a68b16..9e1d7b7dd 100644 --- a/src/agents/mcp/__init__.py +++ b/src/agents/mcp/__init__.py @@ -1,3 +1,9 @@ +"""Model Context Protocol (MCP) for OpenAI Agents SDK. + +Provides server implementations and utilities for Model Context Protocol, +enabling standardized communication between agents and external tools. 
+""" + try: from .server import ( MCPServer, diff --git a/src/agents/mcp/server.py b/src/agents/mcp/server.py index 0acb1345a..90dc4bda3 100644 --- a/src/agents/mcp/server.py +++ b/src/agents/mcp/server.py @@ -230,7 +230,17 @@ def create_streams( GetSessionIdCallback | None, ] ]: - """Create the streams for the server.""" + """Create communication streams for the MCP server. + + Returns: + A context manager that yields a tuple containing: + - A receive stream for incoming messages and exceptions + - A send stream for outgoing messages + - An optional callback for getting session IDs + + This method is used internally by server implementations to establish + bidirectional communication channels with the MCP service. + """ pass async def __aenter__(self): @@ -245,6 +255,17 @@ def invalidate_tools_cache(self): self._cache_dirty = True async def _run_with_retries(self, func: Callable[[], Awaitable[T]]) -> T: + """Execute a function with exponential backoff retry logic. + + Args: + func: Async function to execute with retries. + + Returns: + The result of the function if successful. + + Retries failed operations using exponential backoff based on + max_retry_attempts and retry_backoff_seconds_base settings. + """ attempts = 0 while True: try: diff --git a/src/agents/memory/openai_conversations_session.py b/src/agents/memory/openai_conversations_session.py index ce0621358..ca9918d48 100644 --- a/src/agents/memory/openai_conversations_session.py +++ b/src/agents/memory/openai_conversations_session.py @@ -20,12 +20,33 @@ async def start_openai_conversations_session(openai_client: AsyncOpenAI | None = class OpenAIConversationsSession(SessionABC): + """Session implementation using OpenAI's Conversations API for persistence. + + This class provides conversation history storage and retrieval using OpenAI's + Conversations API. It automatically manages conversation IDs and handles + API communication for storing and retrieving conversation items. 
+ + Features: + - Automatic session creation and management + - Persistent storage via OpenAI's infrastructure + - Support for retrieving paginated history + - Automatic client configuration + """ + def __init__( self, *, conversation_id: str | None = None, openai_client: AsyncOpenAI | None = None, ): + """Initialize an OpenAI Conversations session. + + Args: + conversation_id: Optional existing conversation ID to use. + If None, a new conversation will be created. + openai_client: Optional custom OpenAI client to use. + If None, uses default client or creates a new one. + """ self._session_id: str | None = conversation_id _openai_client = openai_client if _openai_client is None: diff --git a/src/agents/models/chatcmpl_helpers.py b/src/agents/models/chatcmpl_helpers.py index 0cee21ecc..a44a7f8ff 100644 --- a/src/agents/models/chatcmpl_helpers.py +++ b/src/agents/models/chatcmpl_helpers.py @@ -10,8 +10,25 @@ class ChatCmplHelpers: + """Helper utilities for OpenAI chat completions API integration. + + This class provides utilities for working with OpenAI's chat completions API, + handling common tasks like: + - Determining if a client is using OpenAI's official API + - Managing response storage settings + - Configuring streaming options + """ + @classmethod - def is_openai(cls, client: AsyncOpenAI): + def is_openai(cls, client: AsyncOpenAI) -> bool: + """Check if the client is using the official OpenAI API. + + Args: + client: The AsyncOpenAI client instance to check + + Returns: + True if using api.openai.com, False otherwise + """ return str(client.base_url).startswith("https://api.openai.com") @classmethod diff --git a/src/agents/models/interface.py b/src/agents/models/interface.py index f25934780..d5889dc75 100644 --- a/src/agents/models/interface.py +++ b/src/agents/models/interface.py @@ -17,24 +17,53 @@ class ModelTracing(enum.Enum): + """Configuration for model execution tracing and debugging. 
+ + This enum controls how much information is collected during model execution + for debugging, monitoring, and analysis purposes. + """ + DISABLED = 0 - """Tracing is disabled entirely.""" + """Tracing is disabled entirely. No debug information is collected.""" ENABLED = 1 - """Tracing is enabled, and all data is included.""" + """Full tracing is enabled. All data including inputs and outputs is collected.""" ENABLED_WITHOUT_DATA = 2 - """Tracing is enabled, but inputs/outputs are not included.""" + """Tracing is enabled but inputs/outputs are excluded. Useful for production monitoring.""" def is_disabled(self) -> bool: + """Check if tracing is completely disabled. + + Returns: + True if no tracing information should be collected. + """ return self == ModelTracing.DISABLED def include_data(self) -> bool: + """Check if full data should be included in traces. + + Returns: + True if input/output data should be included in traces. + """ return self == ModelTracing.ENABLED class Model(abc.ABC): - """The base interface for calling an LLM.""" + """Base interface for Large Language Model interactions. + + This abstract class defines the contract for all model implementations + in the OpenAI Agents SDK. It provides methods for both standard and + streaming responses from language models. + + Key responsibilities: + - Handle model API communication + - Process system instructions and user inputs + - Apply model settings and configurations + - Manage tool and handoff integrations + - Support tracing and debugging + - Handle both non-streaming and streaming responses + """ @abc.abstractmethod async def get_response( diff --git a/src/agents/models/multi_provider.py b/src/agents/models/multi_provider.py index d075ac9b6..034e846a4 100644 --- a/src/agents/models/multi_provider.py +++ b/src/agents/models/multi_provider.py @@ -8,13 +8,37 @@ class MultiProviderMap: - """A map of model name prefixes to ModelProviders.""" + """Registry for managing multiple model providers in the system. 
+ + This class maintains a mapping between model name prefixes and their + corresponding ModelProvider implementations. It enables: + - Dynamic registration of model providers + - Provider lookup by model prefix + - Support for multiple model backends + - Flexible provider management + + Example: + ```python + provider_map = MultiProviderMap() + provider_map.add_provider("openai", OpenAIProvider()) + provider_map.add_provider("custom", CustomProvider()) + provider = provider_map.get_provider("openai") # Get OpenAI provider + ``` + """ def __init__(self): + """Initialize an empty provider mapping.""" self._mapping: dict[str, ModelProvider] = {} def has_prefix(self, prefix: str) -> bool: - """Returns True if the given prefix is in the mapping.""" + """Check if a provider exists for the given prefix. + + Args: + prefix: The model name prefix to check + + Returns: + True if a provider is registered for this prefix + """ return prefix in self._mapping def get_mapping(self) -> dict[str, ModelProvider]: diff --git a/src/agents/realtime/agent.py b/src/agents/realtime/agent.py index c04053db4..fb4a7357c 100644 --- a/src/agents/realtime/agent.py +++ b/src/agents/realtime/agent.py @@ -25,18 +25,29 @@ @dataclass class RealtimeAgent(AgentBase, Generic[TContext]): - """A specialized agent instance that is meant to be used within a `RealtimeSession` to build - voice agents. Due to the nature of this agent, some configuration options are not supported - that are supported by regular `Agent` instances. For example: - - `model` choice is not supported, as all RealtimeAgents will be handled by the same model - within a `RealtimeSession`. - - `modelSettings` is not supported, as all RealtimeAgents will be handled by the same model - within a `RealtimeSession`. - - `outputType` is not supported, as RealtimeAgents do not support structured outputs. - - `toolUseBehavior` is not supported, as all RealtimeAgents will be handled by the same model - within a `RealtimeSession`. 
- - `voice` can be configured on an `Agent` level; however, it cannot be changed after the first - agent within a `RealtimeSession` has spoken. + """Specialized agent for real-time interactive scenarios like voice conversations. + + RealtimeAgent is designed to operate within a RealtimeSession for building + interactive voice and streaming agents. It provides real-time response + capabilities while maintaining consistency across a session. + + Key Features: + - Real-time streaming responses + - Voice interaction support + - Session-wide model consistency + - Streamlined configuration for real-time use + + Limitations: + - Model selection is fixed per RealtimeSession + - Model settings are session-wide + - No structured output support + - Tool use behavior is not configurable per agent + - Voice cannot be changed after the first agent in the session has spoken + + Note: + Unlike standard Agents, RealtimeAgents share core settings within + their session to ensure consistent behavior and performance in + real-time scenarios. See `AgentBase` for base parameters that are shared with `Agent`s. """ diff --git a/src/agents/realtime/events.py b/src/agents/realtime/events.py index 3c523c33b..9171feddc 100644 --- a/src/agents/realtime/events.py +++ b/src/agents/realtime/events.py @@ -15,21 +15,33 @@ @dataclass class RealtimeEventInfo: + """Base information included in all realtime events. + + This class provides the common context and metadata that is + shared across all realtime event types in the system. + """ + context: RunContextWrapper - """The context for the event.""" + """The execution context for the event, containing state and settings.""" @dataclass class RealtimeAgentStartEvent: - """A new agent has started.""" + """Event emitted when a new realtime agent begins execution. + + This event marks the beginning of an agent's lifecycle in a realtime + session. It provides access to the agent instance and execution context + for monitoring and management purposes. 
+ """ agent: RealtimeAgent - """The new agent.""" + """The agent instance that is starting execution.""" info: RealtimeEventInfo - """Common info for all events, such as the context.""" + """Common event information including execution context.""" type: Literal["agent_start"] = "agent_start" + """Discriminator field to identify this as an agent start event.""" @dataclass diff --git a/src/agents/realtime/model_events.py b/src/agents/realtime/model_events.py index 7c839aa18..3c13034a2 100644 --- a/src/agents/realtime/model_events.py +++ b/src/agents/realtime/model_events.py @@ -21,7 +21,19 @@ class RealtimeModelErrorEvent: @dataclass class RealtimeModelToolCallEvent: - """Model attempted a tool/function call.""" + """Event emitted when a model attempts to call a tool/function in realtime. + + This event is generated during model streaming when the model decides + to use a tool. It contains all necessary information to execute the + tool call and track its lifecycle in the realtime session. + + Attributes: + name: Name of the tool/function being called + call_id: Unique identifier for this specific tool call + arguments: JSON-formatted string containing the tool arguments + id: Optional unique identifier for this event + previous_item_id: Optional ID of the item that led to this tool call + """ name: str call_id: str @@ -35,7 +47,19 @@ class RealtimeModelToolCallEvent: @dataclass class RealtimeModelAudioEvent: - """Raw audio bytes emitted by the model.""" + """Event containing streaming audio data from a model's response. + + This event is emitted when a model produces audio output during + a realtime session, typically as part of a text-to-speech or + voice response feature. 
+ + Attributes: + data: Raw audio bytes from the model + response_id: Identifier linking this audio to a specific model response + item_id: ID of the realtime item containing this audio content + content_index: Position of this audio chunk in the item's content array + type: Discriminator field identifying this as an audio event + """ data: bytes response_id: str diff --git a/src/agents/voice/model.py b/src/agents/voice/model.py index b048a452d..43b8735a2 100644 --- a/src/agents/voice/model.py +++ b/src/agents/voice/model.py @@ -20,7 +20,17 @@ @dataclass class TTSModelSettings: - """Settings for a TTS model.""" + """Configuration settings for Text-to-Speech (TTS) model behavior. + + This class defines all configurable aspects of TTS processing, including: + - Voice selection and characteristics + - Audio streaming and buffering + - Data format and transformation + - Model instructions and behavior + + These settings control how text is converted to speech and how the + audio is processed and streamed to clients. + """ voice: TTSVoice | None = None """ From 696651cbaadf4e9487ba1fd97429ad77aff4f7c9 Mon Sep 17 00:00:00 2001 From: AbdulKabirs Date: Wed, 17 Sep 2025 15:33:06 +0500 Subject: [PATCH 2/2] style: fix docstring whitespace issues - Remove trailing whitespace in docstrings - Fix blank line whitespace - Make docstring formatting consistent --- src/agents/agent.py | 12 ++++++------ src/agents/computer.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/agents/agent.py b/src/agents/agent.py index 0a00c7667..c98c4e8bf 100644 --- a/src/agents/agent.py +++ b/src/agents/agent.py @@ -38,15 +38,15 @@ @dataclass class ToolsToFinalOutputResult: """Result type for processing tool outputs into final agent outputs. - + This class helps manage the transition between tool execution results and the final output of an agent run. It determines whether more LLM processing is needed or if we have reached the final output state. 
Attributes: - is_final_output: Whether this is the final output. If False, the LLM + is_final_output: Whether this is the final output. If False, the LLM will run again and receive the tool call output. - final_output: The final output value. Can be None if `is_final_output` + final_output: The final output value. Can be None if `is_final_output` is False, otherwise must match the `output_type` of the agent. """ @@ -87,13 +87,13 @@ class MCPConfig(TypedDict): @dataclass class AgentBase(Generic[TContext]): """Base class for all agent implementations in the OpenAI Agents SDK. - + This class provides the core functionality shared between standard agents and realtime agents. It manages tools, model settings, and agent configuration. - + Generic Args: TContext: The type of context maintained during agent execution. - + Key Features: - Tool management and execution - Model configuration diff --git a/src/agents/computer.py b/src/agents/computer.py index d1ebd6771..5bb1cb94d 100644 --- a/src/agents/computer.py +++ b/src/agents/computer.py @@ -7,7 +7,7 @@ class Computer(abc.ABC): """Abstract interface for computer and browser control operations. - + This interface defines the standard operations that can be performed on a computer or browser environment, such as: - Mouse movements and clicks @@ -15,7 +15,7 @@ class Computer(abc.ABC): - Screenshot capture - Window/viewport dimensions - Environment detection - + Implementations should provide synchronous operations for each method to ensure reliable control across different platforms and environments. """