Skip to content

Commit 0590c72

Browse files
committed
[8/n] Make realtime more like the rest of agents sdk
Key changes: (1) Rename "transport" to "model". (2) Extract all model settings into `RealtimeSessionModelSettings`. (3) Add `RealtimeRunConfig`, similar to the `RunConfig` in `run.py`. In the next PR I'll improve the session.
1 parent 38c5235 commit 0590c72

File tree

12 files changed

+519
-419
lines changed

12 files changed

+519
-419
lines changed

.vscode/launch.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"name": "Python Debugger: Python File",
9+
"type": "debugpy",
10+
"request": "launch",
11+
"program": "${file}"
12+
}
13+
]
14+
}

examples/realtime/demo.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55

66
import numpy as np
77

8+
from agents.realtime import RealtimeSession
9+
810
# Add the current directory to path so we can import ui
911
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
1012

1113
from agents import function_tool
12-
from agents.realtime import RealtimeAgent, RealtimeSession, RealtimeSessionEvent
14+
from agents.realtime import RealtimeAgent, RealtimeRunner, RealtimeSessionEvent
1315

1416
if TYPE_CHECKING:
1517
from .ui import AppUI
@@ -38,23 +40,34 @@ def get_weather(city: str) -> str:
3840

3941
class Example:
4042
def __init__(self) -> None:
41-
self.session = RealtimeSession(agent)
4243
self.ui = AppUI()
4344
self.ui.connected = asyncio.Event()
4445
self.ui.last_audio_item_id = None
4546
# Set the audio callback
4647
self.ui.set_audio_callback(self.on_audio_recorded)
4748

49+
self.session: RealtimeSession | None = None
50+
4851
async def run(self) -> None:
49-
self.session.add_listener(self.on_event)
50-
await self.session.connect()
51-
self.ui.set_is_connected(True)
52-
await self.ui.run_async()
52+
# Start UI in a separate task instead of waiting for it to complete
53+
ui_task = asyncio.create_task(self.ui.run_async())
54+
55+
# Set up session immediately without waiting for UI to finish
56+
runner = RealtimeRunner(agent)
57+
async with await runner.run() as session:
58+
self.session = session
59+
self.ui.set_is_connected(True)
60+
async for event in session:
61+
await self.on_event(event)
62+
63+
# Wait for UI task to complete when session ends
64+
await ui_task
5365

5466
async def on_audio_recorded(self, audio_bytes: bytes) -> None:
5567
"""Called when audio is recorded by the UI."""
5668
try:
5769
# Send the audio to the session
70+
assert self.session is not None
5871
await self.session.send_audio(audio_bytes)
5972
except Exception as e:
6073
self.ui.log_message(f"Error sending audio: {e}")

src/agents/realtime/__init__.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
from .agent import RealtimeAgent, RealtimeAgentHooks, RealtimeRunHooks
2-
from .config import APIKeyOrKeyFunc
2+
from .config import (
3+
RealtimeAudioFormat,
4+
RealtimeClientMessage,
5+
RealtimeInputAudioTranscriptionConfig,
6+
RealtimeModelName,
7+
RealtimeRunConfig,
8+
RealtimeSessionModelSettings,
9+
RealtimeTurnDetectionConfig,
10+
RealtimeUserInput,
11+
RealtimeUserInputMessage,
12+
RealtimeUserInputText,
13+
)
314
from .events import (
415
RealtimeAgentEndEvent,
516
RealtimeAgentStartEvent,
@@ -15,26 +26,34 @@
1526
RealtimeToolEnd,
1627
RealtimeToolStart,
1728
)
18-
from .session import RealtimeSession
19-
from .transport import (
20-
RealtimeModelName,
21-
RealtimeSessionTransport,
22-
RealtimeTransportConnectionOptions,
23-
RealtimeTransportListener,
29+
from .model import (
30+
RealtimeModel,
31+
RealtimeModelConfig,
32+
RealtimeModelListener,
2433
)
34+
from .runner import RealtimeRunner
35+
from .session import RealtimeSession
2536

2637
__all__ = [
2738
"RealtimeAgent",
2839
"RealtimeAgentHooks",
2940
"RealtimeRunHooks",
30-
"RealtimeSession",
41+
"RealtimeRunner",
3142
"RealtimeSessionListener",
3243
"RealtimeSessionListenerFunc",
33-
"APIKeyOrKeyFunc",
44+
"RealtimeRunConfig",
45+
"RealtimeSessionModelSettings",
46+
"RealtimeInputAudioTranscriptionConfig",
47+
"RealtimeTurnDetectionConfig",
48+
"RealtimeAudioFormat",
49+
"RealtimeClientMessage",
50+
"RealtimeUserInput",
51+
"RealtimeUserInputMessage",
52+
"RealtimeUserInputText",
3453
"RealtimeModelName",
35-
"RealtimeSessionTransport",
36-
"RealtimeTransportListener",
37-
"RealtimeTransportConnectionOptions",
54+
"RealtimeModel",
55+
"RealtimeModelListener",
56+
"RealtimeModelConfig",
3857
"RealtimeSessionEvent",
3958
"RealtimeAgentStartEvent",
4059
"RealtimeAgentEndEvent",
@@ -48,4 +67,5 @@
4867
"RealtimeError",
4968
"RealtimeHistoryUpdated",
5069
"RealtimeHistoryAdded",
70+
"RealtimeSession",
5171
]

src/agents/realtime/config.py

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,44 @@
11
from __future__ import annotations
22

3-
import inspect
43
from typing import (
54
Any,
6-
Callable,
75
Literal,
86
Union,
97
)
108

119
from typing_extensions import NotRequired, TypeAlias, TypedDict
1210

1311
from ..model_settings import ToolChoice
14-
from ..tool import FunctionTool
15-
from ..util._types import MaybeAwaitable
12+
from ..tool import Tool
13+
14+
RealtimeModelName: TypeAlias = Union[
15+
Literal[
16+
"gpt-4o-realtime-preview",
17+
"gpt-4o-mini-realtime-preview",
18+
"gpt-4o-realtime-preview-2025-06-03",
19+
"gpt-4o-realtime-preview-2024-12-17",
20+
"gpt-4o-realtime-preview-2024-10-01",
21+
"gpt-4o-mini-realtime-preview-2024-12-17",
22+
],
23+
str,
24+
]
25+
"""The name of a realtime model."""
1626

1727

1828
class RealtimeClientMessage(TypedDict):
1929
type: str # explicitly required
2030
other_data: NotRequired[dict[str, Any]]
2131

2232

23-
class UserInputText(TypedDict):
33+
class RealtimeUserInputText(TypedDict):
2434
type: Literal["input_text"]
2535
text: str
2636

2737

2838
class RealtimeUserInputMessage(TypedDict):
2939
type: Literal["message"]
3040
role: Literal["user"]
31-
content: list[UserInputText]
41+
content: list[RealtimeUserInputText]
3242

3343

3444
RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
@@ -55,9 +65,11 @@ class RealtimeTurnDetectionConfig(TypedDict):
5565
threshold: NotRequired[float]
5666

5767

58-
class RealtimeSessionConfig(TypedDict):
59-
api_key: NotRequired[APIKeyOrKeyFunc]
60-
model: NotRequired[str]
68+
class RealtimeSessionModelSettings(TypedDict):
69+
"""Model settings for a realtime model session."""
70+
71+
model_name: NotRequired[RealtimeModelName]
72+
6173
instructions: NotRequired[str]
6274
modalities: NotRequired[list[Literal["text", "audio"]]]
6375
voice: NotRequired[str]
@@ -68,24 +80,13 @@ class RealtimeSessionConfig(TypedDict):
6880
turn_detection: NotRequired[RealtimeTurnDetectionConfig]
6981

7082
tool_choice: NotRequired[ToolChoice]
71-
tools: NotRequired[list[FunctionTool]]
72-
73-
74-
APIKeyOrKeyFunc = str | Callable[[], MaybeAwaitable[str]]
75-
"""Either an API key or a function that returns an API key."""
76-
83+
tools: NotRequired[list[Tool]]
7784

78-
async def get_api_key(key: APIKeyOrKeyFunc | None) -> str | None:
79-
"""Get the API key from the key or key function."""
80-
if key is None:
81-
return None
82-
elif isinstance(key, str):
83-
return key
8485

85-
result = key()
86-
if inspect.isawaitable(result):
87-
return await result
88-
return result
86+
class RealtimeRunConfig(TypedDict):
87+
model_settings: NotRequired[RealtimeSessionModelSettings]
8988

9089
# TODO (rm) Add tracing support
9190
# tracing: NotRequired[RealtimeTracingConfig | None]
91+
# TODO (rm) Add guardrail support
92+
# TODO (rm) Add history audio storage config

src/agents/realtime/events.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from ..tool import Tool
88
from .agent import RealtimeAgent
99
from .items import RealtimeItem
10-
from .transport_events import RealtimeTransportAudioEvent, RealtimeTransportEvent
10+
from .model_events import RealtimeModelAudioEvent, RealtimeModelEvent
1111

1212

1313
@dataclass
@@ -96,7 +96,7 @@ class RealtimeToolEnd:
9696
class RealtimeRawTransportEvent:
9797
"""Forwards raw events from the transport layer."""
9898

99-
data: RealtimeTransportEvent
99+
data: RealtimeModelEvent
100100
"""The raw data from the transport layer."""
101101

102102
info: RealtimeEventInfo
@@ -119,7 +119,7 @@ class RealtimeAudioEnd:
119119
class RealtimeAudio:
120120
"""Triggered when the agent generates new audio to be played."""
121121

122-
audio: RealtimeTransportAudioEvent
122+
audio: RealtimeModelAudioEvent
123123
"""The audio event from the transport layer."""
124124

125125
info: RealtimeEventInfo

src/agents/realtime/model.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import abc
2+
from typing import Any, Callable
3+
4+
from typing_extensions import NotRequired, TypedDict
5+
6+
from ..util._types import MaybeAwaitable
7+
from .config import (
8+
RealtimeClientMessage,
9+
RealtimeSessionModelSettings,
10+
RealtimeUserInput,
11+
)
12+
from .model_events import RealtimeModelEvent, RealtimeModelToolCallEvent
13+
14+
15+
class RealtimeModelListener(abc.ABC):
16+
"""A listener for realtime model events."""
17+
18+
@abc.abstractmethod
19+
async def on_event(self, event: RealtimeModelEvent) -> None:
20+
"""Called when an event is emitted by the realtime model."""
21+
pass
22+
23+
24+
class RealtimeModelConfig(TypedDict):
25+
"""Options for connecting to a realtime model."""
26+
27+
api_key: NotRequired[str | Callable[[], MaybeAwaitable[str]]]
28+
"""The API key (or function that returns a key) to use when connecting. If unset, the model will
29+
try to use a sane default. For example, the OpenAI Realtime model will try to use the
30+
`OPENAI_API_KEY` environment variable.
31+
"""
32+
33+
url: NotRequired[str]
34+
"""The URL to use when connecting. If unset, the model will use a sane default. For example,
35+
the OpenAI Realtime model will use the default OpenAI WebSocket URL.
36+
"""
37+
38+
initial_model_settings: NotRequired[RealtimeSessionModelSettings]
39+
40+
41+
class RealtimeModel(abc.ABC):
42+
"""Interface for connecting to a realtime model and sending/receiving events."""
43+
44+
@abc.abstractmethod
45+
async def connect(self, options: RealtimeModelConfig) -> None:
46+
"""Establish a connection to the model and keep it alive."""
47+
pass
48+
49+
@abc.abstractmethod
50+
def add_listener(self, listener: RealtimeModelListener) -> None:
51+
"""Add a listener to the model."""
52+
pass
53+
54+
@abc.abstractmethod
55+
def remove_listener(self, listener: RealtimeModelListener) -> None:
56+
"""Remove a listener from the model."""
57+
pass
58+
59+
@abc.abstractmethod
60+
async def send_event(self, event: RealtimeClientMessage) -> None:
61+
"""Send an event to the model."""
62+
pass
63+
64+
@abc.abstractmethod
65+
async def send_message(
66+
self, message: RealtimeUserInput, other_event_data: dict[str, Any] | None = None
67+
) -> None:
68+
"""Send a message to the model."""
69+
pass
70+
71+
@abc.abstractmethod
72+
async def send_audio(self, audio: bytes, *, commit: bool = False) -> None:
73+
"""Send a raw audio chunk to the model.
74+
75+
Args:
76+
audio: The audio data to send.
77+
commit: Whether to commit the audio buffer to the model. If the model does not do turn
78+
detection, this can be used to indicate the turn is completed.
79+
"""
80+
pass
81+
82+
@abc.abstractmethod
83+
async def send_tool_output(
84+
self, tool_call: RealtimeModelToolCallEvent, output: str, start_response: bool
85+
) -> None:
86+
"""Send tool output to the model."""
87+
pass
88+
89+
@abc.abstractmethod
90+
async def interrupt(self) -> None:
91+
"""Interrupt the model. For example, could be triggered by a guardrail."""
92+
pass
93+
94+
@abc.abstractmethod
95+
async def close(self) -> None:
96+
"""Close the session."""
97+
pass

0 commit comments

Comments
 (0)