Skip to content

Commit 7f35622

Browse files
committed
[8/n] Make realtime more like the rest of agents sdk
Key changes: 1. Rename "transport" to "model" (Transport -> Model). 2. Extract all model settings into `RealtimeSessionModelSettings`. 3. Add `RealtimeRunConfig`, similar to the `RunConfig` in `run.py`. In the next PR I'll improve the session.
1 parent 38c5235 commit 7f35622

File tree

11 files changed

+636
-531
lines changed

11 files changed

+636
-531
lines changed

examples/realtime/demo.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55

66
import numpy as np
77

8+
from agents.realtime import RealtimeSession
9+
810
# Add the current directory to path so we can import ui
911
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
1012

1113
from agents import function_tool
12-
from agents.realtime import RealtimeAgent, RealtimeSession, RealtimeSessionEvent
14+
from agents.realtime import RealtimeAgent, RealtimeRunner, RealtimeSessionEvent
1315

1416
if TYPE_CHECKING:
1517
from .ui import AppUI
@@ -38,23 +40,34 @@ def get_weather(city: str) -> str:
3840

3941
class Example:
4042
def __init__(self) -> None:
41-
self.session = RealtimeSession(agent)
4243
self.ui = AppUI()
4344
self.ui.connected = asyncio.Event()
4445
self.ui.last_audio_item_id = None
4546
# Set the audio callback
4647
self.ui.set_audio_callback(self.on_audio_recorded)
4748

49+
self.session: RealtimeSession | None = None
50+
4851
async def run(self) -> None:
49-
self.session.add_listener(self.on_event)
50-
await self.session.connect()
51-
self.ui.set_is_connected(True)
52-
await self.ui.run_async()
52+
# Start UI in a separate task instead of waiting for it to complete
53+
ui_task = asyncio.create_task(self.ui.run_async())
54+
55+
# Set up session immediately without waiting for UI to finish
56+
runner = RealtimeRunner(agent)
57+
async with await runner.run() as session:
58+
self.session = session
59+
self.ui.set_is_connected(True)
60+
async for event in session:
61+
await self.on_event(event)
62+
63+
# Wait for UI task to complete when session ends
64+
await ui_task
5365

5466
async def on_audio_recorded(self, audio_bytes: bytes) -> None:
5567
"""Called when audio is recorded by the UI."""
5668
try:
5769
# Send the audio to the session
70+
assert self.session is not None
5871
await self.session.send_audio(audio_bytes)
5972
except Exception as e:
6073
self.ui.log_message(f"Error sending audio: {e}")

src/agents/realtime/__init__.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
from .agent import RealtimeAgent, RealtimeAgentHooks, RealtimeRunHooks
2-
from .config import APIKeyOrKeyFunc
2+
from .config import (
3+
RealtimeAudioFormat,
4+
RealtimeClientMessage,
5+
RealtimeInputAudioTranscriptionConfig,
6+
RealtimeModelName,
7+
RealtimeRunConfig,
8+
RealtimeSessionModelSettings,
9+
RealtimeTurnDetectionConfig,
10+
RealtimeUserInput,
11+
RealtimeUserInputMessage,
12+
RealtimeUserInputText,
13+
)
14+
from .connection import RealtimeSession
315
from .events import (
416
RealtimeAgentEndEvent,
517
RealtimeAgentStartEvent,
@@ -15,26 +27,33 @@
1527
RealtimeToolEnd,
1628
RealtimeToolStart,
1729
)
18-
from .session import RealtimeSession
19-
from .transport import (
20-
RealtimeModelName,
21-
RealtimeSessionTransport,
22-
RealtimeTransportConnectionOptions,
23-
RealtimeTransportListener,
30+
from .model import (
31+
RealtimeModel,
32+
RealtimeModelConfig,
33+
RealtimeModelListener,
2434
)
35+
from .runner import RealtimeRunner
2536

2637
__all__ = [
2738
"RealtimeAgent",
2839
"RealtimeAgentHooks",
2940
"RealtimeRunHooks",
30-
"RealtimeSession",
41+
"RealtimeRunner",
3142
"RealtimeSessionListener",
3243
"RealtimeSessionListenerFunc",
33-
"APIKeyOrKeyFunc",
44+
"RealtimeRunConfig",
45+
"RealtimeSessionModelSettings",
46+
"RealtimeInputAudioTranscriptionConfig",
47+
"RealtimeTurnDetectionConfig",
48+
"RealtimeAudioFormat",
49+
"RealtimeClientMessage",
50+
"RealtimeUserInput",
51+
"RealtimeUserInputMessage",
52+
"RealtimeUserInputText",
3453
"RealtimeModelName",
35-
"RealtimeSessionTransport",
36-
"RealtimeTransportListener",
37-
"RealtimeTransportConnectionOptions",
54+
"RealtimeModel",
55+
"RealtimeModelListener",
56+
"RealtimeModelConfig",
3857
"RealtimeSessionEvent",
3958
"RealtimeAgentStartEvent",
4059
"RealtimeAgentEndEvent",
@@ -48,4 +67,5 @@
4867
"RealtimeError",
4968
"RealtimeHistoryUpdated",
5069
"RealtimeHistoryAdded",
70+
"RealtimeSession",
5171
]

src/agents/realtime/config.py

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,44 @@
11
from __future__ import annotations
22

3-
import inspect
43
from typing import (
54
Any,
6-
Callable,
75
Literal,
86
Union,
97
)
108

119
from typing_extensions import NotRequired, TypeAlias, TypedDict
1210

1311
from ..model_settings import ToolChoice
14-
from ..tool import FunctionTool
15-
from ..util._types import MaybeAwaitable
12+
from ..tool import Tool
13+
14+
RealtimeModelName: TypeAlias = Union[
15+
Literal[
16+
"gpt-4o-realtime-preview",
17+
"gpt-4o-mini-realtime-preview",
18+
"gpt-4o-realtime-preview-2025-06-03",
19+
"gpt-4o-realtime-preview-2024-12-17",
20+
"gpt-4o-realtime-preview-2024-10-01",
21+
"gpt-4o-mini-realtime-preview-2024-12-17",
22+
],
23+
str,
24+
]
25+
"""The name of a realtime model."""
1626

1727

1828
class RealtimeClientMessage(TypedDict):
1929
type: str # explicitly required
2030
other_data: NotRequired[dict[str, Any]]
2131

2232

23-
class UserInputText(TypedDict):
33+
class RealtimeUserInputText(TypedDict):
2434
type: Literal["input_text"]
2535
text: str
2636

2737

2838
class RealtimeUserInputMessage(TypedDict):
2939
type: Literal["message"]
3040
role: Literal["user"]
31-
content: list[UserInputText]
41+
content: list[RealtimeUserInputText]
3242

3343

3444
RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
@@ -55,9 +65,11 @@ class RealtimeTurnDetectionConfig(TypedDict):
5565
threshold: NotRequired[float]
5666

5767

58-
class RealtimeSessionConfig(TypedDict):
59-
api_key: NotRequired[APIKeyOrKeyFunc]
60-
model: NotRequired[str]
68+
class RealtimeSessionModelSettings(TypedDict):
69+
"""Model settings for a realtime model session."""
70+
71+
model_name: NotRequired[RealtimeModelName]
72+
6173
instructions: NotRequired[str]
6274
modalities: NotRequired[list[Literal["text", "audio"]]]
6375
voice: NotRequired[str]
@@ -68,24 +80,13 @@ class RealtimeSessionConfig(TypedDict):
6880
turn_detection: NotRequired[RealtimeTurnDetectionConfig]
6981

7082
tool_choice: NotRequired[ToolChoice]
71-
tools: NotRequired[list[FunctionTool]]
72-
73-
74-
APIKeyOrKeyFunc = str | Callable[[], MaybeAwaitable[str]]
75-
"""Either an API key or a function that returns an API key."""
76-
83+
tools: NotRequired[list[Tool]]
7784

78-
async def get_api_key(key: APIKeyOrKeyFunc | None) -> str | None:
79-
"""Get the API key from the key or key function."""
80-
if key is None:
81-
return None
82-
elif isinstance(key, str):
83-
return key
8485

85-
result = key()
86-
if inspect.isawaitable(result):
87-
return await result
88-
return result
86+
class RealtimeRunConfig(TypedDict):
87+
model_settings: NotRequired[RealtimeSessionModelSettings]
8988

9089
# TODO (rm) Add tracing support
9190
# tracing: NotRequired[RealtimeTracingConfig | None]
91+
# TODO (rm) Add guardrail support
92+
# TODO (rm) Add history audio storage config

0 commit comments

Comments
 (0)