Skip to content

Commit 4cb44e9

Browse files
committed
[8/n] Make realtime more like the rest of agents sdk
Key changes: 1. Rename transport -> model. 2. Extract all model settings into `RealtimeSessionModelSettings`. 3. Add `RealtimeRunConfig`, similar to the `RunConfig` in `run.py`. In the next PR I'll improve the session.
1 parent 38c5235 commit 4cb44e9

File tree

9 files changed

+308
-275
lines changed

9 files changed

+308
-275
lines changed

src/agents/realtime/__init__.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
from .agent import RealtimeAgent, RealtimeAgentHooks, RealtimeRunHooks
2-
from .config import APIKeyOrKeyFunc
2+
from .config import (
3+
RealtimeAudioFormat,
4+
RealtimeClientMessage,
5+
RealtimeInputAudioTranscriptionConfig,
6+
RealtimeModelName,
7+
RealtimeRunConfig,
8+
RealtimeSessionModelSettings,
9+
RealtimeTurnDetectionConfig,
10+
RealtimeUserInput,
11+
RealtimeUserInputMessage,
12+
RealtimeUserInputText,
13+
)
314
from .events import (
415
RealtimeAgentEndEvent,
516
RealtimeAgentStartEvent,
@@ -15,13 +26,12 @@
1526
RealtimeToolEnd,
1627
RealtimeToolStart,
1728
)
18-
from .session import RealtimeSession
19-
from .transport import (
20-
RealtimeModelName,
21-
RealtimeSessionTransport,
22-
RealtimeTransportConnectionOptions,
23-
RealtimeTransportListener,
29+
from .model import (
30+
RealtimeModel,
31+
RealtimeModelConfig,
32+
RealtimeModelListener,
2433
)
34+
from .session import RealtimeSession
2535

2636
__all__ = [
2737
"RealtimeAgent",
@@ -30,11 +40,19 @@
3040
"RealtimeSession",
3141
"RealtimeSessionListener",
3242
"RealtimeSessionListenerFunc",
33-
"APIKeyOrKeyFunc",
43+
"RealtimeRunConfig",
44+
"RealtimeSessionModelSettings",
45+
"RealtimeInputAudioTranscriptionConfig",
46+
"RealtimeTurnDetectionConfig",
47+
"RealtimeAudioFormat",
48+
"RealtimeClientMessage",
49+
"RealtimeUserInput",
50+
"RealtimeUserInputMessage",
51+
"RealtimeUserInputText",
3452
"RealtimeModelName",
35-
"RealtimeSessionTransport",
36-
"RealtimeTransportListener",
37-
"RealtimeTransportConnectionOptions",
53+
"RealtimeModel",
54+
"RealtimeModelListener",
55+
"RealtimeModelConfig",
3856
"RealtimeSessionEvent",
3957
"RealtimeAgentStartEvent",
4058
"RealtimeAgentEndEvent",

src/agents/realtime/config.py

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,44 @@
11
from __future__ import annotations
22

3-
import inspect
43
from typing import (
54
Any,
6-
Callable,
75
Literal,
86
Union,
97
)
108

119
from typing_extensions import NotRequired, TypeAlias, TypedDict
1210

1311
from ..model_settings import ToolChoice
14-
from ..tool import FunctionTool
15-
from ..util._types import MaybeAwaitable
12+
from ..tool import Tool
13+
14+
RealtimeModelName: TypeAlias = Union[
15+
Literal[
16+
"gpt-4o-realtime-preview",
17+
"gpt-4o-mini-realtime-preview",
18+
"gpt-4o-realtime-preview-2025-06-03",
19+
"gpt-4o-realtime-preview-2024-12-17",
20+
"gpt-4o-realtime-preview-2024-10-01",
21+
"gpt-4o-mini-realtime-preview-2024-12-17",
22+
],
23+
str,
24+
]
25+
"""The name of a realtime model."""
1626

1727

1828
class RealtimeClientMessage(TypedDict):
1929
type: str # explicitly required
2030
other_data: NotRequired[dict[str, Any]]
2131

2232

23-
class UserInputText(TypedDict):
33+
class RealtimeUserInputText(TypedDict):
2434
type: Literal["input_text"]
2535
text: str
2636

2737

2838
class RealtimeUserInputMessage(TypedDict):
2939
type: Literal["message"]
3040
role: Literal["user"]
31-
content: list[UserInputText]
41+
content: list[RealtimeUserInputText]
3242

3343

3444
RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
@@ -55,9 +65,11 @@ class RealtimeTurnDetectionConfig(TypedDict):
5565
threshold: NotRequired[float]
5666

5767

58-
class RealtimeSessionConfig(TypedDict):
59-
api_key: NotRequired[APIKeyOrKeyFunc]
60-
model: NotRequired[str]
68+
class RealtimeSessionModelSettings(TypedDict):
69+
"""Model settings for a realtime model session."""
70+
71+
model_name: NotRequired[RealtimeModelName]
72+
6173
instructions: NotRequired[str]
6274
modalities: NotRequired[list[Literal["text", "audio"]]]
6375
voice: NotRequired[str]
@@ -68,24 +80,13 @@ class RealtimeSessionConfig(TypedDict):
6880
turn_detection: NotRequired[RealtimeTurnDetectionConfig]
6981

7082
tool_choice: NotRequired[ToolChoice]
71-
tools: NotRequired[list[FunctionTool]]
72-
73-
74-
APIKeyOrKeyFunc = str | Callable[[], MaybeAwaitable[str]]
75-
"""Either an API key or a function that returns an API key."""
76-
83+
tools: NotRequired[list[Tool]]
7784

78-
async def get_api_key(key: APIKeyOrKeyFunc | None) -> str | None:
79-
"""Get the API key from the key or key function."""
80-
if key is None:
81-
return None
82-
elif isinstance(key, str):
83-
return key
8485

85-
result = key()
86-
if inspect.isawaitable(result):
87-
return await result
88-
return result
86+
class RealtimeRunConfig(TypedDict):
87+
model_settings: NotRequired[RealtimeSessionModelSettings]
8988

9089
# TODO (rm) Add tracing support
9190
# tracing: NotRequired[RealtimeTracingConfig | None]
91+
# TODO (rm) Add guardrail support
92+
# TODO (rm) Add history audio storage config

src/agents/realtime/events.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from ..tool import Tool
88
from .agent import RealtimeAgent
99
from .items import RealtimeItem
10-
from .transport_events import RealtimeTransportAudioEvent, RealtimeTransportEvent
10+
from .model_events import RealtimeModelAudioEvent, RealtimeModelEvent
1111

1212

1313
@dataclass
@@ -96,7 +96,7 @@ class RealtimeToolEnd:
9696
class RealtimeRawTransportEvent:
9797
"""Forwards raw events from the transport layer."""
9898

99-
data: RealtimeTransportEvent
99+
data: RealtimeModelEvent
100100
"""The raw data from the transport layer."""
101101

102102
info: RealtimeEventInfo
@@ -119,7 +119,7 @@ class RealtimeAudioEnd:
119119
class RealtimeAudio:
120120
"""Triggered when the agent generates new audio to be played."""
121121

122-
audio: RealtimeTransportAudioEvent
122+
audio: RealtimeModelAudioEvent
123123
"""The audio event from the transport layer."""
124124

125125
info: RealtimeEventInfo

src/agents/realtime/model.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import abc
2+
from typing import Any, Callable
3+
4+
from typing_extensions import NotRequired, TypedDict
5+
6+
from ..util._types import MaybeAwaitable
7+
from .config import (
8+
RealtimeClientMessage,
9+
RealtimeSessionModelSettings,
10+
RealtimeUserInput,
11+
)
12+
from .model_events import RealtimeModelEvent, RealtimeModelToolCallEvent
13+
14+
15+
class RealtimeModelListener(abc.ABC):
16+
"""A listener for realtime model events."""
17+
18+
@abc.abstractmethod
19+
async def on_event(self, event: RealtimeModelEvent) -> None:
20+
"""Called when an event is emitted by the realtime model."""
21+
pass
22+
23+
24+
class RealtimeModelConfig(TypedDict):
25+
"""Options for connecting to a realtime model."""
26+
27+
api_key: NotRequired[str | Callable[[], MaybeAwaitable[str]]]
28+
"""The API key (or function that returns a key) to use when connecting. If unset, the model will
29+
try to use a sane default. For example, the OpenAI Realtime model will try to use the
30+
`OPENAI_API_KEY` environment variable.
31+
"""
32+
33+
url: NotRequired[str]
34+
"""The URL to use when connecting. If unset, the model will use a sane default. For example,
35+
the OpenAI Realtime model will use the default OpenAI WebSocket URL.
36+
"""
37+
38+
initial_model_settings: NotRequired[RealtimeSessionModelSettings]
39+
40+
41+
class RealtimeModel(abc.ABC):
42+
"""Interface for connecting to a realtime model and sending/receiving events."""
43+
44+
@abc.abstractmethod
45+
async def connect(self, options: RealtimeModelConfig) -> None:
46+
"""Establish a connection to the model and keep it alive."""
47+
pass
48+
49+
@abc.abstractmethod
50+
def add_listener(self, listener: RealtimeModelListener) -> None:
51+
"""Add a listener to the model."""
52+
pass
53+
54+
@abc.abstractmethod
55+
async def remove_listener(self, listener: RealtimeModelListener) -> None:
56+
"""Remove a listener from the model."""
57+
pass
58+
59+
@abc.abstractmethod
60+
async def send_event(self, event: RealtimeClientMessage) -> None:
61+
"""Send an event to the model."""
62+
pass
63+
64+
@abc.abstractmethod
65+
async def send_message(
66+
self, message: RealtimeUserInput, other_event_data: dict[str, Any] | None = None
67+
) -> None:
68+
"""Send a message to the model."""
69+
pass
70+
71+
@abc.abstractmethod
72+
async def send_audio(self, audio: bytes, *, commit: bool = False) -> None:
73+
"""Send a raw audio chunk to the model.
74+
75+
Args:
76+
audio: The audio data to send.
77+
commit: Whether to commit the audio buffer to the model. If the model does not do turn
78+
detection, this can be used to indicate the turn is completed.
79+
"""
80+
pass
81+
82+
@abc.abstractmethod
83+
async def send_tool_output(
84+
self, tool_call: RealtimeModelToolCallEvent, output: str, start_response: bool
85+
) -> None:
86+
"""Send tool output to the model."""
87+
pass
88+
89+
@abc.abstractmethod
90+
async def interrupt(self) -> None:
91+
"""Interrupt the model. For example, could be triggered by a guardrail."""
92+
pass
93+
94+
@abc.abstractmethod
95+
async def close(self) -> None:
96+
"""Close the session."""
97+
pass

0 commit comments

Comments
 (0)