Skip to content

Commit 755b78a

Browse files
committed
[8/n] Make realtime more like the rest of agents sdk
Key changes: 1. Rename "transport" to "model" throughout. 2. Extract all model settings into `RealtimeSessionModelSettings`. 3. Add `RealtimeRunConfig`, similar to the `RunConfig` in `run.py`. A follow-up PR will improve the session.
1 parent d2daf91 commit 755b78a

File tree

5 files changed

+169
-138
lines changed

5 files changed

+169
-138
lines changed

src/agents/realtime/__init__.py

Lines changed: 29 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,16 @@
11
from .agent import RealtimeAgent, RealtimeAgentHooks, RealtimeRunHooks
2-
from .config import APIKeyOrKeyFunc
2+
from .config import (
3+
RealtimeAudioFormat,
4+
RealtimeClientMessage,
5+
RealtimeInputAudioTranscriptionConfig,
6+
RealtimeModelName,
7+
RealtimeRunConfig,
8+
RealtimeSessionModelSettings,
9+
RealtimeTurnDetectionConfig,
10+
RealtimeUserInput,
11+
RealtimeUserInputMessage,
12+
RealtimeUserInputText,
13+
)
314
from .events import (
415
RealtimeAgentEndEvent,
516
RealtimeAgentStartEvent,
@@ -15,13 +26,12 @@
1526
RealtimeToolEnd,
1627
RealtimeToolStart,
1728
)
18-
from .session import RealtimeSession
19-
from .transport import (
20-
RealtimeModelName,
21-
RealtimeSessionTransport,
22-
RealtimeTransportConnectionOptions,
23-
RealtimeTransportListener,
29+
from .model import (
30+
RealtimeModel,
31+
RealtimeModelConfig,
32+
RealtimeModelListener,
2433
)
34+
from .session import RealtimeSession
2535

2636
__all__ = [
2737
"RealtimeAgent",
@@ -30,11 +40,19 @@
3040
"RealtimeSession",
3141
"RealtimeSessionListener",
3242
"RealtimeSessionListenerFunc",
33-
"APIKeyOrKeyFunc",
43+
"RealtimeRunConfig",
44+
"RealtimeSessionModelSettings",
45+
"RealtimeInputAudioTranscriptionConfig",
46+
"RealtimeTurnDetectionConfig",
47+
"RealtimeAudioFormat",
48+
"RealtimeClientMessage",
49+
"RealtimeUserInput",
50+
"RealtimeUserInputMessage",
51+
"RealtimeUserInputText",
3452
"RealtimeModelName",
35-
"RealtimeSessionTransport",
36-
"RealtimeTransportListener",
37-
"RealtimeTransportConnectionOptions",
53+
"RealtimeModel",
54+
"RealtimeModelListener",
55+
"RealtimeModelConfig",
3856
"RealtimeSessionEvent",
3957
"RealtimeAgentStartEvent",
4058
"RealtimeAgentEndEvent",

src/agents/realtime/config.py

Lines changed: 26 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,34 +1,44 @@
11
from __future__ import annotations
22

3-
import inspect
43
from typing import (
54
Any,
6-
Callable,
75
Literal,
86
Union,
97
)
108

119
from typing_extensions import NotRequired, TypeAlias, TypedDict
1210

1311
from ..model_settings import ToolChoice
14-
from ..tool import FunctionTool
15-
from ..util._types import MaybeAwaitable
12+
from ..tool import Tool
13+
14+
RealtimeModelName: TypeAlias = Union[
15+
Literal[
16+
"gpt-4o-realtime-preview",
17+
"gpt-4o-mini-realtime-preview",
18+
"gpt-4o-realtime-preview-2025-06-03",
19+
"gpt-4o-realtime-preview-2024-12-17",
20+
"gpt-4o-realtime-preview-2024-10-01",
21+
"gpt-4o-mini-realtime-preview-2024-12-17",
22+
],
23+
str,
24+
]
25+
"""The name of a realtime model."""
1626

1727

1828
class RealtimeClientMessage(TypedDict):
1929
type: str # explicitly required
2030
other_data: NotRequired[dict[str, Any]]
2131

2232

23-
class UserInputText(TypedDict):
33+
class RealtimeUserInputText(TypedDict):
2434
type: Literal["input_text"]
2535
text: str
2636

2737

2838
class RealtimeUserInputMessage(TypedDict):
2939
type: Literal["message"]
3040
role: Literal["user"]
31-
content: list[UserInputText]
41+
content: list[RealtimeUserInputText]
3242

3343

3444
RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
@@ -55,9 +65,11 @@ class RealtimeTurnDetectionConfig(TypedDict):
5565
threshold: NotRequired[float]
5666

5767

58-
class RealtimeSessionConfig(TypedDict):
59-
api_key: NotRequired[APIKeyOrKeyFunc]
60-
model: NotRequired[str]
68+
class RealtimeSessionModelSettings(TypedDict):
69+
"""Model settings for a realtime model session."""
70+
71+
model_name: NotRequired[RealtimeModelName]
72+
6173
instructions: NotRequired[str]
6274
modalities: NotRequired[list[Literal["text", "audio"]]]
6375
voice: NotRequired[str]
@@ -68,24 +80,13 @@ class RealtimeSessionConfig(TypedDict):
6880
turn_detection: NotRequired[RealtimeTurnDetectionConfig]
6981

7082
tool_choice: NotRequired[ToolChoice]
71-
tools: NotRequired[list[FunctionTool]]
72-
73-
74-
APIKeyOrKeyFunc = str | Callable[[], MaybeAwaitable[str]]
75-
"""Either an API key or a function that returns an API key."""
76-
83+
tools: NotRequired[list[Tool]]
7784

78-
async def get_api_key(key: APIKeyOrKeyFunc | None) -> str | None:
79-
"""Get the API key from the key or key function."""
80-
if key is None:
81-
return None
82-
elif isinstance(key, str):
83-
return key
8485

85-
result = key()
86-
if inspect.isawaitable(result):
87-
return await result
88-
return result
86+
class RealtimeRunConfig(TypedDict):
87+
model_settings: NotRequired[RealtimeSessionModelSettings]
8988

9089
# TODO (rm) Add tracing support
9190
# tracing: NotRequired[RealtimeTracingConfig | None]
91+
# TODO (rm) Add guardrail support
92+
# TODO (rm) Add history audio storage config

src/agents/realtime/events.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from ..tool import Tool
88
from .agent import RealtimeAgent
99
from .items import RealtimeItem
10-
from .transport_events import RealtimeTransportAudioEvent, RealtimeTransportEvent
10+
from .model_events import RealtimeModelAudioEvent, RealtimeModelEvent
1111

1212

1313
@dataclass
@@ -96,7 +96,7 @@ class RealtimeToolEnd:
9696
class RealtimeRawTransportEvent:
9797
"""Forwards raw events from the transport layer."""
9898

99-
data: RealtimeTransportEvent
99+
data: RealtimeModelEvent
100100
"""The raw data from the transport layer."""
101101

102102
info: RealtimeEventInfo
@@ -119,7 +119,7 @@ class RealtimeAudioEnd:
119119
class RealtimeAudio:
120120
"""Triggered when the agent generates new audio to be played."""
121121

122-
audio: RealtimeTransportAudioEvent
122+
audio: RealtimeModelAudioEvent
123123
"""The audio event from the transport layer."""
124124

125125
info: RealtimeEventInfo

0 commit comments

Comments
 (0)