Skip to content

Commit afc5943

Browse files
authored
Merge branch 'main' into enhancement/add-streaming-inner-events
2 parents ffaf0a5 + 6293d66 commit afc5943

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

56 files changed

+6211
-110
lines changed

.vscode/launch.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"name": "Python Debugger: Python File",
9+
"type": "debugpy",
10+
"request": "launch",
11+
"program": "${file}"
12+
}
13+
]
14+
}

docs/examples.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,6 @@ Check out a variety of sample implementations of the SDK in the examples section
4040

4141
- **[voice](https://github.com/openai/openai-agents-python/tree/main/examples/voice):**
4242
See examples of voice agents, using our TTS and STT models.
43+
44+
- **[realtime](https://github.com/openai/openai-agents-python/tree/main/examples/realtime):**
45+
Examples showing how to build realtime experiences using the SDK.

docs/release.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ We will increment `Z` for non-breaking changes:
1919

2020
## Breaking change changelog
2121

22+
### 0.2.0
23+
24+
In this version, a few places that used to take `Agent` as an arg now take `AgentBase` instead — for example, the `list_tools()` call in MCP servers. This is a purely typing change; you will still receive `Agent` objects. To update, just fix type errors by replacing `Agent` with `AgentBase`.
25+
2226
### 0.1.0
2327

2428
In this version, [`MCPServer.list_tools()`][agents.mcp.server.MCPServer] has two new params: `run_context` and `agent`. You'll need to add these params to any classes that subclass `MCPServer`.

docs/sessions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ result = await Runner.run(
101101
print(f"Agent: {result.final_output}")
102102

103103
# User wants to correct their question
104-
user_item = await session.pop_item() # Remove user's question
105104
assistant_item = await session.pop_item() # Remove agent's response
105+
user_item = await session.pop_item() # Remove user's question
106106

107107
# Ask a corrected question
108108
result = await Runner.run(

docs/tools.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ Sometimes, you don't want to use a Python function as a tool. You can directly c
180180
- `name`
181181
- `description`
182182
- `params_json_schema`, which is the JSON schema for the arguments
183-
- `on_invoke_tool`, which is an async function that receives the context and the arguments as a JSON string, and must return the tool output as a string.
183+
- `on_invoke_tool`, which is an async function that receives a [`ToolContext`][agents.tool_context.ToolContext] and the arguments as a JSON string, and must return the tool output as a string.
184184

185185
```python
186186
from typing import Any

examples/agent_patterns/llm_as_a_judge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class EvaluationFeedback:
3232
instructions=(
3333
"You evaluate a story outline and decide if it's good enough."
3434
"If it's not good enough, you provide feedback on what needs to be improved."
35-
"Never give it a pass on the first try."
35+
"Never give it a pass on the first try. After 5 attempts, you can give it a pass if story outline is good enough - do not go for perfection"
3636
),
3737
output_type=EvaluationFeedback,
3838
)

examples/mcp/prompt_server/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ async def get_instructions_from_prompt(mcp_server: MCPServer, prompt_name: str,
1717
try:
1818
prompt_result = await mcp_server.get_prompt(prompt_name, kwargs)
1919
content = prompt_result.messages[0].content
20-
if hasattr(content, 'text'):
20+
if hasattr(content, "text"):
2121
instructions = content.text
2222
else:
2323
instructions = str(content)

examples/realtime/demo.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import asyncio
2+
import os
3+
import sys
4+
from typing import TYPE_CHECKING
5+
6+
import numpy as np
7+
8+
from agents.realtime import RealtimeSession
9+
10+
# Add the current directory to path so we can import ui
11+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
12+
13+
from agents import function_tool
14+
from agents.realtime import RealtimeAgent, RealtimeRunner, RealtimeSessionEvent
15+
16+
if TYPE_CHECKING:
17+
from .ui import AppUI
18+
else:
19+
# Try both import styles
20+
try:
21+
# Try relative import first (when used as a package)
22+
from .ui import AppUI
23+
except ImportError:
24+
# Fall back to direct import (when run as a script)
25+
from ui import AppUI
26+
27+
28+
@function_tool
def get_weather(city: str) -> str:
    """Return a canned (always sunny) weather report for *city*."""
    report = f"The weather in {city} is sunny."
    return report
32+
33+
34+
# Demo agent: fixed greeting style, with a single mock weather tool attached.
agent = RealtimeAgent(
    name="Assistant",
    instructions="You always greet the user with 'Top of the morning to you'.",
    tools=[get_weather],
)
39+
40+
41+
def _truncate_str(s: str, max_length: int) -> str:
42+
if len(s) > max_length:
43+
return s[:max_length] + "..."
44+
return s
45+
46+
47+
class Example:
    """Runs a realtime session alongside the textual UI and relays events between them."""

    def __init__(self) -> None:
        self.ui = AppUI()
        self.ui.connected = asyncio.Event()
        self.ui.last_audio_item_id = None
        # Route microphone audio captured by the UI into the session.
        self.ui.set_audio_callback(self.on_audio_recorded)

        self.session: RealtimeSession | None = None

    async def run(self) -> None:
        # Launch the UI as a background task so session setup is not blocked on it.
        ui_task = asyncio.create_task(self.ui.run_async())

        # Connect the session right away, without waiting for the UI to finish.
        runner = RealtimeRunner(agent)
        async with await runner.run() as session:
            self.session = session
            self.ui.set_is_connected(True)
            async for event in session:
                await self._on_event(event)
            print("done")

        # Once the session ends, let the UI task wind down too.
        await ui_task

    async def on_audio_recorded(self, audio_bytes: bytes) -> None:
        # Forward recorded audio straight into the realtime session.
        assert self.session is not None
        await self.session.send_audio(audio_bytes)

    async def _on_event(self, event: RealtimeSessionEvent) -> None:
        # Translate session events into transcript lines / audio playback in the UI.
        try:
            etype = event.type
            if etype == "agent_start":
                self.ui.add_transcript(f"Agent started: {event.agent.name}")
            elif etype == "agent_end":
                self.ui.add_transcript(f"Agent ended: {event.agent.name}")
            elif etype == "handoff":
                self.ui.add_transcript(
                    f"Handoff from {event.from_agent.name} to {event.to_agent.name}"
                )
            elif etype == "tool_start":
                self.ui.add_transcript(f"Tool started: {event.tool.name}")
            elif etype == "tool_end":
                self.ui.add_transcript(f"Tool ended: {event.tool.name}; output: {event.output}")
            elif etype == "audio_end":
                self.ui.add_transcript("Audio ended")
            elif etype == "audio":
                # Decode raw PCM bytes into 16-bit samples for playback.
                np_audio = np.frombuffer(event.audio.data, dtype=np.int16)
                self.ui.play_audio(np_audio)
            elif etype == "audio_interrupted":
                self.ui.add_transcript("Audio interrupted")
            elif etype in ("error", "history_updated", "history_added"):
                # Intentionally ignored: errors surface via raw events; history is noisy.
                pass
            elif etype == "raw_model_event":
                if event.data.type != "error" and event.data.type != "exception":
                    self.ui.log_message(f"Raw model event: {event.data}")
            else:
                self.ui.log_message(f"Unknown event type: {event.type}")
        except Exception as e:
            self.ui.log_message(f"Error processing event: {_truncate_str(str(e), 50)}")
112+
113+
114+
if __name__ == "__main__":
    # Script entry point: build the demo and drive it on a fresh event loop.
    asyncio.run(Example().run())

examples/realtime/no_ui_demo.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
import asyncio
2+
import sys
3+
4+
import numpy as np
5+
import sounddevice as sd
6+
7+
from agents import function_tool
8+
from agents.realtime import RealtimeAgent, RealtimeRunner, RealtimeSession, RealtimeSessionEvent
9+
10+
# Audio configuration
CHUNK_LENGTH_S = 0.05  # 50ms of audio per chunk read from the microphone
SAMPLE_RATE = 24000  # samples per second (24 kHz)
FORMAT = np.int16  # 16-bit signed PCM samples
CHANNELS = 1  # mono capture and playback
15+
16+
# Set up logging for OpenAI agents SDK
17+
# logging.basicConfig(
18+
# level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
19+
# )
20+
# logger.logger.setLevel(logging.ERROR)
21+
22+
23+
@function_tool
def get_weather(city: str) -> str:
    """Return a canned (always sunny) weather report for *city*."""
    report = f"The weather in {city} is sunny."
    return report
27+
28+
29+
# Demo agent: fixed greeting style, with a single mock weather tool attached.
agent = RealtimeAgent(
    name="Assistant",
    instructions="You always greet the user with 'Top of the morning to you'.",
    tools=[get_weather],
)
34+
35+
36+
def _truncate_str(s: str, max_length: int) -> str:
37+
if len(s) > max_length:
38+
return s[:max_length] + "..."
39+
return s
40+
41+
42+
class NoUIDemo:
    """Headless realtime demo: microphone in, speaker out, events logged to stdout."""

    def __init__(self) -> None:
        self.session: RealtimeSession | None = None
        self.audio_stream: sd.InputStream | None = None
        self.audio_player: sd.OutputStream | None = None
        self.recording = False

    async def run(self) -> None:
        print("Connecting, may take a few seconds...")

        # Open the speaker output stream before connecting the session.
        self.audio_player = sd.OutputStream(
            channels=CHANNELS,
            samplerate=SAMPLE_RATE,
            dtype=FORMAT,
        )
        self.audio_player.start()

        try:
            runner = RealtimeRunner(agent)
            async with await runner.run() as session:
                self.session = session
                print("Connected. Starting audio recording...")

                # Begin streaming microphone audio into the session.
                await self.start_audio_recording()
                print("Audio recording started. You can start speaking - expect lots of logs!")

                # Drain session events until the session closes.
                async for event in session:
                    await self._on_event(event)

        finally:
            # Tear down the speaker stream regardless of how the session ended.
            player = self.audio_player
            if player:
                if player.active:
                    player.stop()
                player.close()

        print("Session ended")

    async def start_audio_recording(self) -> None:
        """Start recording audio from the microphone."""
        # Open and start the microphone input stream.
        self.audio_stream = sd.InputStream(
            channels=CHANNELS,
            samplerate=SAMPLE_RATE,
            dtype=FORMAT,
        )

        self.audio_stream.start()
        self.recording = True

        # Capture runs as a background task concurrent with event handling.
        asyncio.create_task(self.capture_audio())

    async def capture_audio(self) -> None:
        """Capture audio from the microphone and send to the session."""
        if not self.audio_stream or not self.session:
            return

        # Number of samples per chunk.
        read_size = int(SAMPLE_RATE * CHUNK_LENGTH_S)

        try:
            while self.recording:
                # Wait until a full chunk is buffered before reading.
                if self.audio_stream.read_available < read_size:
                    await asyncio.sleep(0.01)
                    continue

                chunk, _ = self.audio_stream.read(read_size)

                # Ship the raw sample bytes to the session.
                payload = chunk.tobytes()
                await self.session.send_audio(payload)

                # Cooperatively yield so other tasks can run.
                await asyncio.sleep(0)

        except Exception as e:
            print(f"Audio capture error: {e}")
        finally:
            # Stop and close the microphone stream on exit.
            stream = self.audio_stream
            if stream:
                if stream.active:
                    stream.stop()
                stream.close()

    async def _on_event(self, event: RealtimeSessionEvent) -> None:
        """Handle session events."""
        try:
            etype = event.type
            if etype == "agent_start":
                print(f"Agent started: {event.agent.name}")
            elif etype == "agent_end":
                print(f"Agent ended: {event.agent.name}")
            elif etype == "handoff":
                print(f"Handoff from {event.from_agent.name} to {event.to_agent.name}")
            elif etype == "tool_start":
                print(f"Tool started: {event.tool.name}")
            elif etype == "tool_end":
                print(f"Tool ended: {event.tool.name}; output: {event.output}")
            elif etype == "audio_end":
                print("Audio ended")
            elif etype == "audio":
                # Decode raw PCM bytes and play them through the speakers.
                np_audio = np.frombuffer(event.audio.data, dtype=np.int16)
                if self.audio_player:
                    try:
                        self.audio_player.write(np_audio)
                    except Exception as e:
                        print(f"Audio playback error: {e}")
            elif etype == "audio_interrupted":
                print("Audio interrupted")
            elif etype == "error":
                print(f"Error: {event.error}")
            elif etype in ("history_updated", "history_added"):
                pass  # Skip these frequent events
            elif etype == "raw_model_event":
                print(f"Raw model event: {_truncate_str(str(event.data), 50)}")
            else:
                print(f"Unknown event type: {event.type}")
        except Exception as e:
            print(f"Error processing event: {_truncate_str(str(e), 50)}")
170+
171+
172+
if __name__ == "__main__":
    # Script entry point; Ctrl-C exits cleanly instead of dumping a traceback.
    try:
        asyncio.run(NoUIDemo().run())
    except KeyboardInterrupt:
        print("\nExiting...")
        sys.exit(0)

0 commit comments

Comments
 (0)