Skip to content

Commit 9101996

Browse files
committed
add WebRTC sample
1 parent 02b8e3b commit 9101996

File tree

5 files changed

+299
-39
lines changed

5 files changed

+299
-39
lines changed

docs/references.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,4 @@
7474
- [Realtime API with WebSocket](https://platform.openai.com/docs/guides/realtime-websocket)
7575
- [GPT-4o Realtime API for speech and audio](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/realtime-audio-quickstart?tabs=keyless%2Clinux&pivots=programming-language-python)
7676
- [OpenAI Python API library > examples/realtime](https://github.com/openai/openai-python/tree/main/examples/realtime)
77+
- [How to use the GPT-4o Realtime API via WebRTC](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/realtime-audio-webrtc)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ dependencies = [
1212
"elasticsearch>=9.1.0",
1313
"fastapi[standard]>=0.116.1",
1414
"httpx>=0.28.1",
15+
"jinja2>=3.1.2",
1516
"langchain-azure-ai>=0.1.4",
1617
"langchain-community>=0.3.27",
1718
"langchain-mcp-adapters>=0.1.9",

scripts/realtime_operator.py

Lines changed: 81 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
import asyncio
2+
import http.server
3+
import json
24
import logging
5+
import os
6+
import socketserver
7+
import tempfile
8+
import webbrowser
9+
from pathlib import Path
310

11+
# New imports for template rendering and serving
12+
import jinja2
413
import typer
514
from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
615
from dotenv import load_dotenv
@@ -19,7 +28,7 @@
1928
logger = get_logger(__name__)
2029

2130

22-
async def main() -> None:
31+
async def chat_impl() -> None:
2332
"""
2433
When prompted for user input, type a message and hit enter to send it to the model.
2534
Enter "q" to quit the conversation.
@@ -77,7 +86,7 @@ async def main() -> None:
7786

7887

7988
@app.command()
80-
def run(
89+
def chat(
8190
verbose: bool = typer.Option(
8291
False,
8392
"--verbose",
@@ -89,7 +98,76 @@ def run(
8998
if verbose:
9099
logger.setLevel(logging.DEBUG)
91100

92-
asyncio.run(main())
101+
asyncio.run(chat_impl())
102+
103+
104+
def _js_literal(value: str) -> str:
    """Return *value* as a JavaScript string literal safe to inline in a ``<script>`` block.

    ``json.dumps`` yields a valid JS string literal, but it leaves ``<``, ``>``
    and ``&`` untouched, so a value containing ``</script>`` would terminate the
    script element early. Those characters are emitted as ``\\uXXXX`` escapes,
    which JavaScript decodes back to the original characters.
    """
    encoded = json.dumps(value)
    return encoded.replace("<", "\\u003c").replace(">", "\\u003e").replace("&", "\\u0026")


class _ReusableTCPServer(socketserver.TCPServer):
    """TCP server that can rebind its port immediately after a restart."""

    allow_reuse_address = True


@app.command()
def webrtc(
    template: str = typer.Option(
        "scripts/realtime_webrtc.html", "--template", "-t", help="Path to the HTML Jinja2 template"
    ),
    host: str = typer.Option("0.0.0.0", "--host", "-h"),
    port: int = typer.Option(8080, "--port", "-p"),
    web_rtc_url: str = typer.Option(
        "https://eastus2.realtimeapi-preview.ai.azure.com/v1/realtimertc", "--webrtc-url", help="WebRTC endpoint URL"
    ),
    sessions_url: str = typer.Option(
        "https://YourAzureOpenAIResourceName.openai.azure.com/openai/realtimeapi/sessions?api-version=2025-04-01-preview",
        "--sessions-url",
        help="Sessions API URL",
    ),
    deployment: str = typer.Option("gpt-realtime", "--deployment", help="Deployment name"),
    voice: str = typer.Option("verse", "--voice", help="Voice name"),
) -> None:
    """
    Render the realtime_webrtc HTML template with provided parameters and serve it as a static site.

    The template is a Jinja2 template and will receive the following variables:
    - WEBRTC_URL, SESSIONS_URL, API_KEY, DEPLOYMENT, VOICE
    """
    # Local import: only this command needs it.
    import functools

    settings = Settings()
    # Normalise a missing key to "" so the page really contains an empty key,
    # as the warning below states (json.dumps(None) would render JS `null`).
    api_key = settings.azure_openai_api_key or ""
    if not api_key:
        typer.secho(
            "Warning: no API key provided; the rendered page will contain an empty API key.", fg=typer.colors.YELLOW
        )

    tpl_path = Path(template)
    if not tpl_path.is_file():
        typer.secho(f"Template not found: {tpl_path}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    # Every value is embedded as a script-safe JS string literal.
    rendered = jinja2.Template(tpl_path.read_text(encoding="utf-8")).render(
        WEBRTC_URL=_js_literal(web_rtc_url),
        SESSIONS_URL=_js_literal(sessions_url),
        API_KEY=_js_literal(api_key),
        DEPLOYMENT=_js_literal(deployment),
        VOICE=_js_literal(voice),
    )

    # The rendered page embeds the API key, so anyone who can reach the server can read it.
    if host not in ("127.0.0.1", "localhost"):
        typer.secho(
            f"Warning: serving on {host}; the rendered page (including the API key) is reachable "
            "from other machines. Use --host 127.0.0.1 to restrict access.",
            fg=typer.colors.YELLOW,
        )

    # A wildcard bind address is not a browsable address; point the browser at localhost instead.
    browser_host = "localhost" if host in ("0.0.0.0", "") else host
    url = f"http://{browser_host}:{port}/"

    # The context manager removes the directory on every exit path, including a failed bind.
    with tempfile.TemporaryDirectory() as serve_dir:
        (Path(serve_dir) / "index.html").write_text(rendered, encoding="utf-8")

        # Serve the directory explicitly instead of changing the process-wide working directory.
        handler = functools.partial(http.server.SimpleHTTPRequestHandler, directory=serve_dir)
        with _ReusableTCPServer((host, port), handler) as httpd:
            typer.secho(f"Serving rendered template at: {url}", fg=typer.colors.GREEN)

            # Opening the browser is best-effort: a failure must not stop the server,
            # but the user should be told to open the URL manually.
            try:
                opened = webbrowser.open(url)
            except Exception as exc:
                logger.debug("webbrowser.open failed: %s", exc)
                opened = False
            if not opened:
                typer.secho(f"Could not open a browser automatically; open {url} manually.", fg=typer.colors.YELLOW)

            try:
                httpd.serve_forever()
            except KeyboardInterrupt:
                typer.secho("Shutting down server...", fg=typer.colors.YELLOW)
93171

94172

95173
if __name__ == "__main__":

scripts/realtime_webrtc.html

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8" />
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
6+
<title>Azure OpenAI Realtime Session</title>
7+
</head>
8+
<body>
9+
<h1>Azure OpenAI Realtime Session</h1>
10+
<p>
11+
WARNING: Don't use this code sample in production with the API key
12+
hardcoded. Use a protected backend service to call the sessions API and
13+
generate the ephemeral key. Then return the ephemeral key to the client.
14+
</p>
15+
<button onclick="StartSession()">Start Session</button>
16+
17+
<!-- Log container for API messages -->
18+
<div id="logContainer"></div>
19+
20+
<script>
21+
// The following constants are rendered from a Jinja2 template at runtime.
22+
// They must be valid JavaScript string literals (the CLI will json-encode them).
23+
const WEBRTC_URL = {{ WEBRTC_URL }};
24+
25+
// The SESSIONS_URL includes the Azure OpenAI resource URL,
26+
// the /openai/realtimeapi/sessions path, and the API version; the deployment name is sent in the request body.
27+
// The Azure OpenAI resource region isn't part of the SESSIONS_URL.
28+
const SESSIONS_URL = {{ SESSIONS_URL }};
29+
30+
// The API key of the Azure OpenAI resource. WARNING: do not expose this in
31+
// production. Use an ephemeral key instead.
32+
const API_KEY = {{ API_KEY }};
33+
34+
// The deployment name might not be the same as the model name.
35+
const DEPLOYMENT = {{ DEPLOYMENT }};
36+
const VOICE = {{ VOICE }};
37+
38+
async function StartSession() {
39+
try {
40+
// WARNING: Don't use this code sample in production
41+
// with the API key hardcoded.
42+
// Use a protected backend service to call the
43+
// sessions API and generate the ephemeral key.
44+
// Then return the ephemeral key to the client.
45+
46+
const response = await fetch(SESSIONS_URL, {
47+
method: "POST",
48+
headers: {
49+
//"Authorization": `Bearer ${ACCESS_TOKEN}`,
50+
"api-key": API_KEY,
51+
"Content-Type": "application/json",
52+
},
53+
body: JSON.stringify({
54+
model: DEPLOYMENT,
55+
voice: VOICE,
56+
}),
57+
});
58+
59+
if (!response.ok) {
60+
throw new Error(`API request failed`);
61+
}
62+
63+
const data = await response.json();
64+
65+
const sessionId = data.id;
66+
const ephemeralKey = data.client_secret?.value;
67+
console.error("Ephemeral key:", ephemeralKey);
68+
69+
// Mask the ephemeral key in the log message.
70+
logMessage("Ephemeral Key Received: " + "***");
71+
logMessage("WebRTC Session Id = " + sessionId);
72+
73+
// Set up the WebRTC connection using the ephemeral key.
74+
init(ephemeralKey);
75+
} catch (error) {
76+
console.error("Error fetching ephemeral key:", error);
77+
logMessage("Error fetching ephemeral key: " + error.message);
78+
}
79+
}
80+
81+
// Establish the WebRTC session: play the model's audio, send the microphone
// track, open the event data channel and perform the SDP offer/answer
// exchange authenticated with the ephemeral key.
// Rejects if microphone access or the SDP exchange fails.
async function init(ephemeralKey) {
  let peerConnection = new RTCPeerConnection();

  // Set up to play remote audio from the model.
  const audioElement = document.createElement("audio");
  audioElement.autoplay = true;
  document.body.appendChild(audioElement);

  peerConnection.ontrack = (event) => {
    audioElement.srcObject = event.streams[0];
  };

  // Capture the microphone and send its audio track to the model.
  const clientMedia = await navigator.mediaDevices.getUserMedia({
    audio: true,
  });
  const audioTrack = clientMedia.getAudioTracks()[0];
  peerConnection.addTrack(audioTrack);

  // Set up data channel for sending and receiving events.
  const dataChannel = peerConnection.createDataChannel("realtime-channel");

  dataChannel.addEventListener("open", () => {
    logMessage("Data channel is open");
    updateSession(dataChannel);
  });

  dataChannel.addEventListener("message", (event) => {
    let realtimeEvent;
    try {
      realtimeEvent = JSON.parse(event.data);
    } catch (parseError) {
      // A malformed payload must not break the listener for later events.
      logMessage("Received non-JSON server message: " + event.data);
      return;
    }
    console.log(realtimeEvent);
    logMessage(
      "Received server event: " + JSON.stringify(realtimeEvent, null, 2)
    );

    // "session.update" is the client -> server event; the server acknowledges
    // with "session.updated" and reports failures as "error". The original
    // names are still accepted in case an endpoint emits them.
    const type = realtimeEvent.type;
    if (type === "session.updated" || type === "session.update") {
      logMessage("Instructions: " + realtimeEvent.session?.instructions);
    } else if (type === "error" || type === "session.error") {
      logMessage("Error: " + realtimeEvent.error?.message);
    } else if (type === "session.end") {
      logMessage("Session ended.");
    }
  });

  dataChannel.addEventListener("close", () => {
    logMessage("Data channel is closed");
  });

  // Start the session using the Session Description Protocol (SDP)
  const offer = await peerConnection.createOffer();
  await peerConnection.setLocalDescription(offer);

  // Encode the deployment name so special characters cannot corrupt the query string.
  const sdpResponse = await fetch(
    `${WEBRTC_URL}?model=${encodeURIComponent(DEPLOYMENT)}`,
    {
      method: "POST",
      body: offer.sdp,
      headers: {
        Authorization: `Bearer ${ephemeralKey}`,
        "Content-Type": "application/sdp",
      },
    }
  );

  if (!sdpResponse.ok) {
    // Do not feed an error page to setRemoteDescription; release the
    // microphone and connection, then surface the HTTP failure.
    releaseResources();
    throw new Error(
      `SDP exchange failed (${sdpResponse.status} ${sdpResponse.statusText})`
    );
  }

  const answer = { type: "answer", sdp: await sdpResponse.text() };
  await peerConnection.setRemoteDescription(answer);

  const button = document.createElement("button");
  button.innerText = "Close Session";
  button.onclick = stopSession;
  document.body.appendChild(button);

  // Send a client event to update the session
  function updateSession(dataChannel) {
    const event = {
      type: "session.update",
      session: {
        instructions:
          "You are a helpful AI assistant responding in natural, engaging language.",
      },
    };
    dataChannel.send(JSON.stringify(event));
    logMessage("Sent client event: " + JSON.stringify(event, null, 2));
  }

  // Close the data channel and peer connection and stop microphone capture.
  function releaseResources() {
    if (dataChannel) dataChannel.close();
    if (peerConnection) peerConnection.close();
    peerConnection = null;
    // Stop the tracks so the browser releases the microphone.
    clientMedia.getTracks().forEach((track) => track.stop());
  }

  function stopSession() {
    releaseResources();
    logMessage("Session closed.");
  }
}
169+
170+
// Append `message` as a new paragraph to the on-page log container.
// textContent is used so the message is shown as plain text, never parsed as HTML.
function logMessage(message) {
  const entry = document.createElement("p");
  entry.textContent = message;
  document.getElementById("logContainer").appendChild(entry);
}
176+
</script>
177+
</body>
178+
</html>

0 commit comments

Comments
 (0)