@@ -4,7 +4,7 @@ author: eric-urban
4
4
ms.author: eur
5
5
ms.service: azure-ai-openai
6
6
ms.topic: include
7
- ms.date: 1/21/2025
7
+ ms.date: 3/20/2025
8
8
---
9
9
10
10
## Prerequisites
@@ -38,10 +38,10 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
38
38
npm init -y
39
39
```
40
40
41
- 1. Install the real-time audio client library for JavaScript with:
41
+ 1. Install the OpenAI client library for JavaScript with:
42
42
43
43
```console
44
- npm install https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.2/rt-client-0.5.2.tgz
44
+ npm install openai
45
45
```
46
46
47
47
1. For the **recommended** keyless authentication with Microsoft Entra ID, install the `@azure/identity` package with:
@@ -50,6 +50,7 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
50
50
npm install @azure/identity
51
51
```
52
52
53
+
53
54
## Retrieve resource information
54
55
55
56
[!INCLUDE [resource authentication](resource-authentication.md)]
@@ -64,55 +65,70 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
64
65
1. Create the `index.js` file with the following code:
65
66
66
67
```javascript
67
- import { DefaultAzureCredential } from "@azure/identity";
68
- import { LowLevelRTClient } from "rt-client";
69
- import dotenv from "dotenv";
70
- dotenv.config();
71
- async function text_in_audio_out() {
72
- // Set environment variables or edit the corresponding values here.
73
- const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "YourEndpoint";
74
- const deployment = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
75
- if (!endpoint || !deployment) {
76
- throw new Error("You didn' t set the environment variables." );
77
- }
78
- const client = new LowLevelRTClient(new URL(endpoint), new DefaultAzureCredential(), { deployment: deployment });
79
- try {
80
- await client.send({
81
- type: " response.create" ,
82
- response: {
83
- modalities: [" audio" , " text" ],
84
- instructions: " Please assist the user."
85
- }
68
+ import { OpenAIRealtimeWS } from "openai/beta/realtime/ws";
69
+ import { AzureOpenAI } from "openai/index.mjs";
70
+ import { DefaultAzureCredential, getBearerTokenProvider } from "@azure/identity";
71
+ async function main() {
72
+ // You will need to set these environment variables or edit the following values
73
+ const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "AZURE_OPENAI_ENDPOINT";
74
+ // Required Azure OpenAI deployment name and API version
75
+ const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
76
+ const apiVersion = process.env.OPENAI_API_VERSION || "2024-10-01-preview";
77
+ // Keyless authentication
78
+ const credential = new DefaultAzureCredential();
79
+ const scope = "https://cognitiveservices.azure.com/.default";
80
+ const azureADTokenProvider = getBearerTokenProvider(credential, scope);
81
+ const azureOpenAIClient = new AzureOpenAI({
82
+ azureADTokenProvider,
83
+ apiVersion: apiVersion,
84
+ deployment: deploymentName,
85
+ endpoint: endpoint,
86
+ });
87
+ const realtimeClient = await OpenAIRealtimeWS.azure(azureOpenAIClient);
88
+ realtimeClient.socket.on("open", () => {
89
+ console.log("Connection opened!");
90
+ realtimeClient.send({
91
+ type: "session.update",
92
+ session: {
93
+ modalities: ["text", "audio"],
94
+ model: "gpt-4o-mini-realtime-preview",
95
+ },
96
+ });
97
+ realtimeClient.send({
98
+ type: "conversation.item.create",
99
+ item: {
100
+ type: "message",
101
+ role: "user",
102
+ content: [{ type: "input_text", text: "Please assist the user" }],
103
+ },
86
104
});
87
- for await (const message of client.messages()) {
88
- switch (message.type) {
89
- case " response.done" : {
90
- break;
91
- }
92
- case " error" : {
93
- console.error(message.error);
94
- break;
95
- }
96
- case " response.audio_transcript.delta" : {
97
- console.log(` Received text delta: ${message.delta} ` );
98
- break;
99
- }
100
- case " response.audio.delta" : {
101
- const buffer = Buffer.from(message.delta, " base64" );
102
- console.log(` Received ${buffer.length} bytes of audio data.` );
103
- break;
104
- }
105
- }
106
- if (message.type === " response.done" || message.type === " error" ) {
107
- break;
108
- }
109
- }
110
- }
111
- finally {
112
- client.close();
113
- }
105
+ realtimeClient.send({ type: "response.create" });
106
+ });
107
+ realtimeClient.on("error", (err) => {
108
+ // Instead of throwing the error, you can log it
109
+ // and continue processing events.
110
+ throw err;
111
+ });
112
+ realtimeClient.on("session.created", (event) => {
113
+ console.log("session created!", event.session);
114
+ console.log();
115
+ });
116
+ realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));
117
+ realtimeClient.on("response.audio.delta", (event) => {
118
+ const buffer = Buffer.from(event.delta, "base64");
119
+ console.log(`Received ${buffer.length} bytes of audio data.`);
120
+ });
121
+ realtimeClient.on("response.audio_transcript.delta", (event) => {
122
+ console.log(`Received text delta:${event.delta}.`);
123
+ });
124
+ realtimeClient.on("response.text.done", () => console.log());
125
+ realtimeClient.on("response.done", () => realtimeClient.close());
126
+ realtimeClient.socket.on("close", () => console.log("\nConnection closed!"));
114
127
}
115
- await text_in_audio_out();
128
+ main().catch((err) => {
129
+ console.error("The sample encountered an error:", err);
130
+ });
131
+ export { main };
116
132
```
117
133
118
134
1. Sign in to Azure with the following command:
@@ -132,56 +148,66 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
132
148
1. Create the `index.js` file with the following code:
133
149
134
150
```javascript
135
- import { AzureKeyCredential } from " @azure/core-auth" ;
136
- import { LowLevelRTClient } from " rt-client" ;
137
- import dotenv from " dotenv" ;
138
- dotenv.config();
139
- async function text_in_audio_out() {
140
- // Set environment variables or edit the corresponding values here.
151
+ import { OpenAIRealtimeWS } from "openai/beta/realtime/ws";
152
+ import { AzureOpenAI } from "openai/index.mjs";
153
+ async function main() {
154
+ // You will need to set these environment variables or edit the following values
155
+ const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "AZURE_OPENAI_ENDPOINT";
141
156
const apiKey = process.env.AZURE_OPENAI_API_KEY || "Your API key";
142
- const endpoint = process.env.AZURE_OPENAI_ENDPOINT || " Your endpoint" ;
143
- const deployment = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || " gpt-4o-mini-realtime-preview" ;
144
- if (!endpoint || !deployment) {
145
- throw new Error(" You didn' t set the environment variables.");
146
- }
147
- const client = new LowLevelRTClient(new URL(endpoint), new AzureKeyCredential(apiKey), { deployment: deployment });
148
- try {
149
- await client.send({
150
- type: "response.create",
151
- response: {
152
- modalities: ["audio", "text"],
153
- instructions: "Please assist the user."
154
- }
157
+ // Required Azure OpenAI deployment name and API version
158
+ const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
159
+ const apiVersion = process.env.OPENAI_API_VERSION || "2024-10-01-preview";
160
+ const azureOpenAIClient = new AzureOpenAI({
161
+ apiKey: apiKey,
162
+ apiVersion: apiVersion,
163
+ deployment: deploymentName,
164
+ endpoint: endpoint,
165
+ });
166
+ const realtimeClient = await OpenAIRealtimeWS.azure(azureOpenAIClient);
167
+ realtimeClient.socket.on("open", () => {
168
+ console.log("Connection opened!");
169
+ realtimeClient.send({
170
+ type: "session.update",
171
+ session: {
172
+ modalities: ["text", "audio"],
173
+ model: "gpt-4o-mini-realtime-preview",
174
+ },
155
175
});
156
- for await (const message of client.messages()) {
157
- switch (message.type) {
158
- case "response.done": {
159
- break;
160
- }
161
- case "error": {
162
- console.error(message.error);
163
- break;
164
- }
165
- case "response.audio_transcript.delta": {
166
- console.log(`Received text delta: ${message.delta}`);
167
- break;
168
- }
169
- case "response.audio.delta": {
170
- const buffer = Buffer.from(message.delta, "base64");
171
- console.log(`Received ${buffer.length} bytes of audio data.`);
172
- break;
173
- }
174
- }
175
- if (message.type === "response.done" || message.type === "error") {
176
- break;
177
- }
178
- }
179
- }
180
- finally {
181
- client.close();
182
- }
176
+ realtimeClient.send({
177
+ type: "conversation.item.create",
178
+ item: {
179
+ type: "message",
180
+ role: "user",
181
+ content: [{ type: "input_text", text: "Please assist the user" }],
182
+ },
183
+ });
184
+ realtimeClient.send({ type: "response.create" });
185
+ });
186
+ realtimeClient.on("error", (err) => {
187
+ // Instead of throwing the error, you can log it
188
+ // and continue processing events.
189
+ throw err;
190
+ });
191
+ realtimeClient.on("session.created", (event) => {
192
+ console.log("session created!", event.session);
193
+ console.log();
194
+ });
195
+ realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));
196
+ realtimeClient.on("response.audio.delta", (event) => {
197
+ const buffer = Buffer.from(event.delta, "base64");
198
+ console.log(`Received ${buffer.length} bytes of audio data.`);
199
+ });
200
+ realtimeClient.on("response.audio_transcript.delta", (event) => {
201
+ console.log(`Received text delta:${event.delta}.`);
202
+ });
203
+ realtimeClient.on("response.text.done", () => console.log());
204
+ realtimeClient.on("response.done", () => realtimeClient.close());
205
+ realtimeClient.socket.on("close", () => console.log("\nConnection closed!"));
183
206
}
184
- await text_in_audio_out();
207
+ main().catch((err) => {
208
+ console.error("The sample encountered an error:", err);
209
+ });
210
+ export { main };
185
211
```
186
212
187
213
1. Run the JavaScript file.
@@ -201,22 +227,25 @@ The script gets a response from the model and prints the transcript and audio da
201
227
The output will look similar to the following:
202
228
203
229
```console
204
- Received text delta: Hello
205
- Received text delta: !
206
- Received text delta: How
207
- Received text delta: can
208
- Received text delta: I
230
+ Received text delta:Of.
231
+ Received text delta: course.
232
+ Received text delta:!.
233
+ Received text delta: How.
234
+ Received text delta: can.
209
235
Received 4800 bytes of audio data.
210
236
Received 7200 bytes of audio data.
211
- Received text delta: help
237
+ Received text delta: I.
212
238
Received 12000 bytes of audio data.
213
- Received text delta: you
214
- Received text delta: today
215
- Received text delta: ?
239
+ Received text delta: help.
240
+ Received text delta: you.
241
+ Received text delta: today.
242
+ Received text delta:?.
216
243
Received 12000 bytes of audio data.
217
244
Received 12000 bytes of audio data.
218
245
Received 12000 bytes of audio data.
219
- Received 24000 bytes of audio data.
246
+ Received 26400 bytes of audio data.
247
+
248
+ Connection closed!
220
249
```
221
250
222
251
## Web application sample
0 commit comments