Skip to content

Commit e6d6187

Browse files
committed
update javascript and typescript quickstarts
1 parent 19faaef commit e6d6187

File tree

5 files changed

+300
-227
lines changed

5 files changed

+300
-227
lines changed

articles/ai-services/openai/how-to/realtime-audio.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ description: Learn how to use the GPT-4o Realtime API for speech and audio with
55
manager: nitinme
66
ms.service: azure-ai-openai
77
ms.topic: how-to
8-
ms.date: 12/20/2024
8+
ms.date: 3/20/2025
99
author: eric-urban
1010
ms.author: eur
1111
ms.custom: references_regions

articles/ai-services/openai/includes/realtime-javascript.md

Lines changed: 140 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
3838
npm init -y
3939
```
4040

41-
1. Install the real-time audio client library for JavaScript with:
41+
1. Install the OpenAI client library for JavaScript with:
4242

4343
```console
44-
npm install https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.2/rt-client-0.5.2.tgz
44+
npm install openai
4545
```
4646

4747
1. For the **recommended** keyless authentication with Microsoft Entra ID, install the `@azure/identity` package with:
@@ -50,6 +50,12 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
5050
npm install @azure/identity
5151
```
5252

53+
1. Install the `ws` WebSocket client library for JavaScript with:
54+
55+
```console
56+
npm install ws
57+
```
58+
5359
## Retrieve resource information
5460

5561
[!INCLUDE [resource authentication](resource-authentication.md)]
@@ -64,55 +70,70 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
6470
1. Create the `index.js` file with the following code:
6571
6672
```javascript
67-
import { DefaultAzureCredential } from "@azure/identity";
68-
import { LowLevelRTClient } from "rt-client";
69-
import dotenv from "dotenv";
70-
dotenv.config();
71-
async function text_in_audio_out() {
72-
// Set environment variables or edit the corresponding values here.
73-
const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "YourEndpoint";
74-
const deployment = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
75-
if (!endpoint || !deployment) {
76-
throw new Error("You didn't set the environment variables.");
77-
}
78-
const client = new LowLevelRTClient(new URL(endpoint), new DefaultAzureCredential(), { deployment: deployment });
79-
try {
80-
await client.send({
81-
type: "response.create",
82-
response: {
83-
modalities: ["audio", "text"],
84-
instructions: "Please assist the user."
85-
}
73+
import { OpenAIRealtimeWS } from "openai/beta/realtime/ws";
74+
import { AzureOpenAI } from "openai/index.mjs";
75+
import { DefaultAzureCredential, getBearerTokenProvider } from "@azure/identity";
76+
async function main() {
77+
// You will need to set these environment variables or edit the following values
78+
const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "AZURE_OPENAI_ENDPOINT";
79+
// Required Azure OpenAI deployment name and API version
80+
const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
81+
const apiVersion = process.env.OPENAI_API_VERSION || "2024-10-01-preview";
82+
// Keyless authentication
83+
const credential = new DefaultAzureCredential();
84+
const scope = "https://cognitiveservices.azure.com/.default";
85+
const azureADTokenProvider = getBearerTokenProvider(credential, scope);
86+
const azureOpenAIClient = new AzureOpenAI({
87+
azureADTokenProvider,
88+
apiVersion: apiVersion,
89+
deployment: deploymentName,
90+
endpoint: endpoint,
91+
});
92+
const realtimeClient = await OpenAIRealtimeWS.azure(azureOpenAIClient);
93+
realtimeClient.socket.on("open", () => {
94+
console.log("Connection opened!");
95+
realtimeClient.send({
96+
type: "session.update",
97+
session: {
98+
modalities: ["text", "audio"],
99+
model: "gpt-4o-mini-realtime-preview",
100+
},
86101
});
87-
for await (const message of client.messages()) {
88-
switch (message.type) {
89-
case "response.done": {
90-
break;
91-
}
92-
case "error": {
93-
console.error(message.error);
94-
break;
95-
}
96-
case "response.audio_transcript.delta": {
97-
console.log(`Received text delta: ${message.delta}`);
98-
break;
99-
}
100-
case "response.audio.delta": {
101-
const buffer = Buffer.from(message.delta, "base64");
102-
console.log(`Received ${buffer.length} bytes of audio data.`);
103-
break;
104-
}
105-
}
106-
if (message.type === "response.done" || message.type === "error") {
107-
break;
108-
}
109-
}
110-
}
111-
finally {
112-
client.close();
113-
}
102+
realtimeClient.send({
103+
type: "conversation.item.create",
104+
item: {
105+
type: "message",
106+
role: "user",
107+
content: [{ type: "input_text", text: "Please assist the user" }],
108+
},
109+
});
110+
realtimeClient.send({ type: "response.create" });
111+
});
112+
realtimeClient.on("error", (err) => {
113+
// Instead of throwing the error, you can log it
114+
// and continue processing events.
115+
throw err;
116+
});
117+
realtimeClient.on("session.created", (event) => {
118+
console.log("session created!", event.session);
119+
console.log();
120+
});
121+
realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));
122+
realtimeClient.on("response.audio.delta", (event) => {
123+
const buffer = Buffer.from(event.delta, "base64");
124+
console.log(`Received ${buffer.length} bytes of audio data.`);
125+
});
126+
realtimeClient.on("response.audio_transcript.delta", (event) => {
127+
console.log(`Received text delta:${event.delta}.`);
128+
});
129+
realtimeClient.on("response.text.done", () => console.log());
130+
realtimeClient.on("response.done", () => realtimeClient.close());
131+
realtimeClient.socket.on("close", () => console.log("\nConnection closed!"));
114132
}
115-
await text_in_audio_out();
133+
main().catch((err) => {
134+
console.error("The sample encountered an error:", err);
135+
});
136+
export { main };
116137
```
117138
118139
1. Sign in to Azure with the following command:
@@ -132,56 +153,66 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
132153
1. Create the `index.js` file with the following code:
133154
134155
```javascript
135-
import { AzureKeyCredential } from "@azure/core-auth";
136-
import { LowLevelRTClient } from "rt-client";
137-
import dotenv from "dotenv";
138-
dotenv.config();
139-
async function text_in_audio_out() {
140-
// Set environment variables or edit the corresponding values here.
156+
import { OpenAIRealtimeWS } from "openai/beta/realtime/ws";
157+
import { AzureOpenAI } from "openai/index.mjs";
158+
async function main() {
159+
// You will need to set these environment variables or edit the following values
160+
const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "AZURE_OPENAI_ENDPOINT";
141161
const apiKey = process.env.AZURE_OPENAI_API_KEY || "Your API key";
142-
const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "Your endpoint";
143-
const deployment = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
144-
if (!endpoint || !deployment) {
145-
throw new Error("You didn't set the environment variables.");
146-
}
147-
const client = new LowLevelRTClient(new URL(endpoint), new AzureKeyCredential(apiKey), { deployment: deployment });
148-
try {
149-
await client.send({
150-
type: "response.create",
151-
response: {
152-
modalities: ["audio", "text"],
153-
instructions: "Please assist the user."
154-
}
162+
// Required Azure OpenAI deployment name and API version
163+
const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
164+
const apiVersion = process.env.OPENAI_API_VERSION || "2024-10-01-preview";
165+
const azureOpenAIClient = new AzureOpenAI({
166+
apiKey: apiKey,
167+
apiVersion: apiVersion,
168+
deployment: deploymentName,
169+
endpoint: endpoint,
170+
});
171+
const realtimeClient = await OpenAIRealtimeWS.azure(azureOpenAIClient);
172+
realtimeClient.socket.on("open", () => {
173+
console.log("Connection opened!");
174+
realtimeClient.send({
175+
type: "session.update",
176+
session: {
177+
modalities: ["text", "audio"],
178+
model: "gpt-4o-mini-realtime-preview",
179+
},
180+
});
181+
realtimeClient.send({
182+
type: "conversation.item.create",
183+
item: {
184+
type: "message",
185+
role: "user",
186+
content: [{ type: "input_text", text: "Please assist the user" }],
187+
},
155188
});
156-
for await (const message of client.messages()) {
157-
switch (message.type) {
158-
case "response.done": {
159-
break;
160-
}
161-
case "error": {
162-
console.error(message.error);
163-
break;
164-
}
165-
case "response.audio_transcript.delta": {
166-
console.log(`Received text delta: ${message.delta}`);
167-
break;
168-
}
169-
case "response.audio.delta": {
170-
const buffer = Buffer.from(message.delta, "base64");
171-
console.log(`Received ${buffer.length} bytes of audio data.`);
172-
break;
173-
}
174-
}
175-
if (message.type === "response.done" || message.type === "error") {
176-
break;
177-
}
178-
}
179-
}
180-
finally {
181-
client.close();
182-
}
189+
realtimeClient.send({ type: "response.create" });
190+
});
191+
realtimeClient.on("error", (err) => {
192+
// Instead of throwing the error, you can log it
193+
// and continue processing events.
194+
throw err;
195+
});
196+
realtimeClient.on("session.created", (event) => {
197+
console.log("session created!", event.session);
198+
console.log();
199+
});
200+
realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));
201+
realtimeClient.on("response.audio.delta", (event) => {
202+
const buffer = Buffer.from(event.delta, "base64");
203+
console.log(`Received ${buffer.length} bytes of audio data.`);
204+
});
205+
realtimeClient.on("response.audio_transcript.delta", (event) => {
206+
console.log(`Received text delta:${event.delta}.`);
207+
});
208+
realtimeClient.on("response.text.done", () => console.log());
209+
realtimeClient.on("response.done", () => realtimeClient.close());
210+
realtimeClient.socket.on("close", () => console.log("\nConnection closed!"));
183211
}
184-
await text_in_audio_out();
212+
main().catch((err) => {
213+
console.error("The sample encountered an error:", err);
214+
});
215+
export { main };
185216
```
186217
187218
1. Run the JavaScript file.
@@ -201,22 +232,25 @@ The script gets a response from the model and prints the transcript and audio da
201232
The output will look similar to the following:
202233
203234
```console
204-
Received text delta: Hello
205-
Received text delta: !
206-
Received text delta: How
207-
Received text delta: can
208-
Received text delta: I
235+
Received text delta:Of.
236+
Received text delta: course.
237+
Received text delta:!.
238+
Received text delta: How.
239+
Received text delta: can.
209240
Received 4800 bytes of audio data.
210241
Received 7200 bytes of audio data.
211-
Received text delta: help
242+
Received text delta: I.
212243
Received 12000 bytes of audio data.
213-
Received text delta: you
214-
Received text delta: today
215-
Received text delta: ?
244+
Received text delta: help.
245+
Received text delta: you.
246+
Received text delta: today.
247+
Received text delta:?.
216248
Received 12000 bytes of audio data.
217249
Received 12000 bytes of audio data.
218250
Received 12000 bytes of audio data.
219-
Received 24000 bytes of audio data.
251+
Received 26400 bytes of audio data.
252+
253+
Connection closed!
220254
```
221255
222256
## Web application sample

0 commit comments

Comments
 (0)