@@ -38,10 +38,10 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
3838 npm init -y
3939 ```
4040
41- 1. Install the real-time audio client library for JavaScript with:
41+ 1. Install the OpenAI client library for JavaScript with:
4242
4343 ```console
44- npm install https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.2/rt-client-0.5.2.tgz
44+ npm install openai
4545 ```
4646
47471. For the **recommended** keyless authentication with Microsoft Entra ID, install the `@azure/identity` package with:
@@ -50,6 +50,12 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
5050 npm install @azure/identity
5151 ```
5252
53+ 1. Install the WebSocket client library for JavaScript with:
54+
55+ ```console
56+ npm install ws
57+ ```
58+
5359## Retrieve resource information
5460
5561[!INCLUDE [resource authentication](resource-authentication.md)]
@@ -64,55 +70,70 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
64701. Create the `index.js` file with the following code:
6571
6672 ```javascript
67- import { DefaultAzureCredential } from "@azure/identity";
68- import { LowLevelRTClient } from "rt-client";
69- import dotenv from "dotenv";
70- dotenv.config();
71- async function text_in_audio_out() {
72- // Set environment variables or edit the corresponding values here.
73- const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "YourEndpoint";
74- const deployment = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
75- if (!endpoint || !deployment) {
76- throw new Error("You didn' t set the environment variables." );
77- }
78- const client = new LowLevelRTClient(new URL(endpoint), new DefaultAzureCredential(), { deployment: deployment });
79- try {
80- await client.send({
81- type: " response.create" ,
82- response: {
83- modalities: [" audio" , " text" ],
84- instructions: " Please assist the user."
85- }
73+ import { OpenAIRealtimeWS } from "openai/beta/realtime/ws";
74+ import { AzureOpenAI } from "openai/index.mjs";
75+ import { DefaultAzureCredential, getBearerTokenProvider } from "@azure/identity";
76+ async function main() {
77+ // You will need to set these environment variables or edit the following values
78+ const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "AZURE_OPENAI_ENDPOINT";
79+ // Required Azure OpenAI deployment name and API version
80+ const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
81+ const apiVersion = process.env.OPENAI_API_VERSION || "2024-10-01-preview";
82+ // Keyless authentication
83+ const credential = new DefaultAzureCredential();
84+ const scope = "https://cognitiveservices.azure.com/.default";
85+ const azureADTokenProvider = getBearerTokenProvider(credential, scope);
86+ const azureOpenAIClient = new AzureOpenAI({
87+ azureADTokenProvider,
88+ apiVersion: apiVersion,
89+ deployment: deploymentName,
90+ endpoint: endpoint,
91+ });
92+ const realtimeClient = await OpenAIRealtimeWS.azure(azureOpenAIClient);
93+ realtimeClient.socket.on("open", () => {
94+ console.log("Connection opened!");
95+ realtimeClient.send({
96+ type: "session.update",
97+ session: {
98+ modalities: ["text", "audio"],
99+ model: "gpt-4o-mini-realtime-preview",
100+ },
86101 });
87- for await (const message of client.messages()) {
88- switch (message.type) {
89- case " response.done" : {
90- break;
91- }
92- case " error" : {
93- console.error(message.error);
94- break;
95- }
96- case " response.audio_transcript.delta" : {
97- console.log(` Received text delta: ${message.delta} ` );
98- break;
99- }
100- case " response.audio.delta" : {
101- const buffer = Buffer.from(message.delta, " base64" );
102- console.log(` Received ${buffer.length} bytes of audio data.` );
103- break;
104- }
105- }
106- if (message.type === " response.done" || message.type === " error" ) {
107- break;
108- }
109- }
110- }
111- finally {
112- client.close();
113- }
102+ realtimeClient.send({
103+ type: "conversation.item.create",
104+ item: {
105+ type: "message",
106+ role: "user",
107+ content: [{ type: "input_text", text: "Please assist the user" }],
108+ },
109+ });
110+ realtimeClient.send({ type: "response.create" });
111+ });
112+ realtimeClient.on("error", (err) => {
113+ // Instead of throwing the error, you can log it
114+ // and continue processing events.
115+ throw err;
116+ });
117+ realtimeClient.on("session.created", (event) => {
118+ console.log("session created!", event.session);
119+ console.log();
120+ });
121+ realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));
122+ realtimeClient.on("response.audio.delta", (event) => {
123+ const buffer = Buffer.from(event.delta, "base64");
124+ console.log(`Received ${buffer.length} bytes of audio data.`);
125+ });
126+ realtimeClient.on("response.audio_transcript.delta", (event) => {
127+ console.log(`Received text delta:${event.delta}.`);
128+ });
129+ realtimeClient.on("response.text.done", () => console.log());
130+ realtimeClient.on("response.done", () => realtimeClient.close());
131+ realtimeClient.socket.on("close", () => console.log("\nConnection closed!"));
114132 }
115- await text_in_audio_out();
133+ main().catch((err) => {
134+ console.error("The sample encountered an error:", err);
135+ });
136+ export { main };
116137 ```
117138
1181391. Sign in to Azure with the following command:
@@ -132,56 +153,66 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
1321531. Create the `index.js` file with the following code:
133154
134155 ```javascript
135- import { AzureKeyCredential } from " @azure/core-auth" ;
136- import { LowLevelRTClient } from " rt-client" ;
137- import dotenv from " dotenv" ;
138- dotenv.config();
139- async function text_in_audio_out() {
140- // Set environment variables or edit the corresponding values here.
156+ import { OpenAIRealtimeWS } from "openai/beta/realtime/ws";
157+ import { AzureOpenAI } from "openai/index.mjs";
158+ async function main() {
159+ // You will need to set these environment variables or edit the following values
160+ const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "AZURE_OPENAI_ENDPOINT";
141161 const apiKey = process.env.AZURE_OPENAI_API_KEY || "Your API key";
142- const endpoint = process.env.AZURE_OPENAI_ENDPOINT || " Your endpoint" ;
143- const deployment = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || " gpt-4o-mini-realtime-preview" ;
144- if (!endpoint || !deployment) {
145- throw new Error(" You didn' t set the environment variables.");
146- }
147- const client = new LowLevelRTClient(new URL(endpoint), new AzureKeyCredential(apiKey), { deployment: deployment });
148- try {
149- await client.send({
150- type: "response.create",
151- response: {
152- modalities: ["audio", "text"],
153- instructions: "Please assist the user."
154- }
162+ // Required Azure OpenAI deployment name and API version
163+ const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
164+ const apiVersion = process.env.OPENAI_API_VERSION || "2024-10-01-preview";
165+ const azureOpenAIClient = new AzureOpenAI({
166+ apiKey: apiKey,
167+ apiVersion: apiVersion,
168+ deployment: deploymentName,
169+ endpoint: endpoint,
170+ });
171+ const realtimeClient = await OpenAIRealtimeWS.azure(azureOpenAIClient);
172+ realtimeClient.socket.on("open", () => {
173+ console.log("Connection opened!");
174+ realtimeClient.send({
175+ type: "session.update",
176+ session: {
177+ modalities: ["text", "audio"],
178+ model: "gpt-4o-mini-realtime-preview",
179+ },
180+ });
181+ realtimeClient.send({
182+ type: "conversation.item.create",
183+ item: {
184+ type: "message",
185+ role: "user",
186+ content: [{ type: "input_text", text: "Please assist the user" }],
187+ },
155188 });
156- for await (const message of client.messages()) {
157- switch (message.type) {
158- case "response.done": {
159- break;
160- }
161- case "error": {
162- console.error(message.error);
163- break;
164- }
165- case "response.audio_transcript.delta": {
166- console.log(`Received text delta: ${message.delta}`);
167- break;
168- }
169- case "response.audio.delta": {
170- const buffer = Buffer.from(message.delta, "base64");
171- console.log(`Received ${buffer.length} bytes of audio data.`);
172- break;
173- }
174- }
175- if (message.type === "response.done" || message.type === "error") {
176- break;
177- }
178- }
179- }
180- finally {
181- client.close();
182- }
189+ realtimeClient.send({ type: "response.create" });
190+ });
191+ realtimeClient.on("error", (err) => {
192+ // Instead of throwing the error, you can log it
193+ // and continue processing events.
194+ throw err;
195+ });
196+ realtimeClient.on("session.created", (event) => {
197+ console.log("session created!", event.session);
198+ console.log();
199+ });
200+ realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));
201+ realtimeClient.on("response.audio.delta", (event) => {
202+ const buffer = Buffer.from(event.delta, "base64");
203+ console.log(`Received ${buffer.length} bytes of audio data.`);
204+ });
205+ realtimeClient.on("response.audio_transcript.delta", (event) => {
206+ console.log(`Received text delta:${event.delta}.`);
207+ });
208+ realtimeClient.on("response.text.done", () => console.log());
209+ realtimeClient.on("response.done", () => realtimeClient.close());
210+ realtimeClient.socket.on("close", () => console.log("\nConnection closed!"));
183211 }
184- await text_in_audio_out();
212+ main().catch((err) => {
213+ console.error("The sample encountered an error:", err);
214+ });
215+ export { main };
185216 ```
186217
1872181. Run the JavaScript file.
@@ -201,22 +232,25 @@ The script gets a response from the model and prints the transcript and audio da
201232The output will look similar to the following:
202233
203234```console
204- Received text delta: Hello
205- Received text delta: !
206- Received text delta: How
207- Received text delta: can
208- Received text delta: I
235+ Received text delta:Of.
236+ Received text delta: course.
237+ Received text delta:!.
238+ Received text delta: How.
239+ Received text delta: can.
209240Received 4800 bytes of audio data.
210241Received 7200 bytes of audio data.
211- Received text delta: help
242+ Received text delta: I.
212243Received 12000 bytes of audio data.
213- Received text delta: you
214- Received text delta: today
215- Received text delta: ?
244+ Received text delta: help.
245+ Received text delta: you.
246+ Received text delta: today.
247+ Received text delta:?.
216248Received 12000 bytes of audio data.
217249Received 12000 bytes of audio data.
218250Received 12000 bytes of audio data.
219- Received 24000 bytes of audio data.
251+ Received 26400 bytes of audio data.
252+
253+ Connection closed!
220254```
221255
222256## Web application sample
0 commit comments