Skip to content

Commit 839244c

Browse files
committed
[Azure] Support Realtime API
1 parent fc019df commit 839244c

File tree

8 files changed

+366
-9
lines changed

8 files changed

+366
-9
lines changed

README.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ const credential = new DefaultAzureCredential();
499499
const scope = 'https://cognitiveservices.azure.com/.default';
500500
const azureADTokenProvider = getBearerTokenProvider(credential, scope);
501501

502-
const openai = new AzureOpenAI({ azureADTokenProvider });
502+
const openai = new AzureOpenAI({ azureADTokenProvider, apiVersion: "<The API version, e.g. 2024-10-01-preview>" });
503503

504504
const result = await openai.chat.completions.create({
505505
model: 'gpt-4o',
@@ -509,6 +509,31 @@ const result = await openai.chat.completions.create({
509509
console.log(result.choices[0]!.message?.content);
510510
```
511511

512+
### Realtime API
513+
This SDK provides real-time streaming capabilities for Azure OpenAI through the `AzureOpenAIRealtimeWS` and `AzureOpenAIRealtimeWebSocket` classes. These classes parallel the `OpenAIRealtimeWS` and `OpenAIRealtimeWebSocket` clients described previously, but they are specifically adapted for Azure OpenAI endpoints.
514+
515+
To utilize the real-time features, begin by creating a fully configured `AzureOpenAI` client and passing it into either `AzureOpenAIRealtimeWS` or `AzureOpenAIRealtimeWebSocket`. For example:
516+
517+
```ts
518+
const cred = new DefaultAzureCredential();
519+
const scope = 'https://cognitiveservices.azure.com/.default';
520+
const deploymentName = 'gpt-4o-realtime-preview-1001';
521+
const azureADTokenProvider = getBearerTokenProvider(cred, scope);
522+
const client = new AzureOpenAI({
523+
azureADTokenProvider,
524+
apiVersion: '2024-10-01-preview',
525+
deployment: deploymentName,
526+
});
527+
const rt = new AzureOpenAIRealtimeWS(client);
528+
```
529+
530+
Once the real-time client has been created, open its underlying WebSocket connection by invoking the `open` method:
531+
```ts
532+
await rt.open();
533+
```
534+
535+
With the connection established, you can then begin sending requests and receiving streaming responses in real time.
536+
512537
### Retries
513538

514539
Certain errors will be automatically retried 2 times by default, with a short exponential backoff.

examples/azure.ts renamed to examples/azure/chat.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import { AzureOpenAI } from 'openai';
44
import { getBearerTokenProvider, DefaultAzureCredential } from '@azure/identity';
5+
import 'dotenv/config';
56

67
// Corresponds to your Model deployment within your OpenAI resource, e.g. gpt-4-1106-preview
78
// Navigate to the Azure OpenAI Studio to deploy a model.
@@ -13,7 +14,7 @@ const azureADTokenProvider = getBearerTokenProvider(credential, scope);
1314

1415
// Make sure to set AZURE_OPENAI_ENDPOINT with the endpoint of your Azure resource.
1516
// You can find it in the Azure Portal.
16-
const openai = new AzureOpenAI({ azureADTokenProvider });
17+
const openai = new AzureOpenAI({ azureADTokenProvider, apiVersion: '2024-10-01-preview' });
1718

1819
async function main() {
1920
console.log('Non-streaming:');

examples/azure/websocket.ts

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import { AzureOpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket';
2+
import { AzureOpenAI } from 'openai';
3+
import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity';
4+
import 'dotenv/config';
5+
6+
/**
 * Example: stream a short text response from an Azure OpenAI realtime
 * deployment using `AzureOpenAIRealtimeWebSocket`.
 *
 * The client is authenticated with a bearer-token provider built from
 * `DefaultAzureCredential`. After `open()` resolves, listeners are attached
 * both to the raw socket (`open` / `close`) and to the realtime emitter
 * (server events and errors).
 */
async function main() {
  const tokenProvider = getBearerTokenProvider(
    new DefaultAzureCredential(),
    'https://cognitiveservices.azure.com/.default',
  );
  const azureClient = new AzureOpenAI({
    azureADTokenProvider: tokenProvider,
    apiVersion: '2024-10-01-preview',
    deployment: 'gpt-4o-realtime-preview-1001',
  });

  const realtime = new AzureOpenAIRealtimeWebSocket(azureClient);
  await realtime.open();

  // The underlying `WebSocket` instance is exposed as `socket`.
  const { socket } = realtime;

  socket.addEventListener('open', () => {
    console.log('Connection opened!');

    // Restrict the session to text output.
    realtime.send({
      type: 'session.update',
      session: {
        modalities: ['text'],
        model: 'gpt-4o-realtime-preview',
      },
    });

    // Queue the user message the model should respond to.
    realtime.send({
      type: 'conversation.item.create',
      item: {
        type: 'message',
        role: 'user',
        content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }],
      },
    });

    // Ask the server to start generating the response.
    realtime.send({ type: 'response.create' });
  });

  realtime.on('error', (err) => {
    // In a real-world scenario this should be logged somewhere, as you
    // likely want to continue processing events regardless of any errors.
    throw err;
  });

  realtime.on('session.created', (event) => {
    console.log('session created!', event.session);
    console.log();
  });

  // Print text deltas as they arrive and end the line when the text is done.
  realtime.on('response.text.delta', (event) => {
    process.stdout.write(event.delta);
  });
  realtime.on('response.text.done', () => {
    console.log();
  });

  // Close the connection once the response has completed.
  realtime.on('response.done', () => {
    realtime.close();
  });

  socket.addEventListener('close', () => {
    console.log('\nConnection closed!');
  });
}
60+
61+
main();

examples/azure/ws.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity';
2+
import { AzureOpenAIRealtimeWS } from 'openai/beta/realtime/ws';
3+
import { AzureOpenAI } from 'openai';
4+
import 'dotenv/config';
5+
6+
/**
 * Example: stream a short text response from an Azure OpenAI realtime
 * deployment using `AzureOpenAIRealtimeWS`.
 *
 * The client is authenticated with a bearer-token provider built from
 * `DefaultAzureCredential`. After `open()` resolves, listeners are attached
 * both to the raw socket (`open` / `close`) and to the realtime emitter
 * (server events and errors).
 */
async function main() {
  const cred = new DefaultAzureCredential();
  const scope = 'https://cognitiveservices.azure.com/.default';
  const deploymentName = 'gpt-4o-realtime-preview-1001';
  const azureADTokenProvider = getBearerTokenProvider(cred, scope);
  const client = new AzureOpenAI({
    azureADTokenProvider,
    apiVersion: '2024-10-01-preview',
    deployment: deploymentName,
  });
  const rt = new AzureOpenAIRealtimeWS(client);
  await rt.open();

  // access the underlying `ws.WebSocket` instance
  rt.socket.on('open', () => {
    console.log('Connection opened!');

    // Configure the session once; sending the identical `session.update`
    // a second time is redundant.
    rt.send({
      type: 'session.update',
      session: {
        modalities: ['text'],
        model: 'gpt-4o-realtime-preview',
      },
    });

    // Queue the user message the model should respond to.
    rt.send({
      type: 'conversation.item.create',
      item: {
        type: 'message',
        role: 'user',
        content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }],
      },
    });

    // Ask the server to start generating the response.
    rt.send({ type: 'response.create' });
  });

  rt.on('error', (err) => {
    // in a real world scenario this should be logged somewhere as you
    // likely want to continue processing events regardless of any errors
    throw err;
  });

  rt.on('session.created', (event) => {
    console.log('session created!', event.session);
    console.log();
  });

  // Print text deltas as they arrive and end the line when the text is done.
  rt.on('response.text.delta', (event) => process.stdout.write(event.delta));
  rt.on('response.text.done', () => console.log());

  // Close the connection once the response has completed.
  rt.on('response.done', () => rt.close());

  rt.socket.on('close', () => console.log('\nConnection closed!'));
}
67+
68+
main();

examples/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"private": true,
88
"dependencies": {
99
"@azure/identity": "^4.2.0",
10+
"dotenv": "^16.4.7",
1011
"express": "^4.18.2",
1112
"next": "^14.1.1",
1213
"openai": "file:..",

src/beta/realtime/websocket.ts

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { OpenAI } from '../../index';
1+
import { AzureOpenAI, OpenAI } from '../../index';
22
import { OpenAIError } from '../../error';
33
import * as Core from '../../core';
44
import type { RealtimeClientEvent, RealtimeServerEvent } from '../../resources/beta/realtime/realtime';
@@ -95,3 +95,106 @@ export class OpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter {
9595
}
9696
}
9797
}
98+
99+
/**
 * Realtime API client for Azure OpenAI that talks over a `WebSocket`.
 *
 * Construct it with a configured `AzureOpenAI` client, call `open()` to
 * create the socket, then use `send()` / `close()` and the emitter API
 * inherited from `OpenAIRealtimeEmitter`.
 */
export class AzureOpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter {
  /** The underlying socket; only assigned once `open()` has run. */
  socket: _WebSocket;

  /**
   * @param client - Azure client supplying base URL, API version, credentials
   *   and (optionally) the deployment name.
   * @param options - `deploymentName` is used only when the client itself has
   *   no deployment name.
   */
  constructor(
    private client: AzureOpenAI,
    private options: {
      deploymentName?: string;
    } = {},
  ) {
    super();
  }

  /**
   * Builds the realtime URL, creates the socket and wires up the `message`
   * and `error` listeners. This resolves once the socket object exists; it
   * does not wait for the socket's own `open` event.
   *
   * @throws Error when no deployment name is available, or when the client
   *   has neither an API key nor an Azure AD token.
   */
  async open(): Promise<void> {
    // Assembles the `wss` URL including version, deployment and credentials.
    const buildUrl = (params: {
      baseURL: string;
      deploymentName: string;
      apiVersion: string;
      apiKey: string;
      token: string | undefined;
    }): URL => {
      const realtimePath = '/realtime';
      // Avoid a double slash when the base URL already ends with one.
      const suffix = params.baseURL.endsWith('/') ? realtimePath.slice(1) : realtimePath;
      const target = new URL(params.baseURL + suffix);
      target.protocol = 'wss';
      target.searchParams.set('api-version', params.apiVersion);
      target.searchParams.set('deployment', params.deploymentName);

      // '<Missing Key>' is the placeholder checked here to detect that no
      // API key was configured; in that case fall back to the bearer token.
      if (params.apiKey !== '<Missing Key>') {
        target.searchParams.set('api-key', params.apiKey);
        return target;
      }
      if (!params.token) {
        throw new Error('AzureOpenAI is not instantiated correctly. No API key or token provided.');
      }
      target.searchParams.set('Authorization', `Bearer ${params.token}`);
      return target;
    };

    // The client's deployment name takes precedence over the option.
    const deploymentName = this.client.deploymentName ?? this.options.deploymentName;
    if (!deploymentName) {
      throw new Error('No deployment name provided');
    }

    const { apiVersion, baseURL, apiKey } = this.client;
    // The token is requested regardless of whether an API key is present.
    const token = await this.client.getAzureADToken();
    const url = buildUrl({ apiVersion, baseURL, deploymentName, apiKey, token });

    // @ts-ignore
    this.socket = new WebSocket(url, ['realtime', 'openai-beta.realtime-v1']);

    this.socket.addEventListener('message', (message: MessageEvent) => {
      let serverEvent: RealtimeServerEvent | null;
      try {
        serverEvent = JSON.parse(message.data.toString()) as RealtimeServerEvent;
      } catch (err) {
        this._onError(null, 'could not parse websocket event', err);
        serverEvent = null;
      }

      // Nothing to dispatch when parsing failed or produced a falsy value.
      if (!serverEvent) {
        return;
      }

      // Every event goes to the generic 'event' channel first.
      this._emit('event', serverEvent);

      if (serverEvent.type === 'error') {
        this._onError(serverEvent);
      } else {
        // @ts-expect-error TS isn't smart enough to get the relationship right here
        this._emit(serverEvent.type, serverEvent);
      }
    });

    this.socket.addEventListener('error', (errorEvent: any) => {
      this._onError(null, errorEvent.message, null);
    });
  }

  /**
   * Serializes `event` as JSON and sends it over the socket. Failures while
   * sending are routed to `_onError` instead of being thrown.
   *
   * @throws Error when `open()` has not been called yet.
   */
  send(event: RealtimeClientEvent) {
    if (!this.socket) {
      throw new Error('Socket is not open, call open() first');
    }

    const payload = () => JSON.stringify(event);
    try {
      this.socket.send(payload());
    } catch (err) {
      this._onError(null, 'could not send data', err);
    }
  }

  /**
   * Closes the socket, if one exists. Defaults to code `1000` and reason
   * `'OK'`; failures while closing are routed to `_onError`.
   */
  close(props?: { code: number; reason: string }) {
    try {
      const code = props?.code ?? 1000;
      const reason = props?.reason ?? 'OK';
      this.socket?.close(code, reason);
    } catch (err) {
      this._onError(null, 'could not close the connection', err);
    }
  }
}

0 commit comments

Comments
 (0)