Skip to content

Commit a796d21

Browse files
RobertCraigiestainless-app[bot]
authored andcommitted
feat(client): add Realtime API support (#1266)
1 parent 66067d3 commit a796d21

File tree

11 files changed

+560
-3
lines changed

11 files changed

+560
-3
lines changed

README.md

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,93 @@ main();
8383
If you need to cancel a stream, you can `break` from the loop
8484
or call `stream.controller.abort()`.
8585

86+
## Realtime API beta
87+
88+
The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as [function calling](https://platform.openai.com/docs/guides/function-calling) through a `WebSocket` connection.
89+
90+
The Realtime API works through a combination of client-sent events and server-sent events. Clients can send events to do things like update session configuration or send text and audio inputs. Server events confirm when audio responses have completed, or when a text response from the model has been received. A full event reference can be found [here](https://platform.openai.com/docs/api-reference/realtime-client-events) and a guide can be found [here](https://platform.openai.com/docs/guides/realtime).
91+
92+
This SDK supports accessing the Realtime API through the [WebSocket API](https://developer.mozilla.org/en-US/docs/Web/API/WebSocket) or with [ws](https://github.com/websockets/ws).
93+
94+
Basic text-based example with `ws`:
95+
96+
```ts
97+
// requires `yarn add ws @types/ws`
98+
import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws';
99+
100+
const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' });
101+
102+
// access the underlying `ws.WebSocket` instance
103+
rt.socket.on('open', () => {
104+
console.log('Connection opened!');
105+
rt.send({
106+
type: 'session.update',
107+
session: {
108+
modalities: ['text'],
109+
model: 'gpt-4o-realtime-preview',
110+
},
111+
});
112+
113+
rt.send({
114+
type: 'conversation.item.create',
115+
item: {
116+
type: 'message',
117+
role: 'user',
118+
content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }],
119+
},
120+
});
121+
122+
rt.send({ type: 'response.create' });
123+
});
124+
125+
rt.on('error', (err) => {
126+
// in a real world scenario this should be logged somewhere as you
127+
// likely want to continue processing events regardless of any errors
128+
throw err;
129+
});
130+
131+
rt.on('session.created', (event) => {
132+
console.log('session created!', event.session);
133+
console.log();
134+
});
135+
136+
rt.on('response.text.delta', (event) => process.stdout.write(event.delta));
137+
rt.on('response.text.done', () => console.log());
138+
139+
rt.on('response.done', () => rt.close());
140+
141+
rt.socket.on('close', () => console.log('\nConnection closed!'));
142+
```
143+
144+
To use the web API `WebSocket` implementation, replace `OpenAIRealtimeWS` with `OpenAIRealtimeWebSocket` and adjust any `rt.socket` access:
145+
146+
```ts
147+
import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket';
148+
149+
const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-4o-realtime-preview-2024-12-17' });
150+
// ...
151+
rt.socket.addEventListener('open', () => {
152+
// ...
153+
});
154+
```
155+
156+
A full example can be found [here](https://github.com/openai/openai-node/blob/master/examples/realtime/websocket.ts).
157+
158+
### Realtime error handling
159+
160+
When an error is encountered, either on the client side or returned from the server through the [`error` event](https://platform.openai.com/docs/guides/realtime/realtime-api-beta#handling-errors), the `error` event listener will be fired. However, if you haven't registered an `error` event listener then an `unhandled Promise rejection` error will be thrown.
161+
162+
It is **highly recommended** that you register an `error` event listener and handle errors appropriately, as the underlying connection is typically still usable.
163+
164+
```ts
165+
const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' });
166+
rt.on('error', (err) => {
167+
// in a real world scenario this should be logged somewhere as you
168+
// likely want to continue processing events regardless of any errors
169+
throw err;
170+
});
171+
```
172+
86173
### Request & Response types
87174

88175
This library includes TypeScript definitions for all request params and response fields. You may import and use them like so:

examples/package.json

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@
66
"license": "MIT",
77
"private": true,
88
"dependencies": {
9+
"@azure/identity": "^4.2.0",
910
"express": "^4.18.2",
1011
"next": "^14.1.1",
1112
"openai": "file:..",
12-
"zod-to-json-schema": "^3.21.4",
13-
"@azure/identity": "^4.2.0"
13+
"zod-to-json-schema": "^3.21.4"
1414
},
1515
"devDependencies": {
1616
"@types/body-parser": "^1.19.3",
17-
"@types/express": "^4.17.19"
17+
"@types/express": "^4.17.19",
18+
"@types/web": "^0.0.194"
1819
}
1920
}

examples/realtime/websocket.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket';
2+
3+
async function main() {
4+
const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-4o-realtime-preview-2024-12-17' });
5+
6+
// access the underlying `WebSocket` instance
7+
rt.socket.addEventListener('open', () => {
8+
console.log('Connection opened!');
9+
rt.send({
10+
type: 'session.update',
11+
session: {
12+
modalities: ['text'],
13+
model: 'gpt-4o-realtime-preview',
14+
},
15+
});
16+
17+
rt.send({
18+
type: 'conversation.item.create',
19+
item: {
20+
type: 'message',
21+
role: 'user',
22+
content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }],
23+
},
24+
});
25+
26+
rt.send({ type: 'response.create' });
27+
});
28+
29+
rt.on('error', (err) => {
30+
// in a real world scenario this should be logged somewhere as you
31+
// likely want to continue processing events regardless of any errors
32+
throw err;
33+
});
34+
35+
rt.on('session.created', (event) => {
36+
console.log('session created!', event.session);
37+
console.log();
38+
});
39+
40+
rt.on('response.text.delta', (event) => process.stdout.write(event.delta));
41+
rt.on('response.text.done', () => console.log());
42+
43+
rt.on('response.done', () => rt.close());
44+
45+
rt.socket.addEventListener('close', () => console.log('\nConnection closed!'));
46+
}
47+
48+
main();

examples/realtime/ws.ts

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws';
2+
3+
async function main() {
4+
const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' });
5+
6+
// access the underlying `ws.WebSocket` instance
7+
rt.socket.on('open', () => {
8+
console.log('Connection opened!');
9+
rt.send({
10+
type: 'session.update',
11+
session: {
12+
modalities: ['foo'] as any,
13+
model: 'gpt-4o-realtime-preview',
14+
},
15+
});
16+
rt.send({
17+
type: 'session.update',
18+
session: {
19+
modalities: ['text'],
20+
model: 'gpt-4o-realtime-preview',
21+
},
22+
});
23+
24+
rt.send({
25+
type: 'conversation.item.create',
26+
item: {
27+
type: 'message',
28+
role: 'user',
29+
content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }],
30+
},
31+
});
32+
33+
rt.send({ type: 'response.create' });
34+
});
35+
36+
rt.on('error', (err) => {
37+
// in a real world scenario this should be logged somewhere as you
38+
// likely want to continue processing events regardless of any errors
39+
throw err;
40+
});
41+
42+
rt.on('session.created', (event) => {
43+
console.log('session created!', event.session);
44+
console.log();
45+
});
46+
47+
rt.on('response.text.delta', (event) => process.stdout.write(event.delta));
48+
rt.on('response.text.done', () => console.log());
49+
50+
rt.on('response.done', () => rt.close());
51+
52+
rt.socket.on('close', () => console.log('\nConnection closed!'));
53+
}
54+
55+
main();

package.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
"@swc/core": "^1.3.102",
3737
"@swc/jest": "^0.2.29",
3838
"@types/jest": "^29.4.0",
39+
"@types/ws": "^8.5.13",
3940
"@typescript-eslint/eslint-plugin": "^6.7.0",
4041
"@typescript-eslint/parser": "^6.7.0",
4142
"eslint": "^8.49.0",
@@ -52,6 +53,7 @@
5253
"tsc-multi": "^1.1.0",
5354
"tsconfig-paths": "^4.0.0",
5455
"typescript": "^4.8.2",
56+
"ws": "^8.18.0",
5557
"zod": "^3.23.8"
5658
},
5759
"sideEffects": [
@@ -126,9 +128,13 @@
126128
},
127129
"bin": "./bin/cli",
128130
"peerDependencies": {
131+
"ws": "^8.18.0",
129132
"zod": "^3.23.8"
130133
},
131134
"peerDependenciesMeta": {
135+
"ws": {
136+
"optional": true
137+
},
132138
"zod": {
133139
"optional": true
134140
}

src/beta/realtime/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export { OpenAIRealtimeError } from './internal-base';

src/beta/realtime/internal-base.ts

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import { RealtimeClientEvent, RealtimeServerEvent, ErrorEvent } from '../../resources/beta/realtime/realtime';
2+
import { EventEmitter } from '../../lib/EventEmitter';
3+
import { OpenAIError } from '../../error';
4+
5+
/**
 * Error type used for failures on a Realtime connection.
 *
 * Constructed with the `error` server event that triggered it, or with `null`
 * when there is no server event to attach; in the latter case both `error`
 * and `event_id` are `undefined`.
 */
export class OpenAIRealtimeError extends OpenAIError {
  /**
   * The `error` payload carried by the API's `error` event, when one was given.
   */
  error?: ErrorEvent.Error | undefined;

  /**
   * The `event_id` of the server event this error was created from, when one was given.
   */
  event_id?: string | undefined;

  constructor(message: string, event: ErrorEvent | null) {
    super(message);

    // Both fields are always assigned, falling back to `undefined` for a `null` event.
    const source = event ?? undefined;
    this.error = source?.error;
    this.event_id = source?.event_id;
  }
}
23+
24+
/**
 * Re-maps every property of `T` onto a single object type, property by property.
 * The trailing `& {}` leaves the set of properties unchanged.
 */
type Simplify<T> = { [Key in keyof T]: T[Key] } & {};
25+
26+
/**
 * Listener signatures keyed by event name:
 *
 * - `event` listeners take any `RealtimeServerEvent`;
 * - `error` listeners take an `OpenAIRealtimeError`;
 * - each remaining server event `type` (everything except `'error'`) maps to a
 *   listener taking the member of `RealtimeServerEvent` with that `type`.
 */
type RealtimeEvents = Simplify<
  {
    event: (event: RealtimeServerEvent) => void;
    error: (error: OpenAIRealtimeError) => void;
  } & {
    [Type in Exclude<RealtimeServerEvent['type'], 'error'>]: (
      event: Extract<RealtimeServerEvent, { type: Type }>,
    ) => unknown;
  }
>;
36+
37+
/**
 * Abstract base for Realtime clients: a typed event emitter over
 * `RealtimeEvents` plus the `send` / `close` operations that concrete
 * subclasses must provide.
 */
export abstract class OpenAIRealtimeEmitter extends EventEmitter<RealtimeEvents> {
  /**
   * Send a client event to the API.
   */
  abstract send(event: RealtimeClientEvent): void;

  /**
   * Close the websocket connection, optionally with a close code and reason.
   */
  abstract close(props?: { code: number; reason: string }): void;

  /**
   * Turn a failure into an `OpenAIRealtimeError` and deliver it.
   *
   * The error message is built from the server event's `error` payload when
   * present, otherwise from `message` (defaulting to `'unknown error'`).
   * If an `error` listener is registered, the error is emitted to it.
   * Otherwise a hint about binding an `error` callback is appended to the
   * message and the error is passed to `Promise.reject` without a handler.
   *
   * @param event - the `error` server event, or `null` if there is none
   * @param message - message used when `event` carries no `error` payload
   * @param cause - underlying error, stored on the resulting error's `cause`
   */
  protected _onError(event: null, message: string, cause: any): void;
  protected _onError(event: ErrorEvent, message?: string | undefined): void;
  protected _onError(event: ErrorEvent | null, message?: string | undefined, cause?: any): void {
    const apiError = event?.error;

    let description: string;
    if (apiError) {
      description = `${apiError.message} code=${apiError.code} param=${apiError.param} type=${apiError.type} event_id=${apiError.event_id}`;
    } else {
      description = message ?? 'unknown error';
    }

    const listening = this._hasListener('error');
    if (!listening) {
      description += `\n\nTo resolve these unhandled rejection errors you should bind an \`error\` callback, e.g. \`rt.on('error', (error) => ...)\` `;
    }

    const error = new OpenAIRealtimeError(description, event);
    // @ts-ignore
    error.cause = cause;

    if (listening) {
      this._emit('error', error);
    } else {
      // No handler is attached to this rejected promise.
      Promise.reject(error);
    }
  }
}
75+
76+
/**
 * Build the URL used to open a Realtime connection.
 *
 * Appends `realtime` to `baseURL` (joining with exactly one `/` when the base
 * ends with a single trailing slash or none), sets the protocol to `wss`, and
 * sets the `model` query parameter.
 *
 * @param props.baseURL - API base URL, with or without a trailing slash
 * @param props.model - model name placed in the `model` query parameter
 * @returns the resulting `wss:` URL
 */
export function buildRealtimeURL(props: { baseURL: string; model: string }): URL {
  const { baseURL, model } = props;
  const joiner = baseURL.endsWith('/') ? '' : '/';

  const url = new URL(`${baseURL}${joiner}realtime`);
  url.protocol = 'wss';
  url.searchParams.set('model', model);
  return url;
}

0 commit comments

Comments
 (0)