Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ Basic text based example with `ws`:
import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws';

const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' });
await rt.open();

// access the underlying `ws.WebSocket` instance
rt.socket.on('open', () => {
Expand Down Expand Up @@ -147,6 +148,7 @@ To use the web API `WebSocket` implementation, replace `OpenAIRealtimeWS` with `
import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket';

const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-4o-realtime-preview-2024-12-17' });
await rt.open();
// ...
rt.socket.addEventListener('open', () => {
// ...
Expand All @@ -163,6 +165,7 @@ It is **highly recommended** that you register an `error` event listener and han

```ts
const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' });
await rt.open();
rt.on('error', (err) => {
// in a real world scenario this should be logged somewhere as you
// likely want to continue processing events regardless of any errors
Expand Down Expand Up @@ -499,7 +502,7 @@ const credential = new DefaultAzureCredential();
const scope = 'https://cognitiveservices.azure.com/.default';
const azureADTokenProvider = getBearerTokenProvider(credential, scope);

const openai = new AzureOpenAI({ azureADTokenProvider });
const openai = new AzureOpenAI({ azureADTokenProvider, apiVersion: "<The API version, e.g. 2024-10-01-preview>" });

const result = await openai.chat.completions.create({
model: 'gpt-4o',
Expand Down
3 changes: 2 additions & 1 deletion examples/azure.ts → examples/azure/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import { AzureOpenAI } from 'openai';
import { getBearerTokenProvider, DefaultAzureCredential } from '@azure/identity';
import 'dotenv/config';

// Corresponds to your Model deployment within your OpenAI resource, e.g. gpt-4-1106-preview
// Navigate to the Azure OpenAI Studio to deploy a model.
Expand All @@ -13,7 +14,7 @@ const azureADTokenProvider = getBearerTokenProvider(credential, scope);

// Make sure to set AZURE_OPENAI_ENDPOINT with the endpoint of your Azure resource.
// You can find it in the Azure Portal.
const openai = new AzureOpenAI({ azureADTokenProvider });
const openai = new AzureOpenAI({ azureADTokenProvider, apiVersion: '2024-10-01-preview' });

async function main() {
console.log('Non-streaming:');
Expand Down
57 changes: 57 additions & 0 deletions examples/azure/websocket.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket';
import { AzureOpenAI } from 'openai';
import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity';
import 'dotenv/config';

/**
 * Example: stream a text response from an Azure OpenAI realtime deployment
 * using the web `WebSocket`-based client (`OpenAIRealtimeWebSocket`).
 *
 * Authentication uses a bearer-token provider built from
 * `DefaultAzureCredential` for the Cognitive Services scope; the provider is
 * handed to `AzureOpenAI`, which is then passed to the realtime client.
 */
async function main() {
const cred = new DefaultAzureCredential();
const scope = 'https://cognitiveservices.azure.com/.default';
// Name of the Azure model deployment; it is passed to the realtime client as `model`.
const deploymentName = 'gpt-4o-realtime-preview-1001';
const azureADTokenProvider = getBearerTokenProvider(cred, scope);
const client = new AzureOpenAI({ azureADTokenProvider, apiVersion: '2024-10-01-preview' });
const rt = new OpenAIRealtimeWebSocket({ model: deploymentName }, client);
// `open()` must be awaited before touching `rt.socket`: the socket is created there.
await rt.open();

// access the underlying web `WebSocket` instance
rt.socket.addEventListener('open', () => {
console.log('Connection opened!');
// Restrict the session to text output.
rt.send({
type: 'session.update',
session: {
modalities: ['text'],
model: 'gpt-4o-realtime-preview',
},
});

// Add the user's prompt to the conversation.
rt.send({
type: 'conversation.item.create',
item: {
type: 'message',
role: 'user',
content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }],
},
});

// Ask the server to generate a response.
rt.send({ type: 'response.create' });
});

rt.on('error', (err) => {
// in a real world scenario this should be logged somewhere as you
// likely want to continue processing events regardless of any errors
throw err;
});

rt.on('session.created', (event) => {
console.log('session created!', event.session);
console.log();
});

// Print text as it streams in, then end the line when the text is complete.
rt.on('response.text.delta', (event) => process.stdout.write(event.delta));
rt.on('response.text.done', () => console.log());

// Close the connection once the response has finished.
rt.on('response.done', () => rt.close());

rt.socket.addEventListener('close', () => console.log('\nConnection closed!'));
}

main();
64 changes: 64 additions & 0 deletions examples/azure/ws.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity';
import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws';
import { AzureOpenAI } from 'openai';
import 'dotenv/config';

/**
 * Example: stream a text response from an Azure OpenAI realtime deployment
 * using the `ws`-based client (`OpenAIRealtimeWS`).
 *
 * Authentication uses a bearer-token provider built from
 * `DefaultAzureCredential` for the Cognitive Services scope; the provider is
 * handed to `AzureOpenAI`, which is then passed to the realtime client.
 */
async function main() {
  const cred = new DefaultAzureCredential();
  const scope = 'https://cognitiveservices.azure.com/.default';
  // Name of the Azure model deployment; it is passed to the realtime client as `model`.
  const deploymentName = 'gpt-4o-realtime-preview-1001';
  const azureADTokenProvider = getBearerTokenProvider(cred, scope);
  const client = new AzureOpenAI({ azureADTokenProvider, apiVersion: '2024-10-01-preview' });
  const rt = new OpenAIRealtimeWS({ model: deploymentName }, client);
  // `open()` must be awaited before touching `rt.socket`: the socket is created there.
  await rt.open();

  // access the underlying `ws.WebSocket` instance
  rt.socket.on('open', () => {
    console.log('Connection opened!');

    // Restrict the session to text output. One update is enough; sending the
    // identical `session.update` event a second time has no additional effect.
    rt.send({
      type: 'session.update',
      session: {
        modalities: ['text'],
        model: 'gpt-4o-realtime-preview',
      },
    });

    // Add the user's prompt to the conversation.
    rt.send({
      type: 'conversation.item.create',
      item: {
        type: 'message',
        role: 'user',
        content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }],
      },
    });

    // Ask the server to generate a response.
    rt.send({ type: 'response.create' });
  });

  rt.on('error', (err) => {
    // in a real world scenario this should be logged somewhere as you
    // likely want to continue processing events regardless of any errors
    throw err;
  });

  rt.on('session.created', (event) => {
    console.log('session created!', event.session);
    console.log();
  });

  // Print text as it streams in, then end the line when the text is complete.
  rt.on('response.text.delta', (event) => process.stdout.write(event.delta));
  rt.on('response.text.done', () => console.log());

  // Close the connection once the response has finished.
  rt.on('response.done', () => rt.close());

  rt.socket.on('close', () => console.log('\nConnection closed!'));
}

main();
1 change: 1 addition & 0 deletions examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"private": true,
"dependencies": {
"@azure/identity": "^4.2.0",
"dotenv": "^16.4.7",
"express": "^4.18.2",
"next": "^14.1.1",
"openai": "file:..",
Expand Down
1 change: 1 addition & 0 deletions examples/realtime/websocket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket';

async function main() {
const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-4o-realtime-preview-2024-12-17' });
await rt.open();

// access the underlying web `WebSocket` instance
rt.socket.addEventListener('open', () => {
Expand Down
3 changes: 2 additions & 1 deletion examples/realtime/ws.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@ import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws';

async function main() {
const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' });
await rt.open();

// access the underlying `ws.WebSocket` instance
rt.socket.on('open', () => {
console.log('Connection opened!');
rt.send({
type: 'session.update',
session: {
modalities: ['foo'] as any,
modalities: ['text'],
model: 'gpt-4o-realtime-preview',
},
});
Expand Down
14 changes: 10 additions & 4 deletions src/beta/realtime/internal-base.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { RealtimeClientEvent, RealtimeServerEvent, ErrorEvent } from '../../resources/beta/realtime/realtime';
import { EventEmitter } from '../../lib/EventEmitter';
import { OpenAIError } from '../../error';
import OpenAI, { AzureOpenAI } from 'openai';

export class OpenAIRealtimeError extends OpenAIError {
/**
Expand Down Expand Up @@ -73,11 +74,16 @@ export abstract class OpenAIRealtimeEmitter extends EventEmitter<RealtimeEvents>
}
}

export function buildRealtimeURL(props: { baseURL: string; model: string }): URL {
export function buildRealtimeURL(client: Pick<OpenAI, 'apiKey' | 'baseURL'>, model: string): URL {
const path = '/realtime';

const url = new URL(props.baseURL + (props.baseURL.endsWith('/') ? path.slice(1) : path));
const baseURL = client.baseURL;
const url = new URL(baseURL + (baseURL.endsWith('/') ? path.slice(1) : path));
url.protocol = 'wss';
url.searchParams.set('model', props.model);
if (client instanceof AzureOpenAI) {
url.searchParams.set('api-version', client.apiVersion);
url.searchParams.set('deployment', model);
} else {
url.searchParams.set('model', model);
}
return url;
}
42 changes: 31 additions & 11 deletions src/beta/realtime/websocket.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { OpenAI } from '../../index';
import { AzureOpenAI, OpenAI } from '../../index';
import { OpenAIError } from '../../error';
import * as Core from '../../core';
import type { RealtimeClientEvent, RealtimeServerEvent } from '../../resources/beta/realtime/realtime';
Expand Down Expand Up @@ -27,7 +27,9 @@ export class OpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter {
model: string;
dangerouslyAllowBrowser?: boolean;
},
client?: Pick<OpenAI, 'apiKey' | 'baseURL'>,
private client: Pick<OpenAI, 'apiKey' | 'baseURL'> = new OpenAI({
dangerouslyAllowBrowser: props.dangerouslyAllowBrowser,
}),
) {
super();

Expand All @@ -41,16 +43,31 @@ export class OpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter {
"It looks like you're running in a browser-like environment.\n\nThis is disabled by default, as it risks exposing your secret API credentials to attackers.\n\nYou can avoid this error by creating an ephemeral session token:\nhttps://platform.openai.com/docs/api-reference/realtime-sessions\n",
);
}
this.url = buildRealtimeURL(this.client, props.model);
}

client ??= new OpenAI({ dangerouslyAllowBrowser });

this.url = buildRealtimeURL({ baseURL: client.baseURL, model: props.model });
// @ts-ignore
this.socket = new WebSocket(this.url, [
'realtime',
`openai-insecure-api-key.${client.apiKey}`,
'openai-beta.realtime-v1',
]);
async open(): Promise<void> {
if (this.client instanceof AzureOpenAI) {
if (this.client.apiKey !== '<Missing Key>') {
this.url.searchParams.set('api-key', this.client.apiKey);
} else {
const token = await this.client.getAzureADToken();
if (token) {
this.url.searchParams.set('Authorization', `Bearer ${token}`);
} else {
throw new Error('AzureOpenAI is not instantiated correctly. No API key or token provided.');
}
}
// @ts-ignore
this.socket = new WebSocket(this.url, ['realtime', 'openai-beta.realtime-v1']);
} else {
// @ts-ignore
this.socket = new WebSocket(this.url, [
'realtime',
`openai-insecure-api-key.${this.client.apiKey}`,
'openai-beta.realtime-v1',
]);
}

this.socket.addEventListener('message', (websocketEvent: MessageEvent) => {
const event = (() => {
Expand Down Expand Up @@ -80,6 +97,9 @@ export class OpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter {
}

send(event: RealtimeClientEvent) {
if (!this.socket) {
throw new Error('The socket is not open, call `open` first');
}
try {
this.socket.send(JSON.stringify(event));
} catch (err) {
Expand Down
61 changes: 48 additions & 13 deletions src/beta/realtime/ws.ts
Original file line number Diff line number Diff line change
@@ -1,28 +1,60 @@
import * as WS from 'ws';
import { OpenAI } from '../../index';
import { AzureOpenAI, OpenAI } from '../../index';
import type { RealtimeClientEvent, RealtimeServerEvent } from '../../resources/beta/realtime/realtime';
import { OpenAIRealtimeEmitter, buildRealtimeURL } from './internal-base';

export class OpenAIRealtimeWS extends OpenAIRealtimeEmitter {
url: URL;
socket: WS.WebSocket;
options: WS.ClientOptions | undefined;

constructor(
props: { model: string; options?: WS.ClientOptions | undefined },
client?: Pick<OpenAI, 'apiKey' | 'baseURL'>,
private client: Pick<OpenAI, 'apiKey' | 'baseURL'> = new OpenAI(),
) {
super();
client ??= new OpenAI();
this.options = props.options;
this.url = buildRealtimeURL(client, props.model);
this.socket = undefined as any;
}

this.url = buildRealtimeURL({ baseURL: client.baseURL, model: props.model });
this.socket = new WS.WebSocket(this.url, {
...props.options,
headers: {
...props.options?.headers,
Authorization: `Bearer ${client.apiKey}`,
'OpenAI-Beta': 'realtime=v1',
},
});
async open(): Promise<void> {
const headers = {
...this.options?.headers,
'OpenAI-Beta': 'realtime=v1',
};
if (this.client instanceof AzureOpenAI) {
if (this.client.apiKey !== '<Missing Key>') {
this.socket = new WS.WebSocket(this.url, {
...this.options,
headers: {
...headers,
'api-key': this.client.apiKey,
},
});
} else {
const token = await this.client.getAzureADToken();
if (token) {
this.socket = new WS.WebSocket(this.url, {
...this.options,
headers: {
...headers,
Authorization: `Bearer ${token}`,
},
});
} else {
throw new Error('AzureOpenAI is not instantiated correctly. No API key or token provided.');
}
}
} else {
this.socket = new WS.WebSocket(this.url, {
...this.options,
headers: {
...headers,
Authorization: `Bearer ${this.client.apiKey}`,
},
});
}

this.socket.on('message', (wsEvent) => {
const event = (() => {
Expand Down Expand Up @@ -52,6 +84,9 @@ export class OpenAIRealtimeWS extends OpenAIRealtimeEmitter {
}

send(event: RealtimeClientEvent) {
if (!this.socket) {
throw new Error('The socket is not open, call `open` first');
}
try {
this.socket.send(JSON.stringify(event));
} catch (err) {
Expand All @@ -61,7 +96,7 @@ export class OpenAIRealtimeWS extends OpenAIRealtimeEmitter {

close(props?: { code: number; reason: string }) {
try {
this.socket.close(props?.code ?? 1000, props?.reason ?? 'OK');
this.socket?.close(props?.code ?? 1000, props?.reason ?? 'OK');
} catch (err) {
this._onError(null, 'could not close the connection', err);
}
Expand Down
Loading
Loading