diff --git a/README.md b/README.md index 3bd386e99..c56a03806 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ Basic text based example with `ws`: import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws'; const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' }); +await rt.open(); // access the underlying `ws.WebSocket` instance rt.socket.on('open', () => { @@ -147,6 +148,7 @@ To use the web API `WebSocket` implementation, replace `OpenAIRealtimeWS` with ` import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket'; const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-4o-realtime-preview-2024-12-17' }); +await rt.open(); // ... rt.socket.addEventListener('open', () => { // ... @@ -163,6 +165,7 @@ It is **highly recommended** that you register an `error` event listener and han ```ts const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' }); +await rt.open(); rt.on('error', (err) => { // in a real world scenario this should be logged somewhere as you // likely want to continue procesing events regardless of any errors @@ -499,7 +502,7 @@ const credential = new DefaultAzureCredential(); const scope = 'https://cognitiveservices.azure.com/.default'; const azureADTokenProvider = getBearerTokenProvider(credential, scope); -const openai = new AzureOpenAI({ azureADTokenProvider }); +const openai = new AzureOpenAI({ azureADTokenProvider, apiVersion: "" }); const result = await openai.chat.completions.create({ model: 'gpt-4o', diff --git a/examples/azure.ts b/examples/azure/chat.ts similarity index 91% rename from examples/azure.ts rename to examples/azure/chat.ts index 5fe1718fa..46df820f8 100755 --- a/examples/azure.ts +++ b/examples/azure/chat.ts @@ -2,6 +2,7 @@ import { AzureOpenAI } from 'openai'; import { getBearerTokenProvider, DefaultAzureCredential } from '@azure/identity'; +import 'dotenv/config'; // Corresponds to your Model deployment within your OpenAI resource, e.g. gpt-4-1106-preview // Navigate to the Azure OpenAI Studio to deploy a model. @@ -13,7 +14,7 @@ const azureADTokenProvider = getBearerTokenProvider(credential, scope); // Make sure to set AZURE_OPENAI_ENDPOINT with the endpoint of your Azure resource. // You can find it in the Azure Portal. -const openai = new AzureOpenAI({ azureADTokenProvider }); +const openai = new AzureOpenAI({ azureADTokenProvider, apiVersion: '2024-10-01-preview' }); async function main() { console.log('Non-streaming:'); diff --git a/examples/azure/websocket.ts b/examples/azure/websocket.ts new file mode 100644 index 000000000..87e472143 --- /dev/null +++ b/examples/azure/websocket.ts @@ -0,0 +1,57 @@ +import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket'; +import { AzureOpenAI } from 'openai'; +import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity'; +import 'dotenv/config'; + +async function main() { + const cred = new DefaultAzureCredential(); + const scope = 'https://cognitiveservices.azure.com/.default'; + const deploymentName = 'gpt-4o-realtime-preview-1001'; + const azureADTokenProvider = getBearerTokenProvider(cred, scope); + const client = new AzureOpenAI({ azureADTokenProvider, apiVersion: '2024-10-01-preview' }); + const rt = new OpenAIRealtimeWebSocket({ model: deploymentName }, client); + await rt.open(); + + // access the underlying `ws.WebSocket` instance + rt.socket.addEventListener('open', () => { + console.log('Connection opened!'); + rt.send({ + type: 'session.update', + session: { + modalities: ['text'], + model: 'gpt-4o-realtime-preview', + }, + }); + + rt.send({ + type: 'conversation.item.create', + item: { + type: 'message', + role: 'user', + content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }], + }, + }); + + rt.send({ type: 'response.create' }); + }); + + rt.on('error', (err) => { + // in a real world scenario this should be logged somewhere as you + // likely want to continue procesing events regardless of any errors + throw err; + }); + + rt.on('session.created', (event) => { + console.log('session created!', event.session); + console.log(); + }); + + rt.on('response.text.delta', (event) => process.stdout.write(event.delta)); + rt.on('response.text.done', () => console.log()); + + rt.on('response.done', () => rt.close()); + + rt.socket.addEventListener('close', () => console.log('\nConnection closed!')); +} + +main(); diff --git a/examples/azure/ws.ts b/examples/azure/ws.ts new file mode 100644 index 000000000..9db8b223d --- /dev/null +++ b/examples/azure/ws.ts @@ -0,0 +1,64 @@ +import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity'; +import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws'; +import { AzureOpenAI } from 'openai'; +import 'dotenv/config'; + +async function main() { + const cred = new DefaultAzureCredential(); + const scope = 'https://cognitiveservices.azure.com/.default'; + const deploymentName = 'gpt-4o-realtime-preview-1001'; + const azureADTokenProvider = getBearerTokenProvider(cred, scope); + const client = new AzureOpenAI({ azureADTokenProvider, apiVersion: '2024-10-01-preview' }); + const rt = new OpenAIRealtimeWS({ model: deploymentName }, client); + await rt.open(); + + // access the underlying `ws.WebSocket` instance + rt.socket.on('open', () => { + console.log('Connection opened!'); + rt.send({ + type: 'session.update', + session: { + modalities: ['text'], + model: 'gpt-4o-realtime-preview', + }, + }); + rt.send({ + type: 'session.update', + session: { + modalities: ['text'], + model: 'gpt-4o-realtime-preview', + }, + }); + + rt.send({ + type: 'conversation.item.create', + item: { + type: 'message', + role: 'user', + content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }], + }, + }); + + rt.send({ type: 'response.create' }); + }); + + rt.on('error', (err) => { + // in a real world scenario this should be logged somewhere as you + // likely want to continue procesing events regardless of any errors + throw err; + }); + + rt.on('session.created', (event) => { + console.log('session created!', event.session); + console.log(); + }); + + rt.on('response.text.delta', (event) => process.stdout.write(event.delta)); + rt.on('response.text.done', () => console.log()); + + rt.on('response.done', () => rt.close()); + + rt.socket.on('close', () => console.log('\nConnection closed!')); +} + +main(); diff --git a/examples/package.json b/examples/package.json index b8c34ac45..70ec2c523 100644 --- a/examples/package.json +++ b/examples/package.json @@ -7,6 +7,7 @@ "private": true, "dependencies": { "@azure/identity": "^4.2.0", + "dotenv": "^16.4.7", "express": "^4.18.2", "next": "^14.1.1", "openai": "file:..", diff --git a/examples/realtime/websocket.ts b/examples/realtime/websocket.ts index 0da131bc3..4ef8b9c7b 100644 --- a/examples/realtime/websocket.ts +++ b/examples/realtime/websocket.ts @@ -2,6 +2,7 @@ import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket'; async function main() { const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-4o-realtime-preview-2024-12-17' }); + await rt.open(); // access the underlying `ws.WebSocket` instance rt.socket.addEventListener('open', () => { diff --git a/examples/realtime/ws.ts b/examples/realtime/ws.ts index 4bbe85e5d..5c3f0440c 100644 --- a/examples/realtime/ws.ts +++ b/examples/realtime/ws.ts @@ -2,6 +2,7 @@ import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws'; async function main() { const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' }); + await rt.open(); // access the underlying `ws.WebSocket` instance rt.socket.on('open', () => { @@ -9,7 +10,7 @@ async function main() { rt.send({ type: 'session.update', session: { - modalities: ['foo'] as any, + modalities: ['text'], model: 'gpt-4o-realtime-preview', }, }); diff --git a/src/beta/realtime/internal-base.ts b/src/beta/realtime/internal-base.ts index 391d69911..219dfc478 100644 --- a/src/beta/realtime/internal-base.ts +++ b/src/beta/realtime/internal-base.ts @@ -1,6 +1,7 @@ import { RealtimeClientEvent, RealtimeServerEvent, ErrorEvent } from '../../resources/beta/realtime/realtime'; import { EventEmitter } from '../../lib/EventEmitter'; import { OpenAIError } from '../../error'; +import OpenAI, { AzureOpenAI } from 'openai'; export class OpenAIRealtimeError extends OpenAIError { /** @@ -73,11 +74,16 @@ export abstract class OpenAIRealtimeEmitter extends EventEmitter } } -export function buildRealtimeURL(props: { baseURL: string; model: string }): URL { +export function buildRealtimeURL(client: Pick, model: string): URL { const path = '/realtime'; - - const url = new URL(props.baseURL + (props.baseURL.endsWith('/') ? path.slice(1) : path)); + const baseURL = client.baseURL; + const url = new URL(baseURL + (baseURL.endsWith('/') ? path.slice(1) : path)); url.protocol = 'wss'; - url.searchParams.set('model', props.model); + if (client instanceof AzureOpenAI) { + url.searchParams.set('api-version', client.apiVersion); + url.searchParams.set('deployment', model); + } else { + url.searchParams.set('model', model); + } return url; } diff --git a/src/beta/realtime/websocket.ts b/src/beta/realtime/websocket.ts index e0853779d..26ad17ab5 100644 --- a/src/beta/realtime/websocket.ts +++ b/src/beta/realtime/websocket.ts @@ -1,4 +1,4 @@ -import { OpenAI } from '../../index'; +import { AzureOpenAI, OpenAI } from '../../index'; import { OpenAIError } from '../../error'; import * as Core from '../../core'; import type { RealtimeClientEvent, RealtimeServerEvent } from '../../resources/beta/realtime/realtime'; @@ -27,7 +27,9 @@ export class OpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter { model: string; dangerouslyAllowBrowser?: boolean; }, - client?: Pick, + private client: Pick = new OpenAI({ + dangerouslyAllowBrowser: props.dangerouslyAllowBrowser, + }), ) { super(); @@ -41,16 +43,31 @@ export class OpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter { "It looks like you're running in a browser-like environment.\n\nThis is disabled by default, as it risks exposing your secret API credentials to attackers.\n\nYou can avoid this error by creating an ephemeral session token:\nhttps://platform.openai.com/docs/api-reference/realtime-sessions\n", ); } + this.url = buildRealtimeURL(this.client, props.model); + } - client ??= new OpenAI({ dangerouslyAllowBrowser }); - - this.url = buildRealtimeURL({ baseURL: client.baseURL, model: props.model }); - // @ts-ignore - this.socket = new WebSocket(this.url, [ - 'realtime', - `openai-insecure-api-key.${client.apiKey}`, - 'openai-beta.realtime-v1', - ]); + async open(): Promise { + if (this.client instanceof AzureOpenAI) { + if (this.client.apiKey !== '') { + this.url.searchParams.set('api-key', this.client.apiKey); + } else { + const token = await this.client.getAzureADToken(); + if (token) { + this.url.searchParams.set('Authorization', `Bearer ${token}`); + } else { + throw new Error('AzureOpenAI is not instantiated correctly. No API key or token provided.'); + } + } + // @ts-ignore + this.socket = new WebSocket(this.url, ['realtime', 'openai-beta.realtime-v1']); + } else { + // @ts-ignore + this.socket = new WebSocket(this.url, [ + 'realtime', + `openai-insecure-api-key.${this.client.apiKey}`, + 'openai-beta.realtime-v1', + ]); + } this.socket.addEventListener('message', (websocketEvent: MessageEvent) => { const event = (() => { @@ -80,6 +97,9 @@ export class OpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter { } send(event: RealtimeClientEvent) { + if (!this.socket) { + throw new Error('The socket is not open, call `open` first'); + } try { this.socket.send(JSON.stringify(event)); } catch (err) { diff --git a/src/beta/realtime/ws.ts b/src/beta/realtime/ws.ts index 631a36cd2..e34791e27 100644 --- a/src/beta/realtime/ws.ts +++ b/src/beta/realtime/ws.ts @@ -1,28 +1,60 @@ import * as WS from 'ws'; -import { OpenAI } from '../../index'; +import { AzureOpenAI, OpenAI } from '../../index'; import type { RealtimeClientEvent, RealtimeServerEvent } from '../../resources/beta/realtime/realtime'; import { OpenAIRealtimeEmitter, buildRealtimeURL } from './internal-base'; export class OpenAIRealtimeWS extends OpenAIRealtimeEmitter { url: URL; socket: WS.WebSocket; + options: WS.ClientOptions | undefined; constructor( props: { model: string; options?: WS.ClientOptions | undefined }, - client?: Pick, + private client: Pick = new OpenAI(), ) { super(); - client ??= new OpenAI(); + this.options = props.options; + this.url = buildRealtimeURL(client, props.model); + this.socket = undefined as any; + } - this.url = buildRealtimeURL({ baseURL: client.baseURL, model: props.model }); - this.socket = new WS.WebSocket(this.url, { - ...props.options, - headers: { - ...props.options?.headers, - Authorization: `Bearer ${client.apiKey}`, - 'OpenAI-Beta': 'realtime=v1', - }, - }); + async open(): Promise { + const headers = { + ...this.options?.headers, + 'OpenAI-Beta': 'realtime=v1', + }; + if (this.client instanceof AzureOpenAI) { + if (this.client.apiKey !== '') { + this.socket = new WS.WebSocket(this.url, { + ...this.options, + headers: { + ...headers, + 'api-key': this.client.apiKey, + }, + }); + } else { + const token = await this.client.getAzureADToken(); + if (token) { + this.socket = new WS.WebSocket(this.url, { + ...this.options, + headers: { + ...headers, + Authorization: `Bearer ${token}`, + }, + }); + } else { + throw new Error('AzureOpenAI is not instantiated correctly. No API key or token provided.'); + } + } + } else { + this.socket = new WS.WebSocket(this.url, { + ...this.options, + headers: { + ...headers, + Authorization: `Bearer ${this.client.apiKey}`, + }, + }); + } this.socket.on('message', (wsEvent) => { const event = (() => { @@ -52,6 +84,9 @@ export class OpenAIRealtimeWS extends OpenAIRealtimeEmitter { } send(event: RealtimeClientEvent) { + if (!this.socket) { + throw new Error('The socket is not open, call `open` first'); + } try { this.socket.send(JSON.stringify(event)); } catch (err) { @@ -61,7 +96,7 @@ export class OpenAIRealtimeWS extends OpenAIRealtimeEmitter { close(props?: { code: number; reason: string }) { try { - this.socket.close(props?.code ?? 1000, props?.reason ?? 'OK'); + this.socket?.close(props?.code ?? 1000, props?.reason ?? 'OK'); } catch (err) { this._onError(null, 'could not close the connection', err); } diff --git a/src/index.ts b/src/index.ts index 944def00f..4a43ebe10 100644 --- a/src/index.ts +++ b/src/index.ts @@ -597,7 +597,7 @@ export class AzureOpenAI extends OpenAI { return super.buildRequest(options, props); } - private async _getAzureADToken(): Promise { + async getAzureADToken(): Promise { if (typeof this._azureADTokenProvider === 'function') { const token = await this._azureADTokenProvider(); if (!token || typeof token !== 'string') { @@ -624,7 +624,7 @@ export class AzureOpenAI extends OpenAI { if (opts.headers?.['api-key']) { return super.prepareOptions(opts); } - const token = await this._getAzureADToken(); + const token = await this.getAzureADToken(); opts.headers ??= {}; if (token) { opts.headers['Authorization'] = `Bearer ${token}`;