diff --git a/.release-please-manifest.json b/.release-please-manifest.json index a0c8c3e19..7443ba284 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "5.16.0" + ".": "5.17.0" } diff --git a/.stats.yml b/.stats.yml index 5ad90ac5a..ebe81d146 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ -configured_endpoints: 119 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-8517ffa1004e31ca2523d617629e64be6fe4f13403ddfd9db5b3be002656cbde.yml -openapi_spec_hash: b64dd8c8b23082a7aa2a3e5c5fffd8bd -config_hash: fe0ea26680ac2075a6cd66416aefe7db +configured_endpoints: 118 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-356b4364203ff36d7724074cd04f6e684253bfcc3c9d969122d730aa7bc51b46.yml +openapi_spec_hash: 4ab8e96f52699bc3d2b0c4432aa92af8 +config_hash: b854932c0ea24b400bdd64e4376936bd diff --git a/CHANGELOG.md b/CHANGELOG.md index 2dd01aa0c..c358929fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## 5.17.0 (2025-09-02) + +Full Changelog: [v5.16.0...v5.17.0](https://github.com/openai/openai-node/compare/v5.16.0...v5.17.0) + +### Features + +* **api:** realtime API updates ([e817255](https://github.com/openai/openai-node/commit/e817255e6ff9e3ad6bd08b001644c335e0459537)) + + +### Chores + +* **internal:** update global Error reference ([e566ff3](https://github.com/openai/openai-node/commit/e566ff321642a100756224b75a67d44e262e5bea)) + ## 5.16.0 (2025-08-26) Full Changelog: [v5.15.0...v5.16.0](https://github.com/openai/openai-node/compare/v5.15.0...v5.16.0) diff --git a/README.md b/README.md index 9864a4829..351d9c0dc 100644 --- a/README.md +++ b/README.md @@ -264,14 +264,14 @@ const { data: stream, request_id } = await openai.chat.completions .withResponse(); ``` -## Realtime API Beta +## Realtime API The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as [function calling](https://platform.openai.com/docs/guides/function-calling) through a `WebSocket` connection. ```ts -import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket'; +import { OpenAIRealtimeWebSocket } from 'openai/realtime/websocket'; -const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-4o-realtime-preview-2024-12-17' }); +const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-realtime' }); rt.on('response.text.delta', (event) => process.stdout.write(event.delta)); ``` @@ -401,14 +401,14 @@ while (page.hasNextPage()) { } ``` -## Realtime API Beta +## Realtime API The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as [function calling](https://platform.openai.com/docs/guides/function-calling) through a `WebSocket` connection. 
```ts -import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket'; +import { OpenAIRealtimeWebSocket } from 'openai/realtime/websocket'; -const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-4o-realtime-preview-2024-12-17' }); +const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-realtime' }); rt.on('response.text.delta', (event) => process.stdout.write(event.delta)); ``` diff --git a/api.md b/api.md index d133b6162..e8a4c861d 100644 --- a/api.md +++ b/api.md @@ -381,6 +381,7 @@ Types: - FineTuningJobCancelledWebhookEvent - FineTuningJobFailedWebhookEvent - FineTuningJobSucceededWebhookEvent +- RealtimeCallIncomingWebhookEvent - ResponseCancelledWebhookEvent - ResponseCompletedWebhookEvent - ResponseFailedWebhookEvent @@ -751,6 +752,7 @@ Types: - ToolChoiceMcp - ToolChoiceOptions - ToolChoiceTypes +- WebSearchPreviewTool - WebSearchTool Methods: @@ -770,6 +772,110 @@ Methods: - client.responses.inputItems.list(responseID, { ...params }) -> ResponseItemsPage +# Realtime + +Types: + +- ConversationCreatedEvent +- ConversationItem +- ConversationItemAdded +- ConversationItemCreateEvent +- ConversationItemCreatedEvent +- ConversationItemDeleteEvent +- ConversationItemDeletedEvent +- ConversationItemDone +- ConversationItemInputAudioTranscriptionCompletedEvent +- ConversationItemInputAudioTranscriptionDeltaEvent +- ConversationItemInputAudioTranscriptionFailedEvent +- ConversationItemInputAudioTranscriptionSegment +- ConversationItemRetrieveEvent +- ConversationItemTruncateEvent +- ConversationItemTruncatedEvent +- ConversationItemWithReference +- InputAudioBufferAppendEvent +- InputAudioBufferClearEvent +- InputAudioBufferClearedEvent +- InputAudioBufferCommitEvent +- InputAudioBufferCommittedEvent +- InputAudioBufferSpeechStartedEvent +- InputAudioBufferSpeechStoppedEvent +- InputAudioBufferTimeoutTriggered +- LogProbProperties +- McpListToolsCompleted +- McpListToolsFailed +- McpListToolsInProgress +- OutputAudioBufferClearEvent +- RateLimitsUpdatedEvent +- RealtimeAudioConfig +- RealtimeClientEvent +- RealtimeClientSecretConfig +- RealtimeConversationItemAssistantMessage +- RealtimeConversationItemFunctionCall +- RealtimeConversationItemFunctionCallOutput +- RealtimeConversationItemSystemMessage +- RealtimeConversationItemUserMessage +- RealtimeError +- RealtimeErrorEvent +- RealtimeMcpApprovalRequest +- RealtimeMcpApprovalResponse +- RealtimeMcpListTools +- RealtimeMcpProtocolError +- RealtimeMcpToolCall +- RealtimeMcpToolExecutionError +- RealtimeMcphttpError +- RealtimeResponse +- RealtimeResponseStatus +- RealtimeResponseUsage +- RealtimeResponseUsageInputTokenDetails +- RealtimeResponseUsageOutputTokenDetails +- RealtimeServerEvent +- RealtimeSession +- RealtimeSessionCreateRequest +- RealtimeToolChoiceConfig +- RealtimeToolsConfig +- RealtimeToolsConfigUnion +- RealtimeTracingConfig +- RealtimeTranscriptionSessionCreateRequest +- RealtimeTruncation +- ResponseAudioDeltaEvent +- ResponseAudioDoneEvent +- ResponseAudioTranscriptDeltaEvent +- ResponseAudioTranscriptDoneEvent +- ResponseCancelEvent +- ResponseContentPartAddedEvent +- ResponseContentPartDoneEvent +- ResponseCreateEvent +- ResponseCreatedEvent +- ResponseDoneEvent +- ResponseFunctionCallArgumentsDeltaEvent +- ResponseFunctionCallArgumentsDoneEvent +- ResponseMcpCallArgumentsDelta +- ResponseMcpCallArgumentsDone +- ResponseMcpCallCompleted +- ResponseMcpCallFailed +- ResponseMcpCallInProgress +- ResponseOutputItemAddedEvent +- ResponseOutputItemDoneEvent +- ResponseTextDeltaEvent +- ResponseTextDoneEvent +- 
SessionCreatedEvent +- SessionUpdateEvent +- SessionUpdatedEvent +- TranscriptionSessionCreated +- TranscriptionSessionUpdate +- TranscriptionSessionUpdatedEvent + +## ClientSecrets + +Types: + +- RealtimeSessionCreateResponse +- ClientSecretCreateResponse + +Methods: + +- client.realtime.clientSecrets.create({ ...params }) -> ClientSecretCreateResponse + # Conversations Types: diff --git a/examples/azure/realtime/websocket.ts b/examples/azure/realtime/websocket.ts index 91fe3b7b9..146f7f94e 100644 --- a/examples/azure/realtime/websocket.ts +++ b/examples/azure/realtime/websocket.ts @@ -1,4 +1,4 @@ -import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket'; +import { OpenAIRealtimeWebSocket } from 'openai/realtime/websocket'; import { AzureOpenAI } from 'openai'; import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity'; import 'dotenv/config'; @@ -21,8 +21,9 @@ async function main() { rt.send({ type: 'session.update', session: { - modalities: ['text'], + output_modalities: ['text'], model: 'gpt-4o-realtime-preview', + type: 'realtime', }, }); @@ -49,8 +50,8 @@ async function main() { console.log(); }); - rt.on('response.text.delta', (event) => process.stdout.write(event.delta)); - rt.on('response.text.done', () => console.log()); + rt.on('response.output_text.delta', (event) => process.stdout.write(event.delta)); + rt.on('response.output_text.done', () => console.log()); rt.on('response.done', () => rt.close()); diff --git a/examples/azure/realtime/ws.ts b/examples/azure/realtime/ws.ts index 8b22aeef0..83f8c6297 100644 --- a/examples/azure/realtime/ws.ts +++ b/examples/azure/realtime/ws.ts @@ -1,5 +1,5 @@ import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity'; -import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws'; +import { OpenAIRealtimeWS } from 'openai/realtime/ws'; import { AzureOpenAI } from 'openai'; import 'dotenv/config'; @@ -21,8 +21,9 @@ async function main() { rt.send({ type: 'session.update', session: { - modalities: ['text'], + output_modalities: ['text'], model: 'gpt-4o-realtime-preview', + type: 'realtime', }, }); @@ -49,8 +50,8 @@ async function main() { console.log(); }); - rt.on('response.text.delta', (event) => process.stdout.write(event.delta)); - rt.on('response.text.done', () => console.log()); + rt.on('response.output_text.delta', (event) => process.stdout.write(event.delta)); + rt.on('response.output_text.done', () => console.log()); rt.on('response.done', () => rt.close()); diff --git a/examples/realtime/websocket.ts b/examples/realtime/websocket.ts index 6fb4740af..bf61db9ac 100644 --- a/examples/realtime/websocket.ts +++ b/examples/realtime/websocket.ts @@ -1,7 +1,7 @@ -import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket'; +import { OpenAIRealtimeWebSocket } from 'openai/realtime/websocket'; async function main() { - const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-4o-realtime-preview-2024-12-17' }); + const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-realtime' }); // access the underlying `ws.WebSocket` instance rt.socket.addEventListener('open', () => { @@ -9,8 +9,9 @@ async function main() { rt.send({ type: 'session.update', session: { - modalities: ['text'], + output_modalities: ['text'], model: 'gpt-4o-realtime-preview', + type: 'realtime', }, }); @@ -37,8 +38,8 @@ async function main() { console.log(); }); - rt.on('response.text.delta', (event) => process.stdout.write(event.delta)); - rt.on('response.text.done', () => console.log()); + 
rt.on('response.output_text.delta', (event) => process.stdout.write(event.delta)); + rt.on('response.output_text.done', () => console.log()); rt.on('response.done', () => rt.close()); diff --git a/examples/realtime/ws.ts b/examples/realtime/ws.ts index 6cc950b76..ba22e262a 100644 --- a/examples/realtime/ws.ts +++ b/examples/realtime/ws.ts @@ -1,7 +1,7 @@ -import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws'; +import { OpenAIRealtimeWS } from 'openai/realtime/ws'; async function main() { - const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' }); + const rt = new OpenAIRealtimeWS({ model: 'gpt-realtime' }); // access the underlying `ws.WebSocket` instance rt.socket.on('open', () => { @@ -9,8 +9,9 @@ async function main() { rt.send({ type: 'session.update', session: { - modalities: ['text'], + output_modalities: ['text'], model: 'gpt-4o-realtime-preview', + type: 'realtime', }, }); @@ -37,8 +38,8 @@ async function main() { console.log(); }); - rt.on('response.text.delta', (event) => process.stdout.write(event.delta)); - rt.on('response.text.done', () => console.log()); + rt.on('response.output_text.delta', (event) => process.stdout.write(event.delta)); + rt.on('response.output_text.done', () => console.log()); rt.on('response.done', () => rt.close()); diff --git a/jsr.json b/jsr.json index 2996d8f66..cf46e84e3 100644 --- a/jsr.json +++ b/jsr.json @@ -1,6 +1,6 @@ { "name": "@openai/openai", - "version": "5.16.0", + "version": "5.17.0", "exports": { ".": "./index.ts", "./helpers/zod": "./helpers/zod.ts", diff --git a/jsr.json.orig b/jsr.json.orig index 3e7c40d5f..30eac2430 100644 --- a/jsr.json.orig +++ b/jsr.json.orig @@ -5,6 +5,7 @@ ".": "./index.ts", "./helpers/zod": "./helpers/zod.ts", "./beta/realtime/websocket": "./beta/realtime/websocket.ts" + "./realtime/websocket": "./realtime/websocket.ts" }, "imports": { "zod": "npm:zod@3" diff --git a/package.json b/package.json index b3a4f4685..ccff023c1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "openai", - "version": "5.16.0", + "version": "5.17.0", "description": "The official TypeScript library for the OpenAI API", "author": "OpenAI ", "types": "dist/index.d.ts", diff --git a/realtime.md b/realtime.md index 9842ad453..1f47600e4 100644 --- a/realtime.md +++ b/realtime.md @@ -1,4 +1,4 @@ -## Realtime API beta +## Realtime API The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as [function calling](https://platform.openai.com/docs/guides/function-calling) through a `WebSocket` connection. 
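This patch also adds a `client.realtime.clientSecrets.create()` method (see the `ClientSecrets` section of `api.md` above), but none of the updated examples exercise it. Below is a minimal sketch of how it might be called, assuming the `session` payload mirrors the `session.update` bodies used in the updated examples; the field values are illustrative only, taken from the `ClientSecretCreateParams` shape introduced in this diff.

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  // Mint an ephemeral client secret that a browser client can use
  // instead of a long-lived API key (shape per ClientSecretCreateParams).
  const secret = await client.realtime.clientSecrets.create({
    expires_after: { anchor: 'created_at', seconds: 600 },
    session: {
      type: 'realtime',
      model: 'gpt-realtime',
      output_modalities: ['text'],
    },
  });

  console.log(secret.value, 'expires at', secret.expires_at);
}

main();
```

If the returned `value` is an `ek_`-prefixed ephemeral key (the prefix the new `OpenAIRealtimeWebSocket` constructor checks for in this diff), passing it as the API key enables `dangerouslyAllowBrowser` automatically for a browser-side realtime connection.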
@@ -10,9 +10,9 @@ Basic text based example with `ws`: ```ts // requires `yarn add ws @types/ws` -import { OpenAIRealtimeWS } from 'openai/beta/realtime/ws'; +import { OpenAIRealtimeWS } from 'openai/realtime/ws'; -const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' }); +const rt = new OpenAIRealtimeWS({ model: 'gpt-realtime' }); // access the underlying `ws.WebSocket` instance rt.socket.on('open', () => { @@ -59,9 +59,9 @@ rt.socket.on('close', () => console.log('\nConnection closed!')); To use the web API `WebSocket` implementation, replace `OpenAIRealtimeWS` with `OpenAIRealtimeWebSocket` and adjust any `rt.socket` access: ```ts -import { OpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket'; +import { OpenAIRealtimeWebSocket } from 'openai/realtime/websocket'; -const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-4o-realtime-preview-2024-12-17' }); +const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-realtime' }); // ... rt.socket.addEventListener('open', () => { // ... @@ -77,7 +77,7 @@ When an error is encountered, either on the client side or returned from the ser It is **highly recommended** that you register an `error` event listener and handle errors appropriately as typically the underlying connection is still usable. ```ts -const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview-2024-12-17' }); +const rt = new OpenAIRealtimeWS({ model: 'gpt-realtime' }); rt.on('error', (err) => { // in a real world scenario this should be logged somewhere as you // likely want to continue processing events regardless of any errors diff --git a/scripts/detect-breaking-changes b/scripts/detect-breaking-changes index 9f5a00452..85607de43 100755 --- a/scripts/detect-breaking-changes +++ b/scripts/detect-breaking-changes @@ -44,6 +44,8 @@ TEST_PATHS=( tests/api-resources/uploads/parts.test.ts tests/api-resources/responses/responses.test.ts tests/api-resources/responses/input-items.test.ts + tests/api-resources/realtime/realtime.test.ts + tests/api-resources/realtime/client-secrets.test.ts tests/api-resources/conversations/conversations.test.ts tests/api-resources/conversations/items.test.ts tests/api-resources/evals/evals.test.ts diff --git a/src/client.ts b/src/client.ts index 78e29664a..a853d2890 100644 --- a/src/client.ts +++ b/src/client.ts @@ -121,6 +121,7 @@ import { } from './resources/evals/evals'; import { FineTuning } from './resources/fine-tuning/fine-tuning'; import { Graders } from './resources/graders/graders'; +import { Realtime } from './resources/realtime/realtime'; import { Responses } from './resources/responses/responses'; import { Upload, @@ -574,7 +575,7 @@ export class OpenAI { const response = await this.fetchWithTimeout(url, req, timeout, controller).catch(castToError); const headersTime = Date.now(); - if (response instanceof Error) { + if (response instanceof globalThis.Error) { const retryMessage = `retrying, ${retriesRemaining} attempts remaining`; if (options.signal?.aborted) { throw new Errors.APIUserAbortError(); @@ -962,6 +963,7 @@ export class OpenAI { batches: API.Batches = new API.Batches(this); uploads: API.Uploads = new API.Uploads(this); responses: API.Responses = new API.Responses(this); + realtime: API.Realtime = new API.Realtime(this); conversations: API.Conversations = new API.Conversations(this); evals: API.Evals = new API.Evals(this); containers: API.Containers = new API.Containers(this); @@ -983,6 +985,7 @@ OpenAI.Beta = Beta; OpenAI.Batches = Batches; OpenAI.Uploads = UploadsAPIUploads; OpenAI.Responses = 
Responses; +OpenAI.Realtime = Realtime; OpenAI.Conversations = Conversations; OpenAI.Evals = Evals; OpenAI.Containers = Containers; @@ -1165,6 +1168,8 @@ export declare namespace OpenAI { export { Responses as Responses }; + export { Realtime as Realtime }; + export { Conversations as Conversations }; export { diff --git a/src/realtime/index.ts b/src/realtime/index.ts new file mode 100644 index 000000000..75f0f3088 --- /dev/null +++ b/src/realtime/index.ts @@ -0,0 +1 @@ +export { OpenAIRealtimeError } from './internal-base'; diff --git a/src/realtime/internal-base.ts b/src/realtime/internal-base.ts new file mode 100644 index 000000000..92cc1d1c6 --- /dev/null +++ b/src/realtime/internal-base.ts @@ -0,0 +1,98 @@ +import { + RealtimeClientEvent, + RealtimeServerEvent, + RealtimeErrorEvent, + RealtimeError, +} from '../resources/realtime/realtime'; +import { EventEmitter } from '../lib/EventEmitter'; +import { OpenAIError } from '../error'; +import OpenAI, { AzureOpenAI } from '../index'; + +export class OpenAIRealtimeError extends OpenAIError { + /** + * The error data that the API sent back in an `error` event. + */ + error?: RealtimeError | undefined; + + /** + * The unique ID of the server event. + */ + event_id?: string | undefined; + + constructor(message: string, event: RealtimeErrorEvent | null) { + super(message); + + this.error = event?.error; + this.event_id = event?.event_id; + } +} + +type Simplify = { [KeyType in keyof T]: T[KeyType] } & {}; + +type RealtimeEvents = Simplify< + { + event: (event: RealtimeServerEvent) => void; + error: (error: OpenAIRealtimeError) => void; + } & { + [EventType in Exclude]: ( + event: Extract, + ) => unknown; + } +>; + +export abstract class OpenAIRealtimeEmitter extends EventEmitter { + /** + * Send an event to the API. + */ + abstract send(event: RealtimeClientEvent): void; + + /** + * Close the websocket connection. + */ + abstract close(props?: { code: number; reason: string }): void; + + protected _onError(event: null, message: string, cause: any): void; + protected _onError(event: RealtimeErrorEvent, message?: string | undefined): void; + protected _onError(event: RealtimeErrorEvent | null, message?: string | undefined, cause?: any): void { + message = + event?.error ? + `${event.error.message} code=${event.error.code} param=${event.error.param} type=${event.error.type} event_id=${event.error.event_id}` + : message ?? 'unknown error'; + + if (!this._hasListener('error')) { + const error = new OpenAIRealtimeError( + message + + `\n\nTo resolve these unhandled rejection errors you should bind an \`error\` callback, e.g. \`rt.on('error', (error) => ...)\` `, + event, + ); + // @ts-ignore + error.cause = cause; + Promise.reject(error); + return; + } + + const error = new OpenAIRealtimeError(message, event); + // @ts-ignore + error.cause = cause; + + this._emit('error', error); + } +} + +export function isAzure(client: Pick): client is AzureOpenAI { + return client instanceof AzureOpenAI; +} + +export function buildRealtimeURL(client: Pick, model: string): URL { + const path = '/realtime'; + const baseURL = client.baseURL; + const url = new URL(baseURL + (baseURL.endsWith('/') ? 
path.slice(1) : path)); + url.protocol = 'wss'; + if (isAzure(client)) { + url.searchParams.set('api-version', client.apiVersion); + url.searchParams.set('deployment', model); + } else { + url.searchParams.set('model', model); + } + return url; +} diff --git a/src/realtime/websocket.ts b/src/realtime/websocket.ts new file mode 100644 index 000000000..c83b2cf05 --- /dev/null +++ b/src/realtime/websocket.ts @@ -0,0 +1,142 @@ +import { AzureOpenAI, OpenAI } from '../index'; +import { OpenAIError } from '../error'; +import type { RealtimeClientEvent, RealtimeServerEvent } from '../resources/realtime/realtime'; +import { OpenAIRealtimeEmitter, buildRealtimeURL, isAzure } from './internal-base'; +import { isRunningInBrowser } from '../internal/detect-platform'; + +interface MessageEvent { + data: string; +} + +type _WebSocket = + typeof globalThis extends ( + { + WebSocket: infer ws extends abstract new (...args: any) => any; + } + ) ? + // @ts-ignore + InstanceType + : any; + +export class OpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter { + url: URL; + socket: _WebSocket; + + constructor( + props: { + model: string; + dangerouslyAllowBrowser?: boolean; + /** + * Callback to mutate the URL, needed for Azure. + * @internal + */ + onURL?: (url: URL) => void; + }, + client?: Pick, + ) { + super(); + + const dangerouslyAllowBrowser = + props.dangerouslyAllowBrowser ?? + (client as any)?._options?.dangerouslyAllowBrowser ?? + (client?.apiKey.startsWith('ek_') ? true : null); + + if (!dangerouslyAllowBrowser && isRunningInBrowser()) { + throw new OpenAIError( + "It looks like you're running in a browser-like environment.\n\nThis is disabled by default, as it risks exposing your secret API credentials to attackers.\n\nYou can avoid this error by creating an ephemeral session token:\nhttps://platform.openai.com/docs/api-reference/realtime-sessions\n", + ); + } + + client ??= new OpenAI({ dangerouslyAllowBrowser }); + + this.url = buildRealtimeURL(client, props.model); + props.onURL?.(this.url); + + // @ts-ignore + this.socket = new WebSocket(this.url.toString(), [ + 'realtime', + ...(isAzure(client) ? [] : [`openai-insecure-api-key.${client.apiKey}`]), + ]); + + this.socket.addEventListener('message', (websocketEvent: MessageEvent) => { + const event = (() => { + try { + return JSON.parse(websocketEvent.data.toString()) as RealtimeServerEvent; + } catch (err) { + this._onError(null, 'could not parse websocket event', err); + return null; + } + })(); + + if (event) { + this._emit('event', event); + + if (event.type === 'error') { + this._onError(event); + } else { + // @ts-expect-error TS isn't smart enough to get the relationship right here + this._emit(event.type, event); + } + } + }); + + this.socket.addEventListener('error', (event: any) => { + this._onError(null, event.message, null); + }); + + if (isAzure(client)) { + if (this.url.searchParams.get('Authorization') !== null) { + this.url.searchParams.set('Authorization', ''); + } else { + this.url.searchParams.set('api-key', ''); + } + } + } + + static async azure( + client: Pick, + options: { deploymentName?: string; dangerouslyAllowBrowser?: boolean } = {}, + ): Promise { + const token = await client._getAzureADToken(); + function onURL(url: URL) { + if (client.apiKey !== '') { + url.searchParams.set('api-key', client.apiKey); + } else { + if (token) { + url.searchParams.set('Authorization', `Bearer ${token}`); + } else { + throw new Error('AzureOpenAI is not instantiated correctly. 
No API key or token provided.'); + } + } + } + const deploymentName = options.deploymentName ?? client.deploymentName; + if (!deploymentName) { + throw new Error('No deployment name provided'); + } + const { dangerouslyAllowBrowser } = options; + return new OpenAIRealtimeWebSocket( + { + model: deploymentName, + onURL, + ...(dangerouslyAllowBrowser ? { dangerouslyAllowBrowser } : {}), + }, + client, + ); + } + + send(event: RealtimeClientEvent) { + try { + this.socket.send(JSON.stringify(event)); + } catch (err) { + this._onError(null, 'could not send data', err); + } + } + + close(props?: { code: number; reason: string }) { + try { + this.socket.close(props?.code ?? 1000, props?.reason ?? 'OK'); + } catch (err) { + this._onError(null, 'could not close the connection', err); + } + } +} diff --git a/src/realtime/ws.ts b/src/realtime/ws.ts new file mode 100644 index 000000000..5226d6601 --- /dev/null +++ b/src/realtime/ws.ts @@ -0,0 +1,95 @@ +import * as WS from 'ws'; +import { AzureOpenAI, OpenAI } from '../index'; +import type { RealtimeClientEvent, RealtimeServerEvent } from '../resources/realtime/realtime'; +import { OpenAIRealtimeEmitter, buildRealtimeURL, isAzure } from './internal-base'; + +export class OpenAIRealtimeWS extends OpenAIRealtimeEmitter { + url: URL; + socket: WS.WebSocket; + + constructor( + props: { model: string; options?: WS.ClientOptions | undefined }, + client?: Pick, + ) { + super(); + client ??= new OpenAI(); + + this.url = buildRealtimeURL(client, props.model); + this.socket = new WS.WebSocket(this.url, { + ...props.options, + headers: { + ...props.options?.headers, + ...(isAzure(client) ? {} : { Authorization: `Bearer ${client.apiKey}` }), + }, + }); + + this.socket.on('message', (wsEvent) => { + const event = (() => { + try { + return JSON.parse(wsEvent.toString()) as RealtimeServerEvent; + } catch (err) { + this._onError(null, 'could not parse websocket event', err); + return null; + } + })(); + + if (event) { + this._emit('event', event); + + if (event.type === 'error') { + this._onError(event); + } else { + // @ts-expect-error TS isn't smart enough to get the relationship right here + this._emit(event.type, event); + } + } + }); + + this.socket.on('error', (err) => { + this._onError(null, err.message, err); + }); + } + + static async azure( + client: Pick, + options: { deploymentName?: string; options?: WS.ClientOptions | undefined } = {}, + ): Promise { + const deploymentName = options.deploymentName ?? client.deploymentName; + if (!deploymentName) { + throw new Error('No deployment name provided'); + } + return new OpenAIRealtimeWS( + { model: deploymentName, options: { headers: await getAzureHeaders(client) } }, + client, + ); + } + + send(event: RealtimeClientEvent) { + try { + this.socket.send(JSON.stringify(event)); + } catch (err) { + this._onError(null, 'could not send data', err); + } + } + + close(props?: { code: number; reason: string }) { + try { + this.socket.close(props?.code ?? 1000, props?.reason ?? 'OK'); + } catch (err) { + this._onError(null, 'could not close the connection', err); + } + } +} + +async function getAzureHeaders(client: Pick) { + if (client.apiKey !== '') { + return { 'api-key': client.apiKey }; + } else { + const token = await client._getAzureADToken(); + if (token) { + return { Authorization: `Bearer ${token}` }; + } else { + throw new Error('AzureOpenAI is not instantiated correctly. 
No API key or token provided.'); + } + } +} diff --git a/src/resources/audio/speech.ts b/src/resources/audio/speech.ts index f533a558b..e68e806e0 100644 --- a/src/resources/audio/speech.ts +++ b/src/resources/audio/speech.ts @@ -51,7 +51,18 @@ export interface SpeechCreateParams { * `verse`. Previews of the voices are available in the * [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). */ - voice: (string & {}) | 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse'; + voice: + | (string & {}) + | 'alloy' + | 'ash' + | 'ballad' + | 'coral' + | 'echo' + | 'sage' + | 'shimmer' + | 'verse' + | 'marin' + | 'cedar'; /** * Control the voice of your generated audio with additional instructions. Does not diff --git a/src/resources/beta/realtime/realtime.ts b/src/resources/beta/realtime/realtime.ts index 4635c6762..b7fe85dc0 100644 --- a/src/resources/beta/realtime/realtime.ts +++ b/src/resources/beta/realtime/realtime.ts @@ -17,6 +17,9 @@ import { TranscriptionSessions, } from './transcription-sessions'; +/** + * @deprecated Realtime has now launched and is generally available. The old beta API is now deprecated. + */ export class Realtime extends APIResource { sessions: SessionsAPI.Sessions = new SessionsAPI.Sessions(this._client); transcriptionSessions: TranscriptionSessionsAPI.TranscriptionSessions = diff --git a/src/resources/chat/completions/completions.ts b/src/resources/chat/completions/completions.ts index a71e574e9..17269f25b 100644 --- a/src/resources/chat/completions/completions.ts +++ b/src/resources/chat/completions/completions.ts @@ -489,7 +489,18 @@ export interface ChatCompletionAudioParam { * The voice the model uses to respond. Supported voices are `alloy`, `ash`, * `ballad`, `coral`, `echo`, `fable`, `nova`, `onyx`, `sage`, and `shimmer`. */ - voice: (string & {}) | 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse'; + voice: + | (string & {}) + | 'alloy' + | 'ash' + | 'ballad' + | 'coral' + | 'echo' + | 'sage' + | 'shimmer' + | 'verse' + | 'marin' + | 'cedar'; } /** diff --git a/src/resources/index.ts b/src/resources/index.ts index 129b1cbd0..fbbc0e3bb 100644 --- a/src/resources/index.ts +++ b/src/resources/index.ts @@ -95,6 +95,7 @@ export { type ModerationCreateResponse, type ModerationCreateParams, } from './moderations'; +export { Realtime } from './realtime/realtime'; export { Responses } from './responses/responses'; export { Uploads, type Upload, type UploadCreateParams, type UploadCompleteParams } from './uploads/uploads'; export { diff --git a/src/resources/realtime.ts b/src/resources/realtime.ts new file mode 100644 index 000000000..1c5df27d9 --- /dev/null +++ b/src/resources/realtime.ts @@ -0,0 +1,3 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +export * from './realtime/index'; diff --git a/src/resources/realtime/client-secrets.ts b/src/resources/realtime/client-secrets.ts new file mode 100644 index 000000000..c48fe8243 --- /dev/null +++ b/src/resources/realtime/client-secrets.ts @@ -0,0 +1,470 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +import { APIResource } from '../../core/resource'; +import * as RealtimeAPI from './realtime'; +import { APIPromise } from '../../core/api-promise'; +import { RequestOptions } from '../../internal/request-options'; + +export class ClientSecrets extends APIResource { + /** + * Create a Realtime session and client secret for either realtime or + * transcription. + */ + create(body: ClientSecretCreateParams, options?: RequestOptions): APIPromise { + return this._client.post('/realtime/client_secrets', { body, ...options }); + } +} + +/** + * A Realtime session configuration object. + */ +export interface RealtimeSessionCreateResponse { + /** + * Unique identifier for the session that looks like `sess_1234567890abcdef`. + */ + id?: string; + + /** + * Configuration for input and output audio for the session. + */ + audio?: RealtimeSessionCreateResponse.Audio; + + /** + * Expiration timestamp for the session, in seconds since epoch. + */ + expires_at?: number; + + /** + * Additional fields to include in server outputs. + * + * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio + * transcription. + */ + include?: Array<'item.input_audio_transcription.logprobs'>; + + /** + * The default system instructions (i.e. system message) prepended to model calls. + * This field allows the client to guide the model on desired responses. The model + * can be instructed on response content and format, (e.g. "be extremely succinct", + * "act friendly", "here are examples of good responses") and on audio behavior + * (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + * instructions are not guaranteed to be followed by the model, but they provide + * guidance to the model on the desired behavior. + * + * Note that the server sets default instructions which will be used if this field + * is not set and are visible in the `session.created` event at the start of the + * session. + */ + instructions?: string; + + /** + * Maximum number of output tokens for a single assistant response, inclusive of + * tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + * `inf` for the maximum available tokens for a given model. Defaults to `inf`. + */ + max_output_tokens?: number | 'inf'; + + /** + * The Realtime model used for this session. + */ + model?: string; + + /** + * The object type. Always `realtime.session`. + */ + object?: string; + + /** + * The set of modalities the model can respond with. To disable audio, set this to + * ["text"]. + */ + output_modalities?: Array<'text' | 'audio'>; + + /** + * How the model chooses tools. Options are `auto`, `none`, `required`, or specify + * a function. + */ + tool_choice?: string; + + /** + * Tools (functions) available to the model. + */ + tools?: Array; + + /** + * Configuration options for tracing. Set to null to disable tracing. Once tracing + * is enabled for a session, the configuration cannot be modified. + * + * `auto` will create a trace for the session with default values for the workflow + * name, group id, and metadata. + */ + tracing?: 'auto' | RealtimeSessionCreateResponse.TracingConfiguration; + + /** + * Configuration for turn detection. Can be set to `null` to turn off. Server VAD + * means that the model will detect the start and end of speech based on audio + * volume and respond at the end of user speech. 
+ */ + turn_detection?: RealtimeSessionCreateResponse.TurnDetection; +} + +export namespace RealtimeSessionCreateResponse { + /** + * Configuration for input and output audio for the session. + */ + export interface Audio { + input?: Audio.Input; + + output?: Audio.Output; + } + + export namespace Audio { + export interface Input { + /** + * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + */ + format?: string; + + /** + * Configuration for input audio noise reduction. + */ + noise_reduction?: Input.NoiseReduction; + + /** + * Configuration for input audio transcription. + */ + transcription?: Input.Transcription; + + /** + * Configuration for turn detection. + */ + turn_detection?: Input.TurnDetection; + } + + export namespace Input { + /** + * Configuration for input audio noise reduction. + */ + export interface NoiseReduction { + type?: 'near_field' | 'far_field'; + } + + /** + * Configuration for input audio transcription. + */ + export interface Transcription { + /** + * The language of the input audio. + */ + language?: string; + + /** + * The model to use for transcription. + */ + model?: string; + + /** + * Optional text to guide the model's style or continue a previous audio segment. + */ + prompt?: string; + } + + /** + * Configuration for turn detection. + */ + export interface TurnDetection { + prefix_padding_ms?: number; + + silence_duration_ms?: number; + + threshold?: number; + + /** + * Type of turn detection, only `server_vad` is currently supported. + */ + type?: string; + } + } + + export interface Output { + /** + * The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + */ + format?: string; + + speed?: number; + + voice?: + | (string & {}) + | 'alloy' + | 'ash' + | 'ballad' + | 'coral' + | 'echo' + | 'sage' + | 'shimmer' + | 'verse' + | 'marin' + | 'cedar'; + } + } + + export interface Tool { + /** + * The description of the function, including guidance on when and how to call it, + * and guidance about what to tell the user when calling (if anything). + */ + description?: string; + + /** + * The name of the function. + */ + name?: string; + + /** + * Parameters of the function in JSON Schema. + */ + parameters?: unknown; + + /** + * The type of the tool, i.e. `function`. + */ + type?: 'function'; + } + + /** + * Granular configuration for tracing. + */ + export interface TracingConfiguration { + /** + * The group id to attach to this trace to enable filtering and grouping in the + * traces dashboard. + */ + group_id?: string; + + /** + * The arbitrary metadata to attach to this trace to enable filtering in the traces + * dashboard. + */ + metadata?: unknown; + + /** + * The name of the workflow to attach to this trace. This is used to name the trace + * in the traces dashboard. + */ + workflow_name?: string; + } + + /** + * Configuration for turn detection. Can be set to `null` to turn off. Server VAD + * means that the model will detect the start and end of speech based on audio + * volume and respond at the end of user speech. + */ + export interface TurnDetection { + /** + * Amount of audio to include before the VAD detected speech (in milliseconds). + * Defaults to 300ms. + */ + prefix_padding_ms?: number; + + /** + * Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + * With shorter values the model will respond more quickly, but may jump in on + * short pauses from the user. 
+ */ + silence_duration_ms?: number; + + /** + * Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + * threshold will require louder audio to activate the model, and thus might + * perform better in noisy environments. + */ + threshold?: number; + + /** + * Type of turn detection, only `server_vad` is currently supported. + */ + type?: string; + } +} + +/** + * Response from creating a session and client secret for the Realtime API. + */ +export interface ClientSecretCreateResponse { + /** + * Expiration timestamp for the client secret, in seconds since epoch. + */ + expires_at: number; + + /** + * The session configuration for either a realtime or transcription session. + */ + session: + | RealtimeSessionCreateResponse + | ClientSecretCreateResponse.RealtimeTranscriptionSessionCreateResponse; + + /** + * The generated client secret value. + */ + value: string; +} + +export namespace ClientSecretCreateResponse { + /** + * A Realtime transcription session configuration object. + */ + export interface RealtimeTranscriptionSessionCreateResponse { + /** + * Unique identifier for the session that looks like `sess_1234567890abcdef`. + */ + id?: string; + + /** + * Configuration for input audio for the session. + */ + audio?: RealtimeTranscriptionSessionCreateResponse.Audio; + + /** + * Expiration timestamp for the session, in seconds since epoch. + */ + expires_at?: number; + + /** + * Additional fields to include in server outputs. + * + * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio + * transcription. + */ + include?: Array<'item.input_audio_transcription.logprobs'>; + + /** + * The object type. Always `realtime.transcription_session`. + */ + object?: string; + } + + export namespace RealtimeTranscriptionSessionCreateResponse { + /** + * Configuration for input audio for the session. + */ + export interface Audio { + input?: Audio.Input; + } + + export namespace Audio { + export interface Input { + /** + * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + */ + format?: string; + + /** + * Configuration for input audio noise reduction. + */ + noise_reduction?: Input.NoiseReduction; + + /** + * Configuration of the transcription model. + */ + transcription?: Input.Transcription; + + /** + * Configuration for turn detection. + */ + turn_detection?: Input.TurnDetection; + } + + export namespace Input { + /** + * Configuration for input audio noise reduction. + */ + export interface NoiseReduction { + type?: 'near_field' | 'far_field'; + } + + /** + * Configuration of the transcription model. + */ + export interface Transcription { + /** + * The language of the input audio. Supplying the input language in + * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + * format will improve accuracy and latency. + */ + language?: string; + + /** + * The model to use for transcription. Can be `gpt-4o-transcribe`, + * `gpt-4o-mini-transcribe`, or `whisper-1`. + */ + model?: 'gpt-4o-transcribe' | 'gpt-4o-mini-transcribe' | 'whisper-1'; + + /** + * An optional text to guide the model's style or continue a previous audio + * segment. The + * [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + * should match the audio language. + */ + prompt?: string; + } + + /** + * Configuration for turn detection. 
+ */ + export interface TurnDetection { + prefix_padding_ms?: number; + + silence_duration_ms?: number; + + threshold?: number; + + /** + * Type of turn detection, only `server_vad` is currently supported. + */ + type?: string; + } + } + } + } +} + +export interface ClientSecretCreateParams { + /** + * Configuration for the ephemeral token expiration. + */ + expires_after?: ClientSecretCreateParams.ExpiresAfter; + + /** + * Session configuration to use for the client secret. Choose either a realtime + * session or a transcription session. + */ + session?: RealtimeAPI.RealtimeSessionCreateRequest | RealtimeAPI.RealtimeTranscriptionSessionCreateRequest; +} + +export namespace ClientSecretCreateParams { + /** + * Configuration for the ephemeral token expiration. + */ + export interface ExpiresAfter { + /** + * The anchor point for the ephemeral token expiration. Only `created_at` is + * currently supported. + */ + anchor?: 'created_at'; + + /** + * The number of seconds from the anchor point to the expiration. Select a value + * between `10` and `7200`. + */ + seconds?: number; + } +} + +export declare namespace ClientSecrets { + export { + type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse, + type ClientSecretCreateResponse as ClientSecretCreateResponse, + type ClientSecretCreateParams as ClientSecretCreateParams, + }; +} diff --git a/src/resources/realtime/index.ts b/src/resources/realtime/index.ts new file mode 100644 index 000000000..a6c5db35e --- /dev/null +++ b/src/resources/realtime/index.ts @@ -0,0 +1,9 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +export { + ClientSecrets, + type RealtimeSessionCreateResponse, + type ClientSecretCreateResponse, + type ClientSecretCreateParams, +} from './client-secrets'; +export { Realtime } from './realtime'; diff --git a/src/resources/realtime/realtime.ts b/src/resources/realtime/realtime.ts new file mode 100644 index 000000000..562b2d739 --- /dev/null +++ b/src/resources/realtime/realtime.ts @@ -0,0 +1,4351 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import { APIResource } from '../../core/resource'; +import * as RealtimeAPI from './realtime'; +import * as Shared from '../shared'; +import * as ClientSecretsAPI from './client-secrets'; +import { + ClientSecretCreateParams, + ClientSecretCreateResponse, + ClientSecrets, + RealtimeSessionCreateResponse, +} from './client-secrets'; +import * as ResponsesAPI from '../responses/responses'; + +export class Realtime extends APIResource { + clientSecrets: ClientSecretsAPI.ClientSecrets = new ClientSecretsAPI.ClientSecrets(this._client); +} + +/** + * Returned when a conversation is created. Emitted right after session creation. + */ +export interface ConversationCreatedEvent { + /** + * The conversation resource. + */ + conversation: ConversationCreatedEvent.Conversation; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The event type, must be `conversation.created`. + */ + type: 'conversation.created'; +} + +export namespace ConversationCreatedEvent { + /** + * The conversation resource. + */ + export interface Conversation { + /** + * The unique ID of the conversation. + */ + id?: string; + + /** + * The object type, must be `realtime.conversation`. + */ + object?: 'realtime.conversation'; + } +} + +/** + * A single item within a Realtime conversation. 
+ */ +export type ConversationItem = + | RealtimeConversationItemSystemMessage + | RealtimeConversationItemUserMessage + | RealtimeConversationItemAssistantMessage + | RealtimeConversationItemFunctionCall + | RealtimeConversationItemFunctionCallOutput + | RealtimeMcpApprovalResponse + | RealtimeMcpListTools + | RealtimeMcpToolCall + | RealtimeMcpApprovalRequest; + +/** + * Returned when a conversation item is added. + */ +export interface ConversationItemAdded { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * A single item within a Realtime conversation. + */ + item: ConversationItem; + + /** + * The event type, must be `conversation.item.added`. + */ + type: 'conversation.item.added'; + + /** + * The ID of the item that precedes this one, if any. This is used to maintain + * ordering when items are inserted. + */ + previous_item_id?: string | null; +} + +/** + * Add a new Item to the Conversation's context, including messages, function + * calls, and function call responses. This event can be used both to populate a + * "history" of the conversation and to add new items mid-stream, but has the + * current limitation that it cannot populate assistant audio messages. + * + * If successful, the server will respond with a `conversation.item.created` event, + * otherwise an `error` event will be sent. + */ +export interface ConversationItemCreateEvent { + /** + * A single item within a Realtime conversation. + */ + item: ConversationItem; + + /** + * The event type, must be `conversation.item.create`. + */ + type: 'conversation.item.create'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; + + /** + * The ID of the preceding item after which the new item will be inserted. If not + * set, the new item will be appended to the end of the conversation. If set to + * `root`, the new item will be added to the beginning of the conversation. If set + * to an existing ID, it allows an item to be inserted mid-conversation. If the ID + * cannot be found, an error will be returned and the item will not be added. + */ + previous_item_id?: string; +} + +/** + * Returned when a conversation item is created. There are several scenarios that + * produce this event: + * + * - The server is generating a Response, which if successful will produce either + * one or two Items, which will be of type `message` (role `assistant`) or type + * `function_call`. + * - The input audio buffer has been committed, either by the client or the server + * (in `server_vad` mode). The server will take the content of the input audio + * buffer and add it to a new user message Item. + * - The client has sent a `conversation.item.create` event to add a new Item to + * the Conversation. + */ +export interface ConversationItemCreatedEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * A single item within a Realtime conversation. + */ + item: ConversationItem; + + /** + * The event type, must be `conversation.item.created`. + */ + type: 'conversation.item.created'; + + /** + * The ID of the preceding item in the Conversation context, allows the client to + * understand the order of the conversation. Can be `null` if the item has no + * predecessor. + */ + previous_item_id?: string | null; +} + +/** + * Send this event when you want to remove any item from the conversation history. 
+ * The server will respond with a `conversation.item.deleted` event, unless the + * item does not exist in the conversation history, in which case the server will + * respond with an error. + */ +export interface ConversationItemDeleteEvent { + /** + * The ID of the item to delete. + */ + item_id: string; + + /** + * The event type, must be `conversation.item.delete`. + */ + type: 'conversation.item.delete'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; +} + +/** + * Returned when an item in the conversation is deleted by the client with a + * `conversation.item.delete` event. This event is used to synchronize the server's + * understanding of the conversation history with the client's view. + */ +export interface ConversationItemDeletedEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item that was deleted. + */ + item_id: string; + + /** + * The event type, must be `conversation.item.deleted`. + */ + type: 'conversation.item.deleted'; +} + +/** + * Returned when a conversation item is finalized. + */ +export interface ConversationItemDone { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * A single item within a Realtime conversation. + */ + item: ConversationItem; + + /** + * The event type, must be `conversation.item.done`. + */ + type: 'conversation.item.done'; + + /** + * The ID of the item that precedes this one, if any. This is used to maintain + * ordering when items are inserted. + */ + previous_item_id?: string | null; +} + +/** + * This event is the output of audio transcription for user audio written to the + * user audio buffer. Transcription begins when the input audio buffer is committed + * by the client or server (in `server_vad` mode). Transcription runs + * asynchronously with Response creation, so this event may come before or after + * the Response events. + * + * Realtime API models accept audio natively, and thus input transcription is a + * separate process run on a separate ASR (Automatic Speech Recognition) model. The + * transcript may diverge somewhat from the model's interpretation, and should be + * treated as a rough guide. + */ +export interface ConversationItemInputAudioTranscriptionCompletedEvent { + /** + * The index of the content part containing the audio. + */ + content_index: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the user message item containing the audio. + */ + item_id: string; + + /** + * The transcribed text. + */ + transcript: string; + + /** + * The event type, must be `conversation.item.input_audio_transcription.completed`. + */ + type: 'conversation.item.input_audio_transcription.completed'; + + /** + * Usage statistics for the transcription. + */ + usage: + | ConversationItemInputAudioTranscriptionCompletedEvent.TranscriptTextUsageTokens + | ConversationItemInputAudioTranscriptionCompletedEvent.TranscriptTextUsageDuration; + + /** + * The log probabilities of the transcription. + */ + logprobs?: Array | null; +} + +export namespace ConversationItemInputAudioTranscriptionCompletedEvent { + /** + * Usage statistics for models billed by token usage. + */ + export interface TranscriptTextUsageTokens { + /** + * Number of input tokens billed for this request. + */ + input_tokens: number; + + /** + * Number of output tokens generated. + */ + output_tokens: number; + + /** + * Total number of tokens used (input + output). 
+ */ + total_tokens: number; + + /** + * The type of the usage object. Always `tokens` for this variant. + */ + type: 'tokens'; + + /** + * Details about the input tokens billed for this request. + */ + input_token_details?: TranscriptTextUsageTokens.InputTokenDetails; + } + + export namespace TranscriptTextUsageTokens { + /** + * Details about the input tokens billed for this request. + */ + export interface InputTokenDetails { + /** + * Number of audio tokens billed for this request. + */ + audio_tokens?: number; + + /** + * Number of text tokens billed for this request. + */ + text_tokens?: number; + } + } + + /** + * Usage statistics for models billed by audio input duration. + */ + export interface TranscriptTextUsageDuration { + /** + * Duration of the input audio in seconds. + */ + seconds: number; + + /** + * The type of the usage object. Always `duration` for this variant. + */ + type: 'duration'; + } +} + +/** + * Returned when the text value of an input audio transcription content part is + * updated. + */ +export interface ConversationItemInputAudioTranscriptionDeltaEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item. + */ + item_id: string; + + /** + * The event type, must be `conversation.item.input_audio_transcription.delta`. + */ + type: 'conversation.item.input_audio_transcription.delta'; + + /** + * The index of the content part in the item's content array. + */ + content_index?: number; + + /** + * The text delta. + */ + delta?: string; + + /** + * The log probabilities of the transcription. + */ + logprobs?: Array | null; +} + +/** + * Returned when input audio transcription is configured, and a transcription + * request for a user message failed. These events are separate from other `error` + * events so that the client can identify the related Item. + */ +export interface ConversationItemInputAudioTranscriptionFailedEvent { + /** + * The index of the content part containing the audio. + */ + content_index: number; + + /** + * Details of the transcription error. + */ + error: ConversationItemInputAudioTranscriptionFailedEvent.Error; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the user message item. + */ + item_id: string; + + /** + * The event type, must be `conversation.item.input_audio_transcription.failed`. + */ + type: 'conversation.item.input_audio_transcription.failed'; +} + +export namespace ConversationItemInputAudioTranscriptionFailedEvent { + /** + * Details of the transcription error. + */ + export interface Error { + /** + * Error code, if any. + */ + code?: string; + + /** + * A human-readable error message. + */ + message?: string; + + /** + * Parameter related to the error, if any. + */ + param?: string; + + /** + * The type of error. + */ + type?: string; + } +} + +/** + * Returned when an input audio transcription segment is identified for an item. + */ +export interface ConversationItemInputAudioTranscriptionSegment { + /** + * The segment identifier. + */ + id: string; + + /** + * The index of the input audio content part within the item. + */ + content_index: number; + + /** + * End time of the segment in seconds. + */ + end: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item containing the input audio content. + */ + item_id: string; + + /** + * The detected speaker label for this segment. + */ + speaker: string; + + /** + * Start time of the segment in seconds. 
+ */ + start: number; + + /** + * The text for this segment. + */ + text: string; + + /** + * The event type, must be `conversation.item.input_audio_transcription.segment`. + */ + type: 'conversation.item.input_audio_transcription.segment'; +} + +/** + * Send this event when you want to retrieve the server's representation of a + * specific item in the conversation history. This is useful, for example, to + * inspect user audio after noise cancellation and VAD. The server will respond + * with a `conversation.item.retrieved` event, unless the item does not exist in + * the conversation history, in which case the server will respond with an error. + */ +export interface ConversationItemRetrieveEvent { + /** + * The ID of the item to retrieve. + */ + item_id: string; + + /** + * The event type, must be `conversation.item.retrieve`. + */ + type: 'conversation.item.retrieve'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; +} + +/** + * Send this event to truncate a previous assistant message’s audio. The server + * will produce audio faster than realtime, so this event is useful when the user + * interrupts to truncate audio that has already been sent to the client but not + * yet played. This will synchronize the server's understanding of the audio with + * the client's playback. + * + * Truncating audio will delete the server-side text transcript to ensure there is + * not text in the context that hasn't been heard by the user. + * + * If successful, the server will respond with a `conversation.item.truncated` + * event. + */ +export interface ConversationItemTruncateEvent { + /** + * Inclusive duration up to which audio is truncated, in milliseconds. If the + * audio_end_ms is greater than the actual audio duration, the server will respond + * with an error. + */ + audio_end_ms: number; + + /** + * The index of the content part to truncate. Set this to 0. + */ + content_index: number; + + /** + * The ID of the assistant message item to truncate. Only assistant message items + * can be truncated. + */ + item_id: string; + + /** + * The event type, must be `conversation.item.truncate`. + */ + type: 'conversation.item.truncate'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; +} + +/** + * Returned when an earlier assistant audio message item is truncated by the client + * with a `conversation.item.truncate` event. This event is used to synchronize the + * server's understanding of the audio with the client's playback. + * + * This action will truncate the audio and remove the server-side text transcript + * to ensure there is no text in the context that hasn't been heard by the user. + */ +export interface ConversationItemTruncatedEvent { + /** + * The duration up to which the audio was truncated, in milliseconds. + */ + audio_end_ms: number; + + /** + * The index of the content part that was truncated. + */ + content_index: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the assistant message item that was truncated. + */ + item_id: string; + + /** + * The event type, must be `conversation.item.truncated`. + */ + type: 'conversation.item.truncated'; +} + +/** + * The item to add to the conversation. + */ +export interface ConversationItemWithReference { + /** + * For an item of type (`message` | `function_call` | `function_call_output`) this + * field allows the client to assign the unique ID of the item. 
It is not required + * because the server will generate one if not provided. + * + * For an item of type `item_reference`, this field is required and is a reference + * to any item that has previously existed in the conversation. + */ + id?: string; + + /** + * The arguments of the function call (for `function_call` items). + */ + arguments?: string; + + /** + * The ID of the function call (for `function_call` and `function_call_output` + * items). If passed on a `function_call_output` item, the server will check that a + * `function_call` item with the same ID exists in the conversation history. + */ + call_id?: string; + + /** + * The content of the message, applicable for `message` items. + * + * - Message items of role `system` support only `input_text` content + * - Message items of role `user` support `input_text` and `input_audio` content + * - Message items of role `assistant` support `text` content. + */ + content?: Array; + + /** + * The name of the function being called (for `function_call` items). + */ + name?: string; + + /** + * Identifier for the API object being returned - always `realtime.item`. + */ + object?: 'realtime.item'; + + /** + * The output of the function call (for `function_call_output` items). + */ + output?: string; + + /** + * The role of the message sender (`user`, `assistant`, `system`), only applicable + * for `message` items. + */ + role?: 'user' | 'assistant' | 'system'; + + /** + * The status of the item (`completed`, `incomplete`, `in_progress`). These have no + * effect on the conversation, but are accepted for consistency with the + * `conversation.item.created` event. + */ + status?: 'completed' | 'incomplete' | 'in_progress'; + + /** + * The type of the item (`message`, `function_call`, `function_call_output`, + * `item_reference`). + */ + type?: 'message' | 'function_call' | 'function_call_output' | 'item_reference'; +} + +export namespace ConversationItemWithReference { + export interface Content { + /** + * ID of a previous conversation item to reference (for `item_reference` content + * types in `response.create` events). These can reference both client and server + * created items. + */ + id?: string; + + /** + * Base64-encoded audio bytes, used for `input_audio` content type. + */ + audio?: string; + + /** + * The text content, used for `input_text` and `text` content types. + */ + text?: string; + + /** + * The transcript of the audio, used for `input_audio` content type. + */ + transcript?: string; + + /** + * The content type (`input_text`, `input_audio`, `item_reference`, `text`). + */ + type?: 'input_text' | 'input_audio' | 'item_reference' | 'text'; + } +} + +/** + * Send this event to append audio bytes to the input audio buffer. The audio + * buffer is temporary storage you can write to and later commit. In Server VAD + * mode, the audio buffer is used to detect speech and the server will decide when + * to commit. When Server VAD is disabled, you must commit the audio buffer + * manually. + * + * The client may choose how much audio to place in each event up to a maximum of + * 15 MiB, for example streaming smaller chunks from the client may allow the VAD + * to be more responsive. Unlike made other client events, the server will not send + * a confirmation response to this event. + */ +export interface InputAudioBufferAppendEvent { + /** + * Base64-encoded audio bytes. This must be in the format specified by the + * `input_audio_format` field in the session configuration. 
+ */ + audio: string; + + /** + * The event type, must be `input_audio_buffer.append`. + */ + type: 'input_audio_buffer.append'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; +} + +/** + * Send this event to clear the audio bytes in the buffer. The server will respond + * with an `input_audio_buffer.cleared` event. + */ +export interface InputAudioBufferClearEvent { + /** + * The event type, must be `input_audio_buffer.clear`. + */ + type: 'input_audio_buffer.clear'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; +} + +/** + * Returned when the input audio buffer is cleared by the client with a + * `input_audio_buffer.clear` event. + */ +export interface InputAudioBufferClearedEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The event type, must be `input_audio_buffer.cleared`. + */ + type: 'input_audio_buffer.cleared'; +} + +/** + * Send this event to commit the user input audio buffer, which will create a new + * user message item in the conversation. This event will produce an error if the + * input audio buffer is empty. When in Server VAD mode, the client does not need + * to send this event, the server will commit the audio buffer automatically. + * + * Committing the input audio buffer will trigger input audio transcription (if + * enabled in session configuration), but it will not create a response from the + * model. The server will respond with an `input_audio_buffer.committed` event. + */ +export interface InputAudioBufferCommitEvent { + /** + * The event type, must be `input_audio_buffer.commit`. + */ + type: 'input_audio_buffer.commit'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; +} + +/** + * Returned when an input audio buffer is committed, either by the client or + * automatically in server VAD mode. The `item_id` property is the ID of the user + * message item that will be created, thus a `conversation.item.created` event will + * also be sent to the client. + */ +export interface InputAudioBufferCommittedEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the user message item that will be created. + */ + item_id: string; + + /** + * The event type, must be `input_audio_buffer.committed`. + */ + type: 'input_audio_buffer.committed'; + + /** + * The ID of the preceding item after which the new item will be inserted. Can be + * `null` if the item has no predecessor. + */ + previous_item_id?: string | null; +} + +/** + * Sent by the server when in `server_vad` mode to indicate that speech has been + * detected in the audio buffer. This can happen any time audio is added to the + * buffer (unless speech is already detected). The client may want to use this + * event to interrupt audio playback or provide visual feedback to the user. + * + * The client should expect to receive a `input_audio_buffer.speech_stopped` event + * when speech stops. The `item_id` property is the ID of the user message item + * that will be created when speech stops and will also be included in the + * `input_audio_buffer.speech_stopped` event (unless the client manually commits + * the audio buffer during VAD activation). + */ +export interface InputAudioBufferSpeechStartedEvent { + /** + * Milliseconds from the start of all audio written to the buffer during the + * session when speech was first detected. 
This will correspond to the beginning of + * audio sent to the model, and thus includes the `prefix_padding_ms` configured in + * the Session. + */ + audio_start_ms: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the user message item that will be created when speech stops. + */ + item_id: string; + + /** + * The event type, must be `input_audio_buffer.speech_started`. + */ + type: 'input_audio_buffer.speech_started'; +} + +/** + * Returned in `server_vad` mode when the server detects the end of speech in the + * audio buffer. The server will also send an `conversation.item.created` event + * with the user message item that is created from the audio buffer. + */ +export interface InputAudioBufferSpeechStoppedEvent { + /** + * Milliseconds since the session started when speech stopped. This will correspond + * to the end of audio sent to the model, and thus includes the + * `min_silence_duration_ms` configured in the Session. + */ + audio_end_ms: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the user message item that will be created. + */ + item_id: string; + + /** + * The event type, must be `input_audio_buffer.speech_stopped`. + */ + type: 'input_audio_buffer.speech_stopped'; +} + +/** + * Returned when the server VAD timeout is triggered for the input audio buffer. + */ +export interface InputAudioBufferTimeoutTriggered { + /** + * Millisecond offset where speech ended within the buffered audio. + */ + audio_end_ms: number; + + /** + * Millisecond offset where speech started within the buffered audio. + */ + audio_start_ms: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item associated with this segment. + */ + item_id: string; + + /** + * The event type, must be `input_audio_buffer.timeout_triggered`. + */ + type: 'input_audio_buffer.timeout_triggered'; +} + +/** + * A log probability object. + */ +export interface LogProbProperties { + /** + * The token that was used to generate the log probability. + */ + token: string; + + /** + * The bytes that were used to generate the log probability. + */ + bytes: Array; + + /** + * The log probability of the token. + */ + logprob: number; +} + +/** + * Returned when listing MCP tools has completed for an item. + */ +export interface McpListToolsCompleted { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the MCP list tools item. + */ + item_id: string; + + /** + * The event type, must be `mcp_list_tools.completed`. + */ + type: 'mcp_list_tools.completed'; +} + +/** + * Returned when listing MCP tools has failed for an item. + */ +export interface McpListToolsFailed { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the MCP list tools item. + */ + item_id: string; + + /** + * The event type, must be `mcp_list_tools.failed`. + */ + type: 'mcp_list_tools.failed'; +} + +/** + * Returned when listing MCP tools is in progress for an item. + */ +export interface McpListToolsInProgress { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the MCP list tools item. + */ + item_id: string; + + /** + * The event type, must be `mcp_list_tools.in_progress`. + */ + type: 'mcp_list_tools.in_progress'; +} + +/** + * **WebRTC Only:** Emit to cut off the current audio response. 
This will trigger + * the server to stop generating audio and emit a `output_audio_buffer.cleared` + * event. This event should be preceded by a `response.cancel` client event to stop + * the generation of the current response. + * [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + */ +export interface OutputAudioBufferClearEvent { + /** + * The event type, must be `output_audio_buffer.clear`. + */ + type: 'output_audio_buffer.clear'; + + /** + * The unique ID of the client event used for error handling. + */ + event_id?: string; +} + +/** + * Emitted at the beginning of a Response to indicate the updated rate limits. When + * a Response is created some tokens will be "reserved" for the output tokens, the + * rate limits shown here reflect that reservation, which is then adjusted + * accordingly once the Response is completed. + */ +export interface RateLimitsUpdatedEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * List of rate limit information. + */ + rate_limits: Array; + + /** + * The event type, must be `rate_limits.updated`. + */ + type: 'rate_limits.updated'; +} + +export namespace RateLimitsUpdatedEvent { + export interface RateLimit { + /** + * The maximum allowed value for the rate limit. + */ + limit?: number; + + /** + * The name of the rate limit (`requests`, `tokens`). + */ + name?: 'requests' | 'tokens'; + + /** + * The remaining value before the limit is reached. + */ + remaining?: number; + + /** + * Seconds until the rate limit resets. + */ + reset_seconds?: number; + } +} + +/** + * Configuration for input and output audio. + */ +export interface RealtimeAudioConfig { + input?: RealtimeAudioConfig.Input; + + output?: RealtimeAudioConfig.Output; +} + +export namespace RealtimeAudioConfig { + export interface Input { + /** + * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + * `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + * (mono), and little-endian byte order. + */ + format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw'; + + /** + * Configuration for input audio noise reduction. This can be set to `null` to turn + * off. Noise reduction filters audio added to the input audio buffer before it is + * sent to VAD and the model. Filtering the audio can improve VAD and turn + * detection accuracy (reducing false positives) and model performance by improving + * perception of the input audio. + */ + noise_reduction?: Input.NoiseReduction; + + /** + * Configuration for input audio transcription, defaults to off and can be set to + * `null` to turn off once on. Input audio transcription is not native to the + * model, since the model consumes audio directly. Transcription runs + * asynchronously through + * [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + * and should be treated as guidance of input audio content rather than precisely + * what the model heard. The client can optionally set the language and prompt for + * transcription, these offer additional guidance to the transcription service. + */ + transcription?: Input.Transcription; + + /** + * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be + * set to `null` to turn off, in which case the client must manually trigger model + * response. 
Server VAD means that the model will detect the start and end of + * speech based on audio volume and respond at the end of user speech. Semantic VAD + * is more advanced and uses a turn detection model (in conjunction with VAD) to + * semantically estimate whether the user has finished speaking, then dynamically + * sets a timeout based on this probability. For example, if user audio trails off + * with "uhhm", the model will score a low probability of turn end and wait longer + * for the user to continue speaking. This can be useful for more natural + * conversations, but may have a higher latency. + */ + turn_detection?: Input.TurnDetection; + } + + export namespace Input { + /** + * Configuration for input audio noise reduction. This can be set to `null` to turn + * off. Noise reduction filters audio added to the input audio buffer before it is + * sent to VAD and the model. Filtering the audio can improve VAD and turn + * detection accuracy (reducing false positives) and model performance by improving + * perception of the input audio. + */ + export interface NoiseReduction { + /** + * Type of noise reduction. `near_field` is for close-talking microphones such as + * headphones, `far_field` is for far-field microphones such as laptop or + * conference room microphones. + */ + type?: 'near_field' | 'far_field'; + } + + /** + * Configuration for input audio transcription, defaults to off and can be set to + * `null` to turn off once on. Input audio transcription is not native to the + * model, since the model consumes audio directly. Transcription runs + * asynchronously through + * [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + * and should be treated as guidance of input audio content rather than precisely + * what the model heard. The client can optionally set the language and prompt for + * transcription, these offer additional guidance to the transcription service. + */ + export interface Transcription { + /** + * The language of the input audio. Supplying the input language in + * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + * format will improve accuracy and latency. + */ + language?: string; + + /** + * The model to use for transcription. Current options are `whisper-1`, + * `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and + * `gpt-4o-transcribe-diarize`. + */ + model?: + | 'whisper-1' + | 'gpt-4o-transcribe-latest' + | 'gpt-4o-mini-transcribe' + | 'gpt-4o-transcribe' + | 'gpt-4o-transcribe-diarize'; + + /** + * An optional text to guide the model's style or continue a previous audio + * segment. For `whisper-1`, the + * [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + * For `gpt-4o-transcribe` models, the prompt is a free text string, for example + * "expect words related to technology". + */ + prompt?: string; + } + + /** + * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be + * set to `null` to turn off, in which case the client must manually trigger model + * response. Server VAD means that the model will detect the start and end of + * speech based on audio volume and respond at the end of user speech. Semantic VAD + * is more advanced and uses a turn detection model (in conjunction with VAD) to + * semantically estimate whether the user has finished speaking, then dynamically + * sets a timeout based on this probability. 
For example, if user audio trails off + * with "uhhm", the model will score a low probability of turn end and wait longer + * for the user to continue speaking. This can be useful for more natural + * conversations, but may have a higher latency. + */ + export interface TurnDetection { + /** + * Whether or not to automatically generate a response when a VAD stop event + * occurs. + */ + create_response?: boolean; + + /** + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + * will wait longer for the user to continue speaking, `high` will respond more + * quickly. `auto` is the default and is equivalent to `medium`. + */ + eagerness?: 'low' | 'medium' | 'high' | 'auto'; + + /** + * Optional idle timeout after which turn detection will auto-timeout when no + * additional audio is received. + */ + idle_timeout_ms?: number | null; + + /** + * Whether or not to automatically interrupt any ongoing response with output to + * the default conversation (i.e. `conversation` of `auto`) when a VAD start event + * occurs. + */ + interrupt_response?: boolean; + + /** + * Used only for `server_vad` mode. Amount of audio to include before the VAD + * detected speech (in milliseconds). Defaults to 300ms. + */ + prefix_padding_ms?: number; + + /** + * Used only for `server_vad` mode. Duration of silence to detect speech stop (in + * milliseconds). Defaults to 500ms. With shorter values the model will respond + * more quickly, but may jump in on short pauses from the user. + */ + silence_duration_ms?: number; + + /** + * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + * defaults to 0.5. A higher threshold will require louder audio to activate the + * model, and thus might perform better in noisy environments. + */ + threshold?: number; + + /** + * Type of turn detection. + */ + type?: 'server_vad' | 'semantic_vad'; + } + } + + export interface Output { + /** + * The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + * For `pcm16`, output audio is sampled at a rate of 24kHz. + */ + format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw'; + + /** + * The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the + * minimum speed. 1.5 is the maximum speed. This value can only be changed in + * between model turns, not while a response is in progress. + */ + speed?: number; + + /** + * The voice the model uses to respond. Voice cannot be changed during the session + * once the model has responded with audio at least once. Current voice options are + * `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + * and `cedar`. + */ + voice?: + | (string & {}) + | 'alloy' + | 'ash' + | 'ballad' + | 'coral' + | 'echo' + | 'sage' + | 'shimmer' + | 'verse' + | 'marin' + | 'cedar'; + } +} + +/** + * A realtime client event. + */ +export type RealtimeClientEvent = + | ConversationItemCreateEvent + | ConversationItemDeleteEvent + | ConversationItemRetrieveEvent + | ConversationItemTruncateEvent + | InputAudioBufferAppendEvent + | InputAudioBufferClearEvent + | OutputAudioBufferClearEvent + | InputAudioBufferCommitEvent + | ResponseCancelEvent + | ResponseCreateEvent + | SessionUpdateEvent + | TranscriptionSessionUpdate; + +/** + * Configuration options for the generated client secret. + */ +export interface RealtimeClientSecretConfig { + /** + * Configuration for the ephemeral token expiration. 
+ */ + expires_after?: RealtimeClientSecretConfig.ExpiresAfter; +} + +export namespace RealtimeClientSecretConfig { + /** + * Configuration for the ephemeral token expiration. + */ + export interface ExpiresAfter { + /** + * The anchor point for the ephemeral token expiration. Only `created_at` is + * currently supported. + */ + anchor: 'created_at'; + + /** + * The number of seconds from the anchor point to the expiration. Select a value + * between `10` and `7200`. + */ + seconds?: number; + } +} + +/** + * An assistant message item in a Realtime conversation. + */ +export interface RealtimeConversationItemAssistantMessage { + /** + * The content of the message. + */ + content: Array; + + /** + * The role of the message sender. Always `assistant`. + */ + role: 'assistant'; + + /** + * The type of the item. Always `message`. + */ + type: 'message'; + + /** + * The unique ID of the item. + */ + id?: string; + + /** + * Identifier for the API object being returned - always `realtime.item`. + */ + object?: 'realtime.item'; + + /** + * The status of the item. Has no effect on the conversation. + */ + status?: 'completed' | 'incomplete' | 'in_progress'; +} + +export namespace RealtimeConversationItemAssistantMessage { + export interface Content { + /** + * The text content. + */ + text?: string; + + /** + * The content type. Always `text` for assistant messages. + */ + type?: 'text'; + } +} + +/** + * A function call item in a Realtime conversation. + */ +export interface RealtimeConversationItemFunctionCall { + /** + * The arguments of the function call. + */ + arguments: string; + + /** + * The name of the function being called. + */ + name: string; + + /** + * The type of the item. Always `function_call`. + */ + type: 'function_call'; + + /** + * The unique ID of the item. + */ + id?: string; + + /** + * The ID of the function call. + */ + call_id?: string; + + /** + * Identifier for the API object being returned - always `realtime.item`. + */ + object?: 'realtime.item'; + + /** + * The status of the item. Has no effect on the conversation. + */ + status?: 'completed' | 'incomplete' | 'in_progress'; +} + +/** + * A function call output item in a Realtime conversation. + */ +export interface RealtimeConversationItemFunctionCallOutput { + /** + * The ID of the function call this output is for. + */ + call_id: string; + + /** + * The output of the function call. + */ + output: string; + + /** + * The type of the item. Always `function_call_output`. + */ + type: 'function_call_output'; + + /** + * The unique ID of the item. + */ + id?: string; + + /** + * Identifier for the API object being returned - always `realtime.item`. + */ + object?: 'realtime.item'; + + /** + * The status of the item. Has no effect on the conversation. + */ + status?: 'completed' | 'incomplete' | 'in_progress'; +} + +/** + * A system message item in a Realtime conversation. + */ +export interface RealtimeConversationItemSystemMessage { + /** + * The content of the message. + */ + content: Array; + + /** + * The role of the message sender. Always `system`. + */ + role: 'system'; + + /** + * The type of the item. Always `message`. + */ + type: 'message'; + + /** + * The unique ID of the item. + */ + id?: string; + + /** + * Identifier for the API object being returned - always `realtime.item`. + */ + object?: 'realtime.item'; + + /** + * The status of the item. Has no effect on the conversation. 
+ */ + status?: 'completed' | 'incomplete' | 'in_progress'; +} + +export namespace RealtimeConversationItemSystemMessage { + export interface Content { + /** + * The text content. + */ + text?: string; + + /** + * The content type. Always `input_text` for system messages. + */ + type?: 'input_text'; + } +} + +/** + * A user message item in a Realtime conversation. + */ +export interface RealtimeConversationItemUserMessage { + /** + * The content of the message. + */ + content: Array; + + /** + * The role of the message sender. Always `user`. + */ + role: 'user'; + + /** + * The type of the item. Always `message`. + */ + type: 'message'; + + /** + * The unique ID of the item. + */ + id?: string; + + /** + * Identifier for the API object being returned - always `realtime.item`. + */ + object?: 'realtime.item'; + + /** + * The status of the item. Has no effect on the conversation. + */ + status?: 'completed' | 'incomplete' | 'in_progress'; +} + +export namespace RealtimeConversationItemUserMessage { + export interface Content { + /** + * Base64-encoded audio bytes (for `input_audio`). + */ + audio?: string; + + /** + * The text content (for `input_text`). + */ + text?: string; + + /** + * Transcript of the audio (for `input_audio`). + */ + transcript?: string; + + /** + * The content type (`input_text` or `input_audio`). + */ + type?: 'input_text' | 'input_audio'; + } +} + +/** + * Details of the error. + */ +export interface RealtimeError { + /** + * A human-readable error message. + */ + message: string; + + /** + * The type of error (e.g., "invalid_request_error", "server_error"). + */ + type: string; + + /** + * Error code, if any. + */ + code?: string | null; + + /** + * The event_id of the client event that caused the error, if applicable. + */ + event_id?: string | null; + + /** + * Parameter related to the error, if any. + */ + param?: string | null; +} + +/** + * Returned when an error occurs, which could be a client problem or a server + * problem. Most errors are recoverable and the session will stay open, we + * recommend to implementors to monitor and log error messages by default. + */ +export interface RealtimeErrorEvent { + /** + * Details of the error. + */ + error: RealtimeError; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The event type, must be `error`. + */ + type: 'error'; +} + +/** + * A Realtime item requesting human approval of a tool invocation. + */ +export interface RealtimeMcpApprovalRequest { + /** + * The unique ID of the approval request. + */ + id: string; + + /** + * A JSON string of arguments for the tool. + */ + arguments: string; + + /** + * The name of the tool to run. + */ + name: string; + + /** + * The label of the MCP server making the request. + */ + server_label: string; + + /** + * The type of the item. Always `mcp_approval_request`. + */ + type: 'mcp_approval_request'; +} + +/** + * A Realtime item responding to an MCP approval request. + */ +export interface RealtimeMcpApprovalResponse { + /** + * The unique ID of the approval response. + */ + id: string; + + /** + * The ID of the approval request being answered. + */ + approval_request_id: string; + + /** + * Whether the request was approved. + */ + approve: boolean; + + /** + * The type of the item. Always `mcp_approval_response`. + */ + type: 'mcp_approval_response'; + + /** + * Optional reason for the decision. + */ + reason?: string | null; +} + +/** + * A Realtime item listing tools available on an MCP server. 
+ */ +export interface RealtimeMcpListTools { + /** + * The label of the MCP server. + */ + server_label: string; + + /** + * The tools available on the server. + */ + tools: Array; + + /** + * The type of the item. Always `mcp_list_tools`. + */ + type: 'mcp_list_tools'; + + /** + * The unique ID of the list. + */ + id?: string; +} + +export namespace RealtimeMcpListTools { + /** + * A tool available on an MCP server. + */ + export interface Tool { + /** + * The JSON schema describing the tool's input. + */ + input_schema: unknown; + + /** + * The name of the tool. + */ + name: string; + + /** + * Additional annotations about the tool. + */ + annotations?: unknown | null; + + /** + * The description of the tool. + */ + description?: string | null; + } +} + +export interface RealtimeMcpProtocolError { + code: number; + + message: string; + + type: 'protocol_error'; +} + +/** + * A Realtime item representing an invocation of a tool on an MCP server. + */ +export interface RealtimeMcpToolCall { + /** + * The unique ID of the tool call. + */ + id: string; + + /** + * A JSON string of the arguments passed to the tool. + */ + arguments: string; + + /** + * The name of the tool that was run. + */ + name: string; + + /** + * The label of the MCP server running the tool. + */ + server_label: string; + + /** + * The type of the item. Always `mcp_tool_call`. + */ + type: 'mcp_tool_call'; + + /** + * The ID of an associated approval request, if any. + */ + approval_request_id?: string | null; + + /** + * The error from the tool call, if any. + */ + error?: RealtimeMcpProtocolError | RealtimeMcpToolExecutionError | RealtimeMcphttpError | null; + + /** + * The output from the tool call. + */ + output?: string | null; +} + +export interface RealtimeMcpToolExecutionError { + message: string; + + type: 'tool_execution_error'; +} + +export interface RealtimeMcphttpError { + code: number; + + message: string; + + type: 'http_error'; +} + +/** + * The response resource. + */ +export interface RealtimeResponse { + /** + * The unique ID of the response. + */ + id?: string; + + /** + * Which conversation the response is added to, determined by the `conversation` + * field in the `response.create` event. If `auto`, the response will be added to + * the default conversation and the value of `conversation_id` will be an id like + * `conv_1234`. If `none`, the response will not be added to any conversation and + * the value of `conversation_id` will be `null`. If responses are being triggered + * by server VAD, the response will be added to the default conversation, thus the + * `conversation_id` will be an id like `conv_1234`. + */ + conversation_id?: string; + + /** + * Maximum number of output tokens for a single assistant response, inclusive of + * tool calls, that was used in this response. + */ + max_output_tokens?: number | 'inf'; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata?: Shared.Metadata | null; + + /** + * The set of modalities the model used to respond. If there are multiple + * modalities, the model will pick one, for example if `modalities` is + * `["text", "audio"]`, the model could be responding in either text or audio. 
+ */ + modalities?: Array<'text' | 'audio'>; + + /** + * The object type, must be `realtime.response`. + */ + object?: 'realtime.response'; + + /** + * The list of output items generated by the response. + */ + output?: Array; + + /** + * The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + */ + output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw'; + + /** + * The final status of the response (`completed`, `cancelled`, `failed`, or + * `incomplete`, `in_progress`). + */ + status?: 'completed' | 'cancelled' | 'failed' | 'incomplete' | 'in_progress'; + + /** + * Additional details about the status. + */ + status_details?: RealtimeResponseStatus; + + /** + * Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. + */ + temperature?: number; + + /** + * Usage statistics for the Response, this will correspond to billing. A Realtime + * API session will maintain a conversation context and append new Items to the + * Conversation, thus output from previous turns (text and audio tokens) will + * become the input for later turns. + */ + usage?: RealtimeResponseUsage; + + /** + * The voice the model used to respond. Current voice options are `alloy`, `ash`, + * `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. + */ + voice?: + | (string & {}) + | 'alloy' + | 'ash' + | 'ballad' + | 'coral' + | 'echo' + | 'sage' + | 'shimmer' + | 'verse' + | 'marin' + | 'cedar'; +} + +/** + * Additional details about the status. + */ +export interface RealtimeResponseStatus { + /** + * A description of the error that caused the response to fail, populated when the + * `status` is `failed`. + */ + error?: RealtimeResponseStatus.Error; + + /** + * The reason the Response did not complete. For a `cancelled` Response, one of + * `turn_detected` (the server VAD detected a new start of speech) or + * `client_cancelled` (the client sent a cancel event). For an `incomplete` + * Response, one of `max_output_tokens` or `content_filter` (the server-side safety + * filter activated and cut off the response). + */ + reason?: 'turn_detected' | 'client_cancelled' | 'max_output_tokens' | 'content_filter'; + + /** + * The type of error that caused the response to fail, corresponding with the + * `status` field (`completed`, `cancelled`, `incomplete`, `failed`). + */ + type?: 'completed' | 'cancelled' | 'incomplete' | 'failed'; +} + +export namespace RealtimeResponseStatus { + /** + * A description of the error that caused the response to fail, populated when the + * `status` is `failed`. + */ + export interface Error { + /** + * Error code, if any. + */ + code?: string; + + /** + * The type of error. + */ + type?: string; + } +} + +/** + * Usage statistics for the Response, this will correspond to billing. A Realtime + * API session will maintain a conversation context and append new Items to the + * Conversation, thus output from previous turns (text and audio tokens) will + * become the input for later turns. + */ +export interface RealtimeResponseUsage { + /** + * Details about the input tokens used in the Response. + */ + input_token_details?: RealtimeResponseUsageInputTokenDetails; + + /** + * The number of input tokens used in the Response, including text and audio + * tokens. + */ + input_tokens?: number; + + /** + * Details about the output tokens used in the Response. + */ + output_token_details?: RealtimeResponseUsageOutputTokenDetails; + + /** + * The number of output tokens sent in the Response, including text and audio + * tokens. 
+ */ + output_tokens?: number; + + /** + * The total number of tokens in the Response including input and output text and + * audio tokens. + */ + total_tokens?: number; +} + +/** + * Details about the input tokens used in the Response. + */ +export interface RealtimeResponseUsageInputTokenDetails { + /** + * The number of audio tokens used in the Response. + */ + audio_tokens?: number; + + /** + * The number of cached tokens used in the Response. + */ + cached_tokens?: number; + + /** + * The number of text tokens used in the Response. + */ + text_tokens?: number; +} + +/** + * Details about the output tokens used in the Response. + */ +export interface RealtimeResponseUsageOutputTokenDetails { + /** + * The number of audio tokens used in the Response. + */ + audio_tokens?: number; + + /** + * The number of text tokens used in the Response. + */ + text_tokens?: number; +} + +/** + * A realtime server event. + */ +export type RealtimeServerEvent = + | ConversationCreatedEvent + | ConversationItemCreatedEvent + | ConversationItemDeletedEvent + | ConversationItemInputAudioTranscriptionCompletedEvent + | ConversationItemInputAudioTranscriptionDeltaEvent + | ConversationItemInputAudioTranscriptionFailedEvent + | RealtimeServerEvent.ConversationItemRetrieved + | ConversationItemTruncatedEvent + | RealtimeErrorEvent + | InputAudioBufferClearedEvent + | InputAudioBufferCommittedEvent + | InputAudioBufferSpeechStartedEvent + | InputAudioBufferSpeechStoppedEvent + | RateLimitsUpdatedEvent + | ResponseAudioDeltaEvent + | ResponseAudioDoneEvent + | ResponseAudioTranscriptDeltaEvent + | ResponseAudioTranscriptDoneEvent + | ResponseContentPartAddedEvent + | ResponseContentPartDoneEvent + | ResponseCreatedEvent + | ResponseDoneEvent + | ResponseFunctionCallArgumentsDeltaEvent + | ResponseFunctionCallArgumentsDoneEvent + | ResponseOutputItemAddedEvent + | ResponseOutputItemDoneEvent + | ResponseTextDeltaEvent + | ResponseTextDoneEvent + | SessionCreatedEvent + | SessionUpdatedEvent + | TranscriptionSessionUpdatedEvent + | TranscriptionSessionCreated + | RealtimeServerEvent.OutputAudioBufferStarted + | RealtimeServerEvent.OutputAudioBufferStopped + | RealtimeServerEvent.OutputAudioBufferCleared + | ConversationItemAdded + | ConversationItemDone + | InputAudioBufferTimeoutTriggered + | ConversationItemInputAudioTranscriptionSegment + | McpListToolsInProgress + | McpListToolsCompleted + | McpListToolsFailed + | ResponseMcpCallArgumentsDelta + | ResponseMcpCallArgumentsDone + | ResponseMcpCallInProgress + | ResponseMcpCallCompleted + | ResponseMcpCallFailed; + +export namespace RealtimeServerEvent { + /** + * Returned when a conversation item is retrieved with + * `conversation.item.retrieve`. + */ + export interface ConversationItemRetrieved { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * A single item within a Realtime conversation. + */ + item: RealtimeAPI.ConversationItem; + + /** + * The event type, must be `conversation.item.retrieved`. + */ + type: 'conversation.item.retrieved'; + } + + /** + * **WebRTC Only:** Emitted when the server begins streaming audio to the client. + * This event is emitted after an audio content part has been added + * (`response.content_part.added`) to the response. + * [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + */ + export interface OutputAudioBufferStarted { + /** + * The unique ID of the server event. 
+ */ + event_id: string; + + /** + * The unique ID of the response that produced the audio. + */ + response_id: string; + + /** + * The event type, must be `output_audio_buffer.started`. + */ + type: 'output_audio_buffer.started'; + } + + /** + * **WebRTC Only:** Emitted when the output audio buffer has been completely + * drained on the server, and no more audio is forthcoming. This event is emitted + * after the full response data has been sent to the client (`response.done`). + * [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + */ + export interface OutputAudioBufferStopped { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The unique ID of the response that produced the audio. + */ + response_id: string; + + /** + * The event type, must be `output_audio_buffer.stopped`. + */ + type: 'output_audio_buffer.stopped'; + } + + /** + * **WebRTC Only:** Emitted when the output audio buffer is cleared. This happens + * either in VAD mode when the user has interrupted + * (`input_audio_buffer.speech_started`), or when the client has emitted the + * `output_audio_buffer.clear` event to manually cut off the current audio + * response. + * [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + */ + export interface OutputAudioBufferCleared { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The unique ID of the response that produced the audio. + */ + response_id: string; + + /** + * The event type, must be `output_audio_buffer.cleared`. + */ + type: 'output_audio_buffer.cleared'; + } +} + +/** + * Realtime session object. + */ +export interface RealtimeSession { + /** + * Unique identifier for the session that looks like `sess_1234567890abcdef`. + */ + id?: string; + + /** + * Expiration timestamp for the session, in seconds since epoch. + */ + expires_at?: number; + + /** + * Additional fields to include in server outputs. + * + * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio + * transcription. + */ + include?: Array<'item.input_audio_transcription.logprobs'> | null; + + /** + * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + * `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + * (mono), and little-endian byte order. + */ + input_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw'; + + /** + * Configuration for input audio noise reduction. This can be set to `null` to turn + * off. Noise reduction filters audio added to the input audio buffer before it is + * sent to VAD and the model. Filtering the audio can improve VAD and turn + * detection accuracy (reducing false positives) and model performance by improving + * perception of the input audio. + */ + input_audio_noise_reduction?: RealtimeSession.InputAudioNoiseReduction; + + /** + * Configuration for input audio transcription, defaults to off and can be set to + * `null` to turn off once on. Input audio transcription is not native to the + * model, since the model consumes audio directly. Transcription runs + * asynchronously through + * [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + * and should be treated as guidance of input audio content rather than precisely + * what the model heard. 
The client can optionally set the language and prompt for + * transcription, these offer additional guidance to the transcription service. + */ + input_audio_transcription?: RealtimeSession.InputAudioTranscription | null; + + /** + * The default system instructions (i.e. system message) prepended to model calls. + * This field allows the client to guide the model on desired responses. The model + * can be instructed on response content and format, (e.g. "be extremely succinct", + * "act friendly", "here are examples of good responses") and on audio behavior + * (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + * instructions are not guaranteed to be followed by the model, but they provide + * guidance to the model on the desired behavior. + * + * Note that the server sets default instructions which will be used if this field + * is not set and are visible in the `session.created` event at the start of the + * session. + */ + instructions?: string; + + /** + * Maximum number of output tokens for a single assistant response, inclusive of + * tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + * `inf` for the maximum available tokens for a given model. Defaults to `inf`. + */ + max_response_output_tokens?: number | 'inf'; + + /** + * The set of modalities the model can respond with. To disable audio, set this to + * ["text"]. + */ + modalities?: Array<'text' | 'audio'>; + + /** + * The Realtime model used for this session. + */ + model?: + | 'gpt-4o-realtime-preview' + | 'gpt-4o-realtime-preview-2024-10-01' + | 'gpt-4o-realtime-preview-2024-12-17' + | 'gpt-4o-realtime-preview-2025-06-03' + | 'gpt-4o-mini-realtime-preview' + | 'gpt-4o-mini-realtime-preview-2024-12-17'; + + /** + * The object type. Always `realtime.session`. + */ + object?: 'realtime.session'; + + /** + * The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + * For `pcm16`, output audio is sampled at a rate of 24kHz. + */ + output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw'; + + /** + * Reference to a prompt template and its variables. + * [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + */ + prompt?: ResponsesAPI.ResponsePrompt | null; + + /** + * The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the + * minimum speed. 1.5 is the maximum speed. This value can only be changed in + * between model turns, not while a response is in progress. + */ + speed?: number; + + /** + * Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a + * temperature of 0.8 is highly recommended for best performance. + */ + temperature?: number; + + /** + * How the model chooses tools. Options are `auto`, `none`, `required`, or specify + * a function. + */ + tool_choice?: string; + + /** + * Tools (functions) available to the model. + */ + tools?: Array; + + /** + * Configuration options for tracing. Set to null to disable tracing. Once tracing + * is enabled for a session, the configuration cannot be modified. + * + * `auto` will create a trace for the session with default values for the workflow + * name, group id, and metadata. + */ + tracing?: 'auto' | RealtimeSession.TracingConfiguration | null; + + /** + * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be + * set to `null` to turn off, in which case the client must manually trigger model + * response. 
Server VAD means that the model will detect the start and end of + * speech based on audio volume and respond at the end of user speech. Semantic VAD + * is more advanced and uses a turn detection model (in conjunction with VAD) to + * semantically estimate whether the user has finished speaking, then dynamically + * sets a timeout based on this probability. For example, if user audio trails off + * with "uhhm", the model will score a low probability of turn end and wait longer + * for the user to continue speaking. This can be useful for more natural + * conversations, but may have a higher latency. + */ + turn_detection?: RealtimeSession.TurnDetection | null; + + /** + * The voice the model uses to respond. Voice cannot be changed during the session + * once the model has responded with audio at least once. Current voice options are + * `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. + */ + voice?: + | (string & {}) + | 'alloy' + | 'ash' + | 'ballad' + | 'coral' + | 'echo' + | 'sage' + | 'shimmer' + | 'verse' + | 'marin' + | 'cedar'; +} + +export namespace RealtimeSession { + /** + * Configuration for input audio noise reduction. This can be set to `null` to turn + * off. Noise reduction filters audio added to the input audio buffer before it is + * sent to VAD and the model. Filtering the audio can improve VAD and turn + * detection accuracy (reducing false positives) and model performance by improving + * perception of the input audio. + */ + export interface InputAudioNoiseReduction { + /** + * Type of noise reduction. `near_field` is for close-talking microphones such as + * headphones, `far_field` is for far-field microphones such as laptop or + * conference room microphones. + */ + type?: 'near_field' | 'far_field'; + } + + /** + * Configuration for input audio transcription, defaults to off and can be set to + * `null` to turn off once on. Input audio transcription is not native to the + * model, since the model consumes audio directly. Transcription runs + * asynchronously through + * [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + * and should be treated as guidance of input audio content rather than precisely + * what the model heard. The client can optionally set the language and prompt for + * transcription, these offer additional guidance to the transcription service. + */ + export interface InputAudioTranscription { + /** + * The language of the input audio. Supplying the input language in + * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + * format will improve accuracy and latency. + */ + language?: string; + + /** + * The model to use for transcription, current options are `gpt-4o-transcribe`, + * `gpt-4o-mini-transcribe`, and `whisper-1`. + */ + model?: string; + + /** + * An optional text to guide the model's style or continue a previous audio + * segment. For `whisper-1`, the + * [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + * For `gpt-4o-transcribe` models, the prompt is a free text string, for example + * "expect words related to technology". + */ + prompt?: string; + } + + export interface Tool { + /** + * The description of the function, including guidance on when and how to call it, + * and guidance about what to tell the user when calling (if anything). + */ + description?: string; + + /** + * The name of the function. 
+ */ + name?: string; + + /** + * Parameters of the function in JSON Schema. + */ + parameters?: unknown; + + /** + * The type of the tool, i.e. `function`. + */ + type?: 'function'; + } + + /** + * Granular configuration for tracing. + */ + export interface TracingConfiguration { + /** + * The group id to attach to this trace to enable filtering and grouping in the + * traces dashboard. + */ + group_id?: string; + + /** + * The arbitrary metadata to attach to this trace to enable filtering in the traces + * dashboard. + */ + metadata?: unknown; + + /** + * The name of the workflow to attach to this trace. This is used to name the trace + * in the traces dashboard. + */ + workflow_name?: string; + } + + /** + * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be + * set to `null` to turn off, in which case the client must manually trigger model + * response. Server VAD means that the model will detect the start and end of + * speech based on audio volume and respond at the end of user speech. Semantic VAD + * is more advanced and uses a turn detection model (in conjunction with VAD) to + * semantically estimate whether the user has finished speaking, then dynamically + * sets a timeout based on this probability. For example, if user audio trails off + * with "uhhm", the model will score a low probability of turn end and wait longer + * for the user to continue speaking. This can be useful for more natural + * conversations, but may have a higher latency. + */ + export interface TurnDetection { + /** + * Whether or not to automatically generate a response when a VAD stop event + * occurs. + */ + create_response?: boolean; + + /** + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + * will wait longer for the user to continue speaking, `high` will respond more + * quickly. `auto` is the default and is equivalent to `medium`. + */ + eagerness?: 'low' | 'medium' | 'high' | 'auto'; + + /** + * Optional idle timeout after which turn detection will auto-timeout when no + * additional audio is received. + */ + idle_timeout_ms?: number | null; + + /** + * Whether or not to automatically interrupt any ongoing response with output to + * the default conversation (i.e. `conversation` of `auto`) when a VAD start event + * occurs. + */ + interrupt_response?: boolean; + + /** + * Used only for `server_vad` mode. Amount of audio to include before the VAD + * detected speech (in milliseconds). Defaults to 300ms. + */ + prefix_padding_ms?: number; + + /** + * Used only for `server_vad` mode. Duration of silence to detect speech stop (in + * milliseconds). Defaults to 500ms. With shorter values the model will respond + * more quickly, but may jump in on short pauses from the user. + */ + silence_duration_ms?: number; + + /** + * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + * defaults to 0.5. A higher threshold will require louder audio to activate the + * model, and thus might perform better in noisy environments. + */ + threshold?: number; + + /** + * Type of turn detection. + */ + type?: 'server_vad' | 'semantic_vad'; + } +} + +/** + * Realtime session object configuration. + */ +export interface RealtimeSessionCreateRequest { + /** + * The Realtime model used for this session. 
+ */ + model: + | (string & {}) + | 'gpt-4o-realtime' + | 'gpt-4o-mini-realtime' + | 'gpt-4o-realtime-preview' + | 'gpt-4o-realtime-preview-2024-10-01' + | 'gpt-4o-realtime-preview-2024-12-17' + | 'gpt-4o-realtime-preview-2025-06-03' + | 'gpt-4o-mini-realtime-preview' + | 'gpt-4o-mini-realtime-preview-2024-12-17'; + + /** + * The type of session to create. Always `realtime` for the Realtime API. + */ + type: 'realtime'; + + /** + * Configuration for input and output audio. + */ + audio?: RealtimeAudioConfig; + + /** + * Configuration options for the generated client secret. + */ + client_secret?: RealtimeClientSecretConfig; + + /** + * Additional fields to include in server outputs. + * + * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio + * transcription. + */ + include?: Array<'item.input_audio_transcription.logprobs'>; + + /** + * The default system instructions (i.e. system message) prepended to model calls. + * This field allows the client to guide the model on desired responses. The model + * can be instructed on response content and format, (e.g. "be extremely succinct", + * "act friendly", "here are examples of good responses") and on audio behavior + * (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + * instructions are not guaranteed to be followed by the model, but they provide + * guidance to the model on the desired behavior. + * + * Note that the server sets default instructions which will be used if this field + * is not set and are visible in the `session.created` event at the start of the + * session. + */ + instructions?: string; + + /** + * Maximum number of output tokens for a single assistant response, inclusive of + * tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + * `inf` for the maximum available tokens for a given model. Defaults to `inf`. + */ + max_output_tokens?: number | 'inf'; + + /** + * The set of modalities the model can respond with. To disable audio, set this to + * ["text"]. + */ + output_modalities?: Array<'text' | 'audio'>; + + /** + * Reference to a prompt template and its variables. + * [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + */ + prompt?: ResponsesAPI.ResponsePrompt | null; + + /** + * Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a + * temperature of 0.8 is highly recommended for best performance. + */ + temperature?: number; + + /** + * How the model chooses tools. Provide one of the string modes or force a specific + * function/MCP tool. + */ + tool_choice?: RealtimeToolChoiceConfig; + + /** + * Tools available to the model. + */ + tools?: RealtimeToolsConfig; + + /** + * Configuration options for tracing. Set to null to disable tracing. Once tracing + * is enabled for a session, the configuration cannot be modified. + * + * `auto` will create a trace for the session with default values for the workflow + * name, group id, and metadata. + */ + tracing?: RealtimeTracingConfig | null; + + /** + * Controls how the realtime conversation is truncated prior to model inference. + * The default is `auto`. When set to `retention_ratio`, the server retains a + * fraction of the conversation tokens prior to the instructions. + */ + truncation?: RealtimeTruncation; +} + +/** + * How the model chooses tools. Provide one of the string modes or force a specific + * function/MCP tool. 
+ */ +export type RealtimeToolChoiceConfig = + | ResponsesAPI.ToolChoiceOptions + | ResponsesAPI.ToolChoiceFunction + | ResponsesAPI.ToolChoiceMcp; + +/** + * Tools available to the model. + */ +export type RealtimeToolsConfig = Array; + +/** + * Give the model access to additional tools via remote Model Context Protocol + * (MCP) servers. + * [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + */ +export type RealtimeToolsConfigUnion = RealtimeToolsConfigUnion.Function | RealtimeToolsConfigUnion.Mcp; + +export namespace RealtimeToolsConfigUnion { + export interface Function { + /** + * The description of the function, including guidance on when and how to call it, + * and guidance about what to tell the user when calling (if anything). + */ + description?: string; + + /** + * The name of the function. + */ + name?: string; + + /** + * Parameters of the function in JSON Schema. + */ + parameters?: unknown; + + /** + * The type of the tool, i.e. `function`. + */ + type?: 'function'; + } + + /** + * Give the model access to additional tools via remote Model Context Protocol + * (MCP) servers. + * [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + */ + export interface Mcp { + /** + * A label for this MCP server, used to identify it in tool calls. + */ + server_label: string; + + /** + * The type of the MCP tool. Always `mcp`. + */ + type: 'mcp'; + + /** + * List of allowed tool names or a filter object. + */ + allowed_tools?: Array | Mcp.McpToolFilter | null; + + /** + * An OAuth access token that can be used with a remote MCP server, either with a + * custom MCP server URL or a service connector. Your application must handle the + * OAuth authorization flow and provide the token here. + */ + authorization?: string; + + /** + * Identifier for service connectors, like those available in ChatGPT. One of + * `server_url` or `connector_id` must be provided. Learn more about service + * connectors + * [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + * + * Currently supported `connector_id` values are: + * + * - Dropbox: `connector_dropbox` + * - Gmail: `connector_gmail` + * - Google Calendar: `connector_googlecalendar` + * - Google Drive: `connector_googledrive` + * - Microsoft Teams: `connector_microsoftteams` + * - Outlook Calendar: `connector_outlookcalendar` + * - Outlook Email: `connector_outlookemail` + * - SharePoint: `connector_sharepoint` + */ + connector_id?: + | 'connector_dropbox' + | 'connector_gmail' + | 'connector_googlecalendar' + | 'connector_googledrive' + | 'connector_microsoftteams' + | 'connector_outlookcalendar' + | 'connector_outlookemail' + | 'connector_sharepoint'; + + /** + * Optional HTTP headers to send to the MCP server. Use for authentication or other + * purposes. + */ + headers?: { [key: string]: string } | null; + + /** + * Specify which of the MCP server's tools require approval. + */ + require_approval?: Mcp.McpToolApprovalFilter | 'always' | 'never' | null; + + /** + * Optional description of the MCP server, used to provide more context. + */ + server_description?: string; + + /** + * The URL for the MCP server. One of `server_url` or `connector_id` must be + * provided. + */ + server_url?: string; + } + + export namespace Mcp { + /** + * A filter object to specify which tools are allowed. + */ + export interface McpToolFilter { + /** + * Indicates whether or not a tool modifies data or is read-only. 
If an MCP server + * is + * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + * it will match this filter. + */ + read_only?: boolean; + + /** + * List of allowed tool names. + */ + tool_names?: Array; + } + + /** + * Specify which of the MCP server's tools require approval. Can be `always`, + * `never`, or a filter object associated with tools that require approval. + */ + export interface McpToolApprovalFilter { + /** + * A filter object to specify which tools are allowed. + */ + always?: McpToolApprovalFilter.Always; + + /** + * A filter object to specify which tools are allowed. + */ + never?: McpToolApprovalFilter.Never; + } + + export namespace McpToolApprovalFilter { + /** + * A filter object to specify which tools are allowed. + */ + export interface Always { + /** + * Indicates whether or not a tool modifies data or is read-only. If an MCP server + * is + * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + * it will match this filter. + */ + read_only?: boolean; + + /** + * List of allowed tool names. + */ + tool_names?: Array; + } + + /** + * A filter object to specify which tools are allowed. + */ + export interface Never { + /** + * Indicates whether or not a tool modifies data or is read-only. If an MCP server + * is + * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + * it will match this filter. + */ + read_only?: boolean; + + /** + * List of allowed tool names. + */ + tool_names?: Array; + } + } + } +} + +/** + * Configuration options for tracing. Set to null to disable tracing. Once tracing + * is enabled for a session, the configuration cannot be modified. + * + * `auto` will create a trace for the session with default values for the workflow + * name, group id, and metadata. + */ +export type RealtimeTracingConfig = 'auto' | RealtimeTracingConfig.TracingConfiguration; + +export namespace RealtimeTracingConfig { + /** + * Granular configuration for tracing. + */ + export interface TracingConfiguration { + /** + * The group id to attach to this trace to enable filtering and grouping in the + * traces dashboard. + */ + group_id?: string; + + /** + * The arbitrary metadata to attach to this trace to enable filtering in the traces + * dashboard. + */ + metadata?: unknown; + + /** + * The name of the workflow to attach to this trace. This is used to name the trace + * in the traces dashboard. + */ + workflow_name?: string; + } +} + +/** + * Realtime transcription session object configuration. + */ +export interface RealtimeTranscriptionSessionCreateRequest { + /** + * ID of the model to use. The options are `gpt-4o-transcribe`, + * `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + * Whisper V2 model). + */ + model: (string & {}) | 'whisper-1' | 'gpt-4o-transcribe' | 'gpt-4o-mini-transcribe'; + + /** + * The type of session to create. Always `transcription` for transcription + * sessions. + */ + type: 'transcription'; + + /** + * The set of items to include in the transcription. Current available items are: + * + * - `item.input_audio_transcription.logprobs` + */ + include?: Array<'item.input_audio_transcription.logprobs'>; + + /** + * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. 
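> As an illustration of the union defined above, a `RealtimeToolsConfig` array mixing a function tool with an MCP server entry might look like the sketch below. The import path is an assumption (other generated resources export their types from `openai/resources/<name>/<name>`), and the server URL and tool names are hypothetical:

```ts
import type { RealtimeToolsConfig } from 'openai/resources/realtime/realtime';

const tools: RealtimeToolsConfig = [
  {
    type: 'function',
    name: 'get_weather',
    description: 'Look up the current weather for a city.',
    parameters: {
      type: 'object',
      properties: { city: { type: 'string' } },
      required: ['city'],
    },
  },
  {
    type: 'mcp',
    server_label: 'docs',
    server_url: 'https://mcp.example.com/sse', // hypothetical MCP server
    allowed_tools: ['search_docs'],
    require_approval: 'never',
  },
];
```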
For + * `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + * (mono), and little-endian byte order. + */ + input_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw'; + + /** + * Configuration for input audio noise reduction. This can be set to `null` to turn + * off. Noise reduction filters audio added to the input audio buffer before it is + * sent to VAD and the model. Filtering the audio can improve VAD and turn + * detection accuracy (reducing false positives) and model performance by improving + * perception of the input audio. + */ + input_audio_noise_reduction?: RealtimeTranscriptionSessionCreateRequest.InputAudioNoiseReduction; + + /** + * Configuration for input audio transcription. The client can optionally set the + * language and prompt for transcription, these offer additional guidance to the + * transcription service. + */ + input_audio_transcription?: RealtimeTranscriptionSessionCreateRequest.InputAudioTranscription; + + /** + * Configuration for turn detection. Can be set to `null` to turn off. Server VAD + * means that the model will detect the start and end of speech based on audio + * volume and respond at the end of user speech. + */ + turn_detection?: RealtimeTranscriptionSessionCreateRequest.TurnDetection; +} + +export namespace RealtimeTranscriptionSessionCreateRequest { + /** + * Configuration for input audio noise reduction. This can be set to `null` to turn + * off. Noise reduction filters audio added to the input audio buffer before it is + * sent to VAD and the model. Filtering the audio can improve VAD and turn + * detection accuracy (reducing false positives) and model performance by improving + * perception of the input audio. + */ + export interface InputAudioNoiseReduction { + /** + * Type of noise reduction. `near_field` is for close-talking microphones such as + * headphones, `far_field` is for far-field microphones such as laptop or + * conference room microphones. + */ + type?: 'near_field' | 'far_field'; + } + + /** + * Configuration for input audio transcription. The client can optionally set the + * language and prompt for transcription, these offer additional guidance to the + * transcription service. + */ + export interface InputAudioTranscription { + /** + * The language of the input audio. Supplying the input language in + * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + * format will improve accuracy and latency. + */ + language?: string; + + /** + * The model to use for transcription, current options are `gpt-4o-transcribe`, + * `gpt-4o-mini-transcribe`, and `whisper-1`. + */ + model?: 'gpt-4o-transcribe' | 'gpt-4o-mini-transcribe' | 'whisper-1'; + + /** + * An optional text to guide the model's style or continue a previous audio + * segment. For `whisper-1`, the + * [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + * For `gpt-4o-transcribe` models, the prompt is a free text string, for example + * "expect words related to technology". + */ + prompt?: string; + } + + /** + * Configuration for turn detection. Can be set to `null` to turn off. Server VAD + * means that the model will detect the start and end of speech based on audio + * volume and respond at the end of user speech. + */ + export interface TurnDetection { + /** + * Amount of audio to include before the VAD detected speech (in milliseconds). + * Defaults to 300ms. + */ + prefix_padding_ms?: number; + + /** + * Duration of silence to detect speech stop (in milliseconds). 
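> Pulling the fields described above and below together, a complete `RealtimeTranscriptionSessionCreateRequest` might look like this sketch; the import path is an assumption and the values are illustrative rather than recommended defaults:

```ts
import type { RealtimeTranscriptionSessionCreateRequest } from 'openai/resources/realtime/realtime';

const transcriptionSession: RealtimeTranscriptionSessionCreateRequest = {
  type: 'transcription',
  model: 'gpt-4o-transcribe',
  include: ['item.input_audio_transcription.logprobs'],
  input_audio_format: 'pcm16',
  input_audio_noise_reduction: { type: 'near_field' },
  input_audio_transcription: { language: 'en', prompt: 'expect words related to technology' },
  turn_detection: {
    type: 'server_vad',
    threshold: 0.5,
    prefix_padding_ms: 300,
    silence_duration_ms: 500,
  },
};
```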
Defaults to 500ms. + * With shorter values the model will respond more quickly, but may jump in on + * short pauses from the user. + */ + silence_duration_ms?: number; + + /** + * Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + * threshold will require louder audio to activate the model, and thus might + * perform better in noisy environments. + */ + threshold?: number; + + /** + * Type of turn detection. Only `server_vad` is currently supported for + * transcription sessions. + */ + type?: 'server_vad'; + } +} + +/** + * Controls how the realtime conversation is truncated prior to model inference. + * The default is `auto`. When set to `retention_ratio`, the server retains a + * fraction of the conversation tokens prior to the instructions. + */ +export type RealtimeTruncation = 'auto' | 'disabled' | RealtimeTruncation.RetentionRatioTruncation; + +export namespace RealtimeTruncation { + /** + * Retain a fraction of the conversation tokens. + */ + export interface RetentionRatioTruncation { + /** + * Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0). + */ + retention_ratio: number; + + /** + * Use retention ratio truncation. + */ + type: 'retention_ratio'; + + /** + * Optional cap on tokens allowed after the instructions. + */ + post_instructions_token_limit?: number | null; + } +} + +/** + * Returned when the model-generated audio is updated. + */ +export interface ResponseAudioDeltaEvent { + /** + * The index of the content part in the item's content array. + */ + content_index: number; + + /** + * Base64-encoded audio data delta. + */ + delta: string; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The event type, must be `response.output_audio.delta`. + */ + type: 'response.output_audio.delta'; +} + +/** + * Returned when the model-generated audio is done. Also emitted when a Response is + * interrupted, incomplete, or cancelled. + */ +export interface ResponseAudioDoneEvent { + /** + * The index of the content part in the item's content array. + */ + content_index: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The event type, must be `response.output_audio.done`. + */ + type: 'response.output_audio.done'; +} + +/** + * Returned when the model-generated transcription of audio output is updated. + */ +export interface ResponseAudioTranscriptDeltaEvent { + /** + * The index of the content part in the item's content array. + */ + content_index: number; + + /** + * The transcript delta. + */ + delta: string; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The event type, must be `response.output_audio_transcript.delta`. + */ + type: 'response.output_audio_transcript.delta'; +} + +/** + * Returned when the model-generated transcription of audio output is done + * streaming. 
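> Because `delta` in `response.output_audio.delta` is base64-encoded PCM, a typical Node consumer decodes and buffers each chunk until the matching `done` event arrives. A minimal sketch, assuming the WebSocket client from `openai/realtime/websocket` and Node's `Buffer`:

```ts
import { OpenAIRealtimeWebSocket } from 'openai/realtime/websocket';

const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-realtime' });
const audioChunks: Buffer[] = [];

rt.on('response.output_audio.delta', (event) => {
  // Each delta is a base64-encoded slice of 16-bit PCM audio.
  audioChunks.push(Buffer.from(event.delta, 'base64'));
});

rt.on('response.output_audio.done', () => {
  const pcm = Buffer.concat(audioChunks);
  console.log(`received ${pcm.length} bytes of audio`);
});
```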
Also emitted when a Response is interrupted, incomplete, or + * cancelled. + */ +export interface ResponseAudioTranscriptDoneEvent { + /** + * The index of the content part in the item's content array. + */ + content_index: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The final transcript of the audio. + */ + transcript: string; + + /** + * The event type, must be `response.output_audio_transcript.done`. + */ + type: 'response.output_audio_transcript.done'; +} + +/** + * Send this event to cancel an in-progress response. The server will respond with + * a `response.done` event with a status of `response.status=cancelled`. If there + * is no response to cancel, the server will respond with an error. + */ +export interface ResponseCancelEvent { + /** + * The event type, must be `response.cancel`. + */ + type: 'response.cancel'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; + + /** + * A specific response ID to cancel - if not provided, will cancel an in-progress + * response in the default conversation. + */ + response_id?: string; +} + +/** + * Returned when a new content part is added to an assistant message item during + * response generation. + */ +export interface ResponseContentPartAddedEvent { + /** + * The index of the content part in the item's content array. + */ + content_index: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item to which the content part was added. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The content part that was added. + */ + part: ResponseContentPartAddedEvent.Part; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The event type, must be `response.content_part.added`. + */ + type: 'response.content_part.added'; +} + +export namespace ResponseContentPartAddedEvent { + /** + * The content part that was added. + */ + export interface Part { + /** + * Base64-encoded audio data (if type is "audio"). + */ + audio?: string; + + /** + * The text content (if type is "text"). + */ + text?: string; + + /** + * The transcript of the audio (if type is "audio"). + */ + transcript?: string; + + /** + * The content type ("text", "audio"). + */ + type?: 'text' | 'audio'; + } +} + +/** + * Returned when a content part is done streaming in an assistant message item. + * Also emitted when a Response is interrupted, incomplete, or cancelled. + */ +export interface ResponseContentPartDoneEvent { + /** + * The index of the content part in the item's content array. + */ + content_index: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The content part that is done. + */ + part: ResponseContentPartDoneEvent.Part; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The event type, must be `response.content_part.done`. + */ + type: 'response.content_part.done'; +} + +export namespace ResponseContentPartDoneEvent { + /** + * The content part that is done. 
+ */ + export interface Part { + /** + * Base64-encoded audio data (if type is "audio"). + */ + audio?: string; + + /** + * The text content (if type is "text"). + */ + text?: string; + + /** + * The transcript of the audio (if type is "audio"). + */ + transcript?: string; + + /** + * The content type ("text", "audio"). + */ + type?: 'text' | 'audio'; + } +} + +/** + * This event instructs the server to create a Response, which means triggering + * model inference. When in Server VAD mode, the server will create Responses + * automatically. + * + * A Response will include at least one Item, and may have two, in which case the + * second will be a function call. These Items will be appended to the conversation + * history. + * + * The server will respond with a `response.created` event, events for Items and + * content created, and finally a `response.done` event to indicate the Response is + * complete. + * + * The `response.create` event includes inference configuration like + * `instructions`, and `temperature`. These fields will override the Session's + * configuration for this Response only. + */ +export interface ResponseCreateEvent { + /** + * The event type, must be `response.create`. + */ + type: 'response.create'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; + + /** + * Create a new Realtime response with these parameters + */ + response?: ResponseCreateEvent.Response; +} + +export namespace ResponseCreateEvent { + /** + * Create a new Realtime response with these parameters + */ + export interface Response { + /** + * Controls which conversation the response is added to. Currently supports `auto` + * and `none`, with `auto` as the default value. The `auto` value means that the + * contents of the response will be added to the default conversation. Set this to + * `none` to create an out-of-band response which will not add items to default + * conversation. + */ + conversation?: (string & {}) | 'auto' | 'none'; + + /** + * Input items to include in the prompt for the model. Using this field creates a + * new context for this Response instead of using the default conversation. An + * empty array `[]` will clear the context for this Response. Note that this can + * include references to items from the default conversation. + */ + input?: Array; + + /** + * The default system instructions (i.e. system message) prepended to model calls. + * This field allows the client to guide the model on desired responses. The model + * can be instructed on response content and format, (e.g. "be extremely succinct", + * "act friendly", "here are examples of good responses") and on audio behavior + * (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + * instructions are not guaranteed to be followed by the model, but they provide + * guidance to the model on the desired behavior. + * + * Note that the server sets default instructions which will be used if this field + * is not set and are visible in the `session.created` event at the start of the + * session. + */ + instructions?: string; + + /** + * Maximum number of output tokens for a single assistant response, inclusive of + * tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + * `inf` for the maximum available tokens for a given model. Defaults to `inf`. + */ + max_output_tokens?: number | 'inf'; + + /** + * Set of 16 key-value pairs that can be attached to an object. 
This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata?: Shared.Metadata | null; + + /** + * The set of modalities the model can respond with. To disable audio, set this to + * ["text"]. + */ + modalities?: Array<'text' | 'audio'>; + + /** + * The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + */ + output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw'; + + /** + * Reference to a prompt template and its variables. + * [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + */ + prompt?: ResponsesAPI.ResponsePrompt | null; + + /** + * Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. + */ + temperature?: number; + + /** + * How the model chooses tools. Provide one of the string modes or force a specific + * function/MCP tool. + */ + tool_choice?: + | ResponsesAPI.ToolChoiceOptions + | ResponsesAPI.ToolChoiceFunction + | ResponsesAPI.ToolChoiceMcp; + + /** + * Tools (functions) available to the model. + */ + tools?: Array; + + /** + * The voice the model uses to respond. Voice cannot be changed during the session + * once the model has responded with audio at least once. Current voice options are + * `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. + */ + voice?: + | (string & {}) + | 'alloy' + | 'ash' + | 'ballad' + | 'coral' + | 'echo' + | 'sage' + | 'shimmer' + | 'verse' + | 'marin' + | 'cedar'; + } + + export namespace Response { + export interface Tool { + /** + * The description of the function, including guidance on when and how to call it, + * and guidance about what to tell the user when calling (if anything). + */ + description?: string; + + /** + * The name of the function. + */ + name?: string; + + /** + * Parameters of the function in JSON Schema. + */ + parameters?: unknown; + + /** + * The type of the tool, i.e. `function`. + */ + type?: 'function'; + } + } +} + +/** + * Returned when a new Response is created. The first event of response creation, + * where the response is in an initial state of `in_progress`. + */ +export interface ResponseCreatedEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The response resource. + */ + response: RealtimeResponse; + + /** + * The event type, must be `response.created`. + */ + type: 'response.created'; +} + +/** + * Returned when a Response is done streaming. Always emitted, no matter the final + * state. The Response object included in the `response.done` event will include + * all output Items in the Response but will omit the raw audio data. + */ +export interface ResponseDoneEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The response resource. + */ + response: RealtimeResponse; + + /** + * The event type, must be `response.done`. + */ + type: 'response.done'; +} + +/** + * Returned when the model-generated function call arguments are updated. + */ +export interface ResponseFunctionCallArgumentsDeltaEvent { + /** + * The ID of the function call. + */ + call_id: string; + + /** + * The arguments delta as a JSON string. + */ + delta: string; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the function call item. 
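> Putting `ResponseCreateEvent` together, the sketch below requests an out-of-band response that leaves the default conversation untouched; the instructions and metadata are illustrative, and the `send`/`on` helpers are assumed from the WebSocket client shown earlier:

```ts
import { OpenAIRealtimeWebSocket } from 'openai/realtime/websocket';

const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-realtime' });

rt.on('session.created', () => {
  rt.send({
    type: 'response.create',
    event_id: 'evt_client_001', // optional client-generated ID
    response: {
      conversation: 'none', // out-of-band: nothing is added to the default conversation
      instructions: 'Summarize the conversation so far in one sentence.',
      modalities: ['text'],
      metadata: { purpose: 'summary' },
    },
  });
});

rt.on('response.done', (event) => console.log('response finished with status:', event.response.status));
```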
+ */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The event type, must be `response.function_call_arguments.delta`. + */ + type: 'response.function_call_arguments.delta'; +} + +/** + * Returned when the model-generated function call arguments are done streaming. + * Also emitted when a Response is interrupted, incomplete, or cancelled. + */ +export interface ResponseFunctionCallArgumentsDoneEvent { + /** + * The final arguments as a JSON string. + */ + arguments: string; + + /** + * The ID of the function call. + */ + call_id: string; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the function call item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The event type, must be `response.function_call_arguments.done`. + */ + type: 'response.function_call_arguments.done'; +} + +/** + * Returned when MCP tool call arguments are updated during response generation. + */ +export interface ResponseMcpCallArgumentsDelta { + /** + * The JSON-encoded arguments delta. + */ + delta: string; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the MCP tool call item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The event type, must be `response.mcp_call_arguments.delta`. + */ + type: 'response.mcp_call_arguments.delta'; + + /** + * If present, indicates the delta text was obfuscated. + */ + obfuscation?: string | null; +} + +/** + * Returned when MCP tool call arguments are finalized during response generation. + */ +export interface ResponseMcpCallArgumentsDone { + /** + * The final JSON-encoded arguments string. + */ + arguments: string; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the MCP tool call item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The event type, must be `response.mcp_call_arguments.done`. + */ + type: 'response.mcp_call_arguments.done'; +} + +/** + * Returned when an MCP tool call has completed successfully. + */ +export interface ResponseMcpCallCompleted { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the MCP tool call item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The event type, must be `response.mcp_call.completed`. + */ + type: 'response.mcp_call.completed'; +} + +/** + * Returned when an MCP tool call has failed. + */ +export interface ResponseMcpCallFailed { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the MCP tool call item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The event type, must be `response.mcp_call.failed`. + */ + type: 'response.mcp_call.failed'; +} + +/** + * Returned when an MCP tool call has started and is in progress. + */ +export interface ResponseMcpCallInProgress { + /** + * The unique ID of the server event. 
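> A common pattern with `response.function_call_arguments.done` is to run the named function locally, return its output as a `function_call_output` conversation item, and then ask for another response. A sketch under the same assumptions as the earlier snippets; the tool output shown is hypothetical:

```ts
import { OpenAIRealtimeWebSocket } from 'openai/realtime/websocket';

const rt = new OpenAIRealtimeWebSocket({ model: 'gpt-realtime' });

rt.on('response.function_call_arguments.done', (event) => {
  const args = JSON.parse(event.arguments);
  // Hypothetical local implementation of the tool the model called.
  const output = JSON.stringify({ temperature_c: 21, city: args.city });

  rt.send({
    type: 'conversation.item.create',
    item: { type: 'function_call_output', call_id: event.call_id, output },
  });
  rt.send({ type: 'response.create' });
});
```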
+ */ + event_id: string; + + /** + * The ID of the MCP tool call item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The event type, must be `response.mcp_call.in_progress`. + */ + type: 'response.mcp_call.in_progress'; +} + +/** + * Returned when a new Item is created during Response generation. + */ +export interface ResponseOutputItemAddedEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * A single item within a Realtime conversation. + */ + item: ConversationItem; + + /** + * The index of the output item in the Response. + */ + output_index: number; + + /** + * The ID of the Response to which the item belongs. + */ + response_id: string; + + /** + * The event type, must be `response.output_item.added`. + */ + type: 'response.output_item.added'; +} + +/** + * Returned when an Item is done streaming. Also emitted when a Response is + * interrupted, incomplete, or cancelled. + */ +export interface ResponseOutputItemDoneEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * A single item within a Realtime conversation. + */ + item: ConversationItem; + + /** + * The index of the output item in the Response. + */ + output_index: number; + + /** + * The ID of the Response to which the item belongs. + */ + response_id: string; + + /** + * The event type, must be `response.output_item.done`. + */ + type: 'response.output_item.done'; +} + +/** + * Returned when the text value of an "output_text" content part is updated. + */ +export interface ResponseTextDeltaEvent { + /** + * The index of the content part in the item's content array. + */ + content_index: number; + + /** + * The text delta. + */ + delta: string; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The event type, must be `response.output_text.delta`. + */ + type: 'response.output_text.delta'; +} + +/** + * Returned when the text value of an "output_text" content part is done streaming. + * Also emitted when a Response is interrupted, incomplete, or cancelled. + */ +export interface ResponseTextDoneEvent { + /** + * The index of the content part in the item's content array. + */ + content_index: number; + + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The ID of the item. + */ + item_id: string; + + /** + * The index of the output item in the response. + */ + output_index: number; + + /** + * The ID of the response. + */ + response_id: string; + + /** + * The final text content. + */ + text: string; + + /** + * The event type, must be `response.output_text.done`. + */ + type: 'response.output_text.done'; +} + +/** + * Returned when a Session is created. Emitted automatically when a new connection + * is established as the first server event. This event will contain the default + * Session configuration. + */ +export interface SessionCreatedEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * Realtime session object. + */ + session: RealtimeSession; + + /** + * The event type, must be `session.created`. + */ + type: 'session.created'; +} + +/** + * Send this event to update the session’s default configuration. 
The client may + * send this event at any time to update any field, except for `voice`. However, + * note that once a session has been initialized with a particular `model`, it + * can’t be changed to another model using `session.update`. + * + * When the server receives a `session.update`, it will respond with a + * `session.updated` event showing the full, effective configuration. Only the + * fields that are present are updated. To clear a field like `instructions`, pass + * an empty string. + */ +export interface SessionUpdateEvent { + /** + * Realtime session object configuration. + */ + session: RealtimeSessionCreateRequest; + + /** + * The event type, must be `session.update`. + */ + type: 'session.update'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; +} + +/** + * Returned when a session is updated with a `session.update` event, unless there + * is an error. + */ +export interface SessionUpdatedEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * Realtime session object. + */ + session: RealtimeSession; + + /** + * The event type, must be `session.updated`. + */ + type: 'session.updated'; +} + +/** + * Returned when a transcription session is created. + */ +export interface TranscriptionSessionCreated { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * A Realtime transcription session configuration object. + */ + session: TranscriptionSessionCreated.Session; + + /** + * The event type, must be `transcription_session.created`. + */ + type: 'transcription_session.created'; +} + +export namespace TranscriptionSessionCreated { + /** + * A Realtime transcription session configuration object. + */ + export interface Session { + /** + * Unique identifier for the session that looks like `sess_1234567890abcdef`. + */ + id?: string; + + /** + * Configuration for input audio for the session. + */ + audio?: Session.Audio; + + /** + * Expiration timestamp for the session, in seconds since epoch. + */ + expires_at?: number; + + /** + * Additional fields to include in server outputs. + * + * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio + * transcription. + */ + include?: Array<'item.input_audio_transcription.logprobs'>; + + /** + * The object type. Always `realtime.transcription_session`. + */ + object?: string; + } + + export namespace Session { + /** + * Configuration for input audio for the session. + */ + export interface Audio { + input?: Audio.Input; + } + + export namespace Audio { + export interface Input { + /** + * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + */ + format?: string; + + /** + * Configuration for input audio noise reduction. + */ + noise_reduction?: Input.NoiseReduction; + + /** + * Configuration of the transcription model. + */ + transcription?: Input.Transcription; + + /** + * Configuration for turn detection. + */ + turn_detection?: Input.TurnDetection; + } + + export namespace Input { + /** + * Configuration for input audio noise reduction. + */ + export interface NoiseReduction { + type?: 'near_field' | 'far_field'; + } + + /** + * Configuration of the transcription model. + */ + export interface Transcription { + /** + * The language of the input audio. Supplying the input language in + * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + * format will improve accuracy and latency. + */ + language?: string; + + /** + * The model to use for transcription. 
Can be `gpt-4o-transcribe`, + * `gpt-4o-mini-transcribe`, or `whisper-1`. + */ + model?: 'gpt-4o-transcribe' | 'gpt-4o-mini-transcribe' | 'whisper-1'; + + /** + * An optional text to guide the model's style or continue a previous audio + * segment. The + * [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + * should match the audio language. + */ + prompt?: string; + } + + /** + * Configuration for turn detection. + */ + export interface TurnDetection { + prefix_padding_ms?: number; + + silence_duration_ms?: number; + + threshold?: number; + + /** + * Type of turn detection, only `server_vad` is currently supported. + */ + type?: string; + } + } + } + } +} + +/** + * Send this event to update a transcription session. + */ +export interface TranscriptionSessionUpdate { + /** + * Realtime transcription session object configuration. + */ + session: RealtimeTranscriptionSessionCreateRequest; + + /** + * The event type, must be `transcription_session.update`. + */ + type: 'transcription_session.update'; + + /** + * Optional client-generated ID used to identify this event. + */ + event_id?: string; +} + +/** + * Returned when a transcription session is updated with a + * `transcription_session.update` event, unless there is an error. + */ +export interface TranscriptionSessionUpdatedEvent { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * A Realtime transcription session configuration object. + */ + session: TranscriptionSessionUpdatedEvent.Session; + + /** + * The event type, must be `transcription_session.updated`. + */ + type: 'transcription_session.updated'; +} + +export namespace TranscriptionSessionUpdatedEvent { + /** + * A Realtime transcription session configuration object. + */ + export interface Session { + /** + * Unique identifier for the session that looks like `sess_1234567890abcdef`. + */ + id?: string; + + /** + * Configuration for input audio for the session. + */ + audio?: Session.Audio; + + /** + * Expiration timestamp for the session, in seconds since epoch. + */ + expires_at?: number; + + /** + * Additional fields to include in server outputs. + * + * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio + * transcription. + */ + include?: Array<'item.input_audio_transcription.logprobs'>; + + /** + * The object type. Always `realtime.transcription_session`. + */ + object?: string; + } + + export namespace Session { + /** + * Configuration for input audio for the session. + */ + export interface Audio { + input?: Audio.Input; + } + + export namespace Audio { + export interface Input { + /** + * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + */ + format?: string; + + /** + * Configuration for input audio noise reduction. + */ + noise_reduction?: Input.NoiseReduction; + + /** + * Configuration of the transcription model. + */ + transcription?: Input.Transcription; + + /** + * Configuration for turn detection. + */ + turn_detection?: Input.TurnDetection; + } + + export namespace Input { + /** + * Configuration for input audio noise reduction. + */ + export interface NoiseReduction { + type?: 'near_field' | 'far_field'; + } + + /** + * Configuration of the transcription model. + */ + export interface Transcription { + /** + * The language of the input audio. Supplying the input language in + * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + * format will improve accuracy and latency. 
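> For transcription-only sessions, the `transcription_session.update` client event defined nearby carries the same `RealtimeTranscriptionSessionCreateRequest` shown earlier. A sketch of the event payload itself; the import path and event ID are assumptions, and how the underlying connection is established is not shown here:

```ts
import type { TranscriptionSessionUpdate } from 'openai/resources/realtime/realtime';

const update: TranscriptionSessionUpdate = {
  type: 'transcription_session.update',
  event_id: 'evt_client_002',
  session: {
    type: 'transcription',
    model: 'whisper-1',
    input_audio_format: 'pcm16',
    input_audio_noise_reduction: { type: 'far_field' },
    turn_detection: { type: 'server_vad', silence_duration_ms: 500 },
  },
};
// `update` would then be serialized and sent over the realtime connection.
```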
+ */ + language?: string; + + /** + * The model to use for transcription. Can be `gpt-4o-transcribe`, + * `gpt-4o-mini-transcribe`, or `whisper-1`. + */ + model?: 'gpt-4o-transcribe' | 'gpt-4o-mini-transcribe' | 'whisper-1'; + + /** + * An optional text to guide the model's style or continue a previous audio + * segment. The + * [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + * should match the audio language. + */ + prompt?: string; + } + + /** + * Configuration for turn detection. + */ + export interface TurnDetection { + prefix_padding_ms?: number; + + silence_duration_ms?: number; + + threshold?: number; + + /** + * Type of turn detection, only `server_vad` is currently supported. + */ + type?: string; + } + } + } + } +} + +Realtime.ClientSecrets = ClientSecrets; + +export declare namespace Realtime { + export { + type ConversationCreatedEvent as ConversationCreatedEvent, + type ConversationItem as ConversationItem, + type ConversationItemAdded as ConversationItemAdded, + type ConversationItemCreateEvent as ConversationItemCreateEvent, + type ConversationItemCreatedEvent as ConversationItemCreatedEvent, + type ConversationItemDeleteEvent as ConversationItemDeleteEvent, + type ConversationItemDeletedEvent as ConversationItemDeletedEvent, + type ConversationItemDone as ConversationItemDone, + type ConversationItemInputAudioTranscriptionCompletedEvent as ConversationItemInputAudioTranscriptionCompletedEvent, + type ConversationItemInputAudioTranscriptionDeltaEvent as ConversationItemInputAudioTranscriptionDeltaEvent, + type ConversationItemInputAudioTranscriptionFailedEvent as ConversationItemInputAudioTranscriptionFailedEvent, + type ConversationItemInputAudioTranscriptionSegment as ConversationItemInputAudioTranscriptionSegment, + type ConversationItemRetrieveEvent as ConversationItemRetrieveEvent, + type ConversationItemTruncateEvent as ConversationItemTruncateEvent, + type ConversationItemTruncatedEvent as ConversationItemTruncatedEvent, + type ConversationItemWithReference as ConversationItemWithReference, + type InputAudioBufferAppendEvent as InputAudioBufferAppendEvent, + type InputAudioBufferClearEvent as InputAudioBufferClearEvent, + type InputAudioBufferClearedEvent as InputAudioBufferClearedEvent, + type InputAudioBufferCommitEvent as InputAudioBufferCommitEvent, + type InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent, + type InputAudioBufferSpeechStartedEvent as InputAudioBufferSpeechStartedEvent, + type InputAudioBufferSpeechStoppedEvent as InputAudioBufferSpeechStoppedEvent, + type InputAudioBufferTimeoutTriggered as InputAudioBufferTimeoutTriggered, + type LogProbProperties as LogProbProperties, + type McpListToolsCompleted as McpListToolsCompleted, + type McpListToolsFailed as McpListToolsFailed, + type McpListToolsInProgress as McpListToolsInProgress, + type OutputAudioBufferClearEvent as OutputAudioBufferClearEvent, + type RateLimitsUpdatedEvent as RateLimitsUpdatedEvent, + type RealtimeAudioConfig as RealtimeAudioConfig, + type RealtimeClientEvent as RealtimeClientEvent, + type RealtimeClientSecretConfig as RealtimeClientSecretConfig, + type RealtimeConversationItemAssistantMessage as RealtimeConversationItemAssistantMessage, + type RealtimeConversationItemFunctionCall as RealtimeConversationItemFunctionCall, + type RealtimeConversationItemFunctionCallOutput as RealtimeConversationItemFunctionCallOutput, + type RealtimeConversationItemSystemMessage as RealtimeConversationItemSystemMessage, + type 
RealtimeConversationItemUserMessage as RealtimeConversationItemUserMessage, + type RealtimeError as RealtimeError, + type RealtimeErrorEvent as RealtimeErrorEvent, + type RealtimeMcpApprovalRequest as RealtimeMcpApprovalRequest, + type RealtimeMcpApprovalResponse as RealtimeMcpApprovalResponse, + type RealtimeMcpListTools as RealtimeMcpListTools, + type RealtimeMcpProtocolError as RealtimeMcpProtocolError, + type RealtimeMcpToolCall as RealtimeMcpToolCall, + type RealtimeMcpToolExecutionError as RealtimeMcpToolExecutionError, + type RealtimeMcphttpError as RealtimeMcphttpError, + type RealtimeResponse as RealtimeResponse, + type RealtimeResponseStatus as RealtimeResponseStatus, + type RealtimeResponseUsage as RealtimeResponseUsage, + type RealtimeResponseUsageInputTokenDetails as RealtimeResponseUsageInputTokenDetails, + type RealtimeResponseUsageOutputTokenDetails as RealtimeResponseUsageOutputTokenDetails, + type RealtimeServerEvent as RealtimeServerEvent, + type RealtimeSession as RealtimeSession, + type RealtimeSessionCreateRequest as RealtimeSessionCreateRequest, + type RealtimeToolChoiceConfig as RealtimeToolChoiceConfig, + type RealtimeToolsConfig as RealtimeToolsConfig, + type RealtimeToolsConfigUnion as RealtimeToolsConfigUnion, + type RealtimeTracingConfig as RealtimeTracingConfig, + type RealtimeTranscriptionSessionCreateRequest as RealtimeTranscriptionSessionCreateRequest, + type RealtimeTruncation as RealtimeTruncation, + type ResponseAudioDeltaEvent as ResponseAudioDeltaEvent, + type ResponseAudioDoneEvent as ResponseAudioDoneEvent, + type ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent, + type ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent, + type ResponseCancelEvent as ResponseCancelEvent, + type ResponseContentPartAddedEvent as ResponseContentPartAddedEvent, + type ResponseContentPartDoneEvent as ResponseContentPartDoneEvent, + type ResponseCreateEvent as ResponseCreateEvent, + type ResponseCreatedEvent as ResponseCreatedEvent, + type ResponseDoneEvent as ResponseDoneEvent, + type ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent, + type ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent, + type ResponseMcpCallArgumentsDelta as ResponseMcpCallArgumentsDelta, + type ResponseMcpCallArgumentsDone as ResponseMcpCallArgumentsDone, + type ResponseMcpCallCompleted as ResponseMcpCallCompleted, + type ResponseMcpCallFailed as ResponseMcpCallFailed, + type ResponseMcpCallInProgress as ResponseMcpCallInProgress, + type ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent, + type ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent, + type ResponseTextDeltaEvent as ResponseTextDeltaEvent, + type ResponseTextDoneEvent as ResponseTextDoneEvent, + type SessionCreatedEvent as SessionCreatedEvent, + type SessionUpdateEvent as SessionUpdateEvent, + type SessionUpdatedEvent as SessionUpdatedEvent, + type TranscriptionSessionCreated as TranscriptionSessionCreated, + type TranscriptionSessionUpdate as TranscriptionSessionUpdate, + type TranscriptionSessionUpdatedEvent as TranscriptionSessionUpdatedEvent, + }; + + export { + ClientSecrets as ClientSecrets, + type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse, + type ClientSecretCreateResponse as ClientSecretCreateResponse, + type ClientSecretCreateParams as ClientSecretCreateParams, + }; +} diff --git a/src/resources/responses/responses.ts b/src/resources/responses/responses.ts index 5512b0e11..5a8f1a446 
100644 --- a/src/resources/responses/responses.ts +++ b/src/resources/responses/responses.ts @@ -463,7 +463,7 @@ export interface Response { * An array of tools the model may call while generating a response. You can * specify which tool to use by setting the `tool_choice` parameter. * - * The two categories of tools you can provide the model are: + * We support the following categories of tools: * * - **Built-in tools**: Tools that are provided by OpenAI that extend the model's * capabilities, like @@ -471,6 +471,9 @@ export interface Response { * [file search](https://platform.openai.com/docs/guides/tools-file-search). * Learn more about * [built-in tools](https://platform.openai.com/docs/guides/tools). + * - **MCP Tools**: Integrations with third-party systems via custom MCP servers or + * predefined connectors such as Google Drive and Notion. Learn more about + * [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). * - **Function calls (custom tools)**: Functions that are defined by you, enabling * the model to call your own code with strongly typed arguments and outputs. * Learn more about @@ -4654,89 +4657,15 @@ export type Tool = | FunctionTool | FileSearchTool | ComputerTool - | Tool.WebSearchTool + | WebSearchTool | Tool.Mcp | Tool.CodeInterpreter | Tool.ImageGeneration | Tool.LocalShell | CustomTool - | WebSearchTool; + | WebSearchPreviewTool; export namespace Tool { - /** - * Search the Internet for sources related to the prompt. Learn more about the - * [web search tool](https://platform.openai.com/docs/guides/tools-web-search). - */ - export interface WebSearchTool { - /** - * The type of the web search tool. One of `web_search` or `web_search_2025_08_26`. - */ - type: 'web_search' | 'web_search_2025_08_26'; - - /** - * Filters for the search. - */ - filters?: WebSearchTool.Filters | null; - - /** - * High level guidance for the amount of context window space to use for the - * search. One of `low`, `medium`, or `high`. `medium` is the default. - */ - search_context_size?: 'low' | 'medium' | 'high'; - - /** - * The approximate location of the user. - */ - user_location?: WebSearchTool.UserLocation | null; - } - - export namespace WebSearchTool { - /** - * Filters for the search. - */ - export interface Filters { - /** - * Allowed domains for the search. If not provided, all domains are allowed. - * Subdomains of the provided domains are allowed as well. - * - * Example: `["pubmed.ncbi.nlm.nih.gov"]` - */ - allowed_domains?: Array | null; - } - - /** - * The approximate location of the user. - */ - export interface UserLocation { - /** - * Free text input for the city of the user, e.g. `San Francisco`. - */ - city?: string | null; - - /** - * The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of - * the user, e.g. `US`. - */ - country?: string | null; - - /** - * Free text input for the region of the user, e.g. `California`. - */ - region?: string | null; - - /** - * The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the - * user, e.g. `America/Los_Angeles`. - */ - timezone?: string | null; - - /** - * The type of location approximation. Always `approximate`. - */ - type?: 'approximate'; - } - } - /** * Give the model access to additional tools via remote Model Context Protocol * (MCP) servers. @@ -5151,7 +5080,7 @@ export interface ToolChoiceTypes { * about the * [web search tool](https://platform.openai.com/docs/guides/tools-web-search). 
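> The GA `web_search` tool introduced in this hunk (and the retained preview variant renamed to `WebSearchPreviewTool` below) is passed through `tools` on the Responses API. A minimal sketch; the model name and domain filter are illustrative:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

const response = await client.responses.create({
  model: 'gpt-5',
  input: 'What changed in the latest realtime API release?',
  tools: [
    {
      type: 'web_search',
      filters: { allowed_domains: ['openai.com'] },
      search_context_size: 'low',
    },
  ],
});

console.log(response.output_text);
```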
*/ -export interface WebSearchTool { +export interface WebSearchPreviewTool { /** * The type of the web search tool. One of `web_search_preview` or * `web_search_preview_2025_03_11`. @@ -5167,10 +5096,10 @@ export interface WebSearchTool { /** * The user's location. */ - user_location?: WebSearchTool.UserLocation | null; + user_location?: WebSearchPreviewTool.UserLocation | null; } -export namespace WebSearchTool { +export namespace WebSearchPreviewTool { /** * The user's location. */ @@ -5204,6 +5133,80 @@ export namespace WebSearchTool { } } +/** + * Search the Internet for sources related to the prompt. Learn more about the + * [web search tool](https://platform.openai.com/docs/guides/tools-web-search). + */ +export interface WebSearchTool { + /** + * The type of the web search tool. One of `web_search` or `web_search_2025_08_26`. + */ + type: 'web_search' | 'web_search_2025_08_26'; + + /** + * Filters for the search. + */ + filters?: WebSearchTool.Filters | null; + + /** + * High level guidance for the amount of context window space to use for the + * search. One of `low`, `medium`, or `high`. `medium` is the default. + */ + search_context_size?: 'low' | 'medium' | 'high'; + + /** + * The approximate location of the user. + */ + user_location?: WebSearchTool.UserLocation | null; +} + +export namespace WebSearchTool { + /** + * Filters for the search. + */ + export interface Filters { + /** + * Allowed domains for the search. If not provided, all domains are allowed. + * Subdomains of the provided domains are allowed as well. + * + * Example: `["pubmed.ncbi.nlm.nih.gov"]` + */ + allowed_domains?: Array | null; + } + + /** + * The approximate location of the user. + */ + export interface UserLocation { + /** + * Free text input for the city of the user, e.g. `San Francisco`. + */ + city?: string | null; + + /** + * The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + * the user, e.g. `US`. + */ + country?: string | null; + + /** + * Free text input for the region of the user, e.g. `California`. + */ + region?: string | null; + + /** + * The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + * user, e.g. `America/Los_Angeles`. + */ + timezone?: string | null; + + /** + * The type of location approximation. Always `approximate`. + */ + type?: 'approximate'; + } +} + export type ResponseCreateParams = ResponseCreateParamsNonStreaming | ResponseCreateParamsStreaming; export interface ResponseCreateParamsBase { @@ -5410,7 +5413,7 @@ export interface ResponseCreateParamsBase { * An array of tools the model may call while generating a response. You can * specify which tool to use by setting the `tool_choice` parameter. * - * The two categories of tools you can provide the model are: + * We support the following categories of tools: * * - **Built-in tools**: Tools that are provided by OpenAI that extend the model's * capabilities, like @@ -5418,6 +5421,9 @@ export interface ResponseCreateParamsBase { * [file search](https://platform.openai.com/docs/guides/tools-file-search). * Learn more about * [built-in tools](https://platform.openai.com/docs/guides/tools). + * - **MCP Tools**: Integrations with third-party systems via custom MCP servers or + * predefined connectors such as Google Drive and Notion. Learn more about + * [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). 
* - **Function calls (custom tools)**: Functions that are defined by you, enabling * the model to call your own code with strongly typed arguments and outputs. * Learn more about @@ -5673,6 +5679,7 @@ export declare namespace Responses { type ToolChoiceMcp as ToolChoiceMcp, type ToolChoiceOptions as ToolChoiceOptions, type ToolChoiceTypes as ToolChoiceTypes, + type WebSearchPreviewTool as WebSearchPreviewTool, type WebSearchTool as WebSearchTool, type ResponseCreateParams as ResponseCreateParams, type ResponseCreateParamsNonStreaming as ResponseCreateParamsNonStreaming, diff --git a/src/resources/webhooks.ts b/src/resources/webhooks.ts index fa337478b..7449d0830 100644 --- a/src/resources/webhooks.ts +++ b/src/resources/webhooks.ts @@ -559,6 +559,70 @@ export namespace FineTuningJobSucceededWebhookEvent { } } +/** + * Sent when Realtime API Receives a incoming SIP call. + */ +export interface RealtimeCallIncomingWebhookEvent { + /** + * The unique ID of the event. + */ + id: string; + + /** + * The Unix timestamp (in seconds) of when the model response was completed. + */ + created_at: number; + + /** + * Event data payload. + */ + data: RealtimeCallIncomingWebhookEvent.Data; + + /** + * The type of the event. Always `realtime.call.incoming`. + */ + type: 'realtime.call.incoming'; + + /** + * The object of the event. Always `event`. + */ + object?: 'event'; +} + +export namespace RealtimeCallIncomingWebhookEvent { + /** + * Event data payload. + */ + export interface Data { + /** + * The unique ID of this call. + */ + call_id: string; + + /** + * Headers from the SIP Invite. + */ + sip_headers: Array; + } + + export namespace Data { + /** + * A header from the SIP Invite. + */ + export interface SipHeader { + /** + * Name of the SIP Header. + */ + name: string; + + /** + * Value of the SIP Header. + */ + value: string; + } + } +} + /** * Sent when a background response has been cancelled. */ @@ -741,6 +805,7 @@ export type UnwrapWebhookEvent = | FineTuningJobCancelledWebhookEvent | FineTuningJobFailedWebhookEvent | FineTuningJobSucceededWebhookEvent + | RealtimeCallIncomingWebhookEvent | ResponseCancelledWebhookEvent | ResponseCompletedWebhookEvent | ResponseFailedWebhookEvent @@ -758,6 +823,7 @@ export declare namespace Webhooks { type FineTuningJobCancelledWebhookEvent as FineTuningJobCancelledWebhookEvent, type FineTuningJobFailedWebhookEvent as FineTuningJobFailedWebhookEvent, type FineTuningJobSucceededWebhookEvent as FineTuningJobSucceededWebhookEvent, + type RealtimeCallIncomingWebhookEvent as RealtimeCallIncomingWebhookEvent, type ResponseCancelledWebhookEvent as ResponseCancelledWebhookEvent, type ResponseCompletedWebhookEvent as ResponseCompletedWebhookEvent, type ResponseFailedWebhookEvent as ResponseFailedWebhookEvent, diff --git a/src/version.ts b/src/version.ts index cf8aa5418..02ab094c5 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = '5.16.0'; // x-release-please-version +export const VERSION = '5.17.0'; // x-release-please-version diff --git a/tests/api-resources/beta/realtime/transcription-sessions.test.ts b/tests/api-resources/beta/realtime/transcription-sessions.test.ts deleted file mode 100644 index 2c7cbbb15..000000000 --- a/tests/api-resources/beta/realtime/transcription-sessions.test.ts +++ /dev/null @@ -1,21 +0,0 @@ -// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
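> The new `realtime.call.incoming` webhook added in `src/resources/webhooks.ts` above can be handled like the other webhook events. A sketch, assuming the `webhookSecret` client option and the async `client.webhooks.unwrap(body, headers)` helper; the surrounding HTTP framework is left out:

```ts
import OpenAI from 'openai';

const client = new OpenAI({ webhookSecret: process.env['OPENAI_WEBHOOK_SECRET'] });

// `rawBody` must be the unparsed request body; `headers` the incoming HTTP headers.
async function handleWebhook(rawBody: string, headers: Record<string, string>) {
  const event = await client.webhooks.unwrap(rawBody, headers);

  if (event.type === 'realtime.call.incoming') {
    console.log('incoming SIP call:', event.data.call_id);
    for (const header of event.data.sip_headers) {
      console.log(`${header.name}: ${header.value}`);
    }
  }
}
```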
- -import OpenAI from 'openai'; - -const client = new OpenAI({ - apiKey: 'My API Key', - baseURL: process.env['TEST_API_BASE_URL'] ?? 'http://127.0.0.1:4010', -}); - -describe('resource transcriptionSessions', () => { - test('create', async () => { - const responsePromise = client.beta.realtime.transcriptionSessions.create({}); - const rawResponse = await responsePromise.asResponse(); - expect(rawResponse).toBeInstanceOf(Response); - const response = await responsePromise; - expect(response).not.toBeInstanceOf(Response); - const dataAndResponse = await responsePromise.withResponse(); - expect(dataAndResponse.data).toBe(response); - expect(dataAndResponse.response).toBe(rawResponse); - }); -}); diff --git a/tests/api-resources/beta/realtime/sessions.test.ts b/tests/api-resources/realtime/client-secrets.test.ts similarity index 86% rename from tests/api-resources/beta/realtime/sessions.test.ts rename to tests/api-resources/realtime/client-secrets.test.ts index 1a75a532c..105cdfe7f 100644 --- a/tests/api-resources/beta/realtime/sessions.test.ts +++ b/tests/api-resources/realtime/client-secrets.test.ts @@ -7,9 +7,9 @@ const client = new OpenAI({ baseURL: process.env['TEST_API_BASE_URL'] ?? 'http://127.0.0.1:4010', }); -describe('resource sessions', () => { +describe('resource clientSecrets', () => { test('create', async () => { - const responsePromise = client.beta.realtime.sessions.create({}); + const responsePromise = client.realtime.clientSecrets.create({}); const rawResponse = await responsePromise.asResponse(); expect(rawResponse).toBeInstanceOf(Response); const response = await responsePromise;
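> Finally, the renamed `client.realtime.clientSecrets.create` endpoint exercised by the test above mints an ephemeral credential that can be handed to an untrusted client. A sketch; the `session` payload follows `RealtimeSessionCreateRequest`, while the exact response fields (`value`, `expires_at`) are assumptions:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

const secret = await client.realtime.clientSecrets.create({
  session: {
    type: 'realtime',
    model: 'gpt-realtime',
    instructions: 'You are a friendly voice assistant.',
  },
});

// The returned value is passed to a browser or mobile client so it can open its
// own realtime connection without exposing the API key.
console.log(secret.value, secret.expires_at);
```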