chore(api): fix realtime GA types

stainless-app[bot] · stainless-app[bot] · commit 4ed1b447b442 · 2025-09-10T15:05:06.000Z
diff --git a/.stats.yml b/.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 118
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-7807ec6037efcee1af7decbfd3974a42b761fb6c6a71b4050fe43484d7fcbac4.yml
-openapi_spec_hash: da6851e3891ad2659a50ed6a736fd32a
-config_hash: 74d955cdc2377213f5268ea309090f6c
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml
+openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132
+config_hash: 930dac3aa861344867e4ac84f037b5df
diff --git a/api.md b/api.md
@@ -805,7 +805,6 @@ Types:
 - <code><a href="./src/resources/realtime/realtime.ts">McpListToolsCompleted</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">McpListToolsFailed</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">McpListToolsInProgress</a></code>
-- <code><a href="./src/resources/realtime/realtime.ts">Models</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">NoiseReductionType</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">OutputAudioBufferClearEvent</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">RateLimitsUpdatedEvent</a></code>
@@ -822,6 +821,7 @@ Types:
 - <code><a href="./src/resources/realtime/realtime.ts">RealtimeConversationItemUserMessage</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">RealtimeError</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">RealtimeErrorEvent</a></code>
+- <code><a href="./src/resources/realtime/realtime.ts">RealtimeFunctionTool</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">RealtimeMcpApprovalRequest</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">RealtimeMcpApprovalResponse</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">RealtimeMcpListTools</a></code>
@@ -874,7 +874,6 @@ Types:
 - <code><a href="./src/resources/realtime/realtime.ts">SessionCreatedEvent</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">SessionUpdateEvent</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">SessionUpdatedEvent</a></code>
-- <code><a href="./src/resources/realtime/realtime.ts">TranscriptionSessionCreated</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">TranscriptionSessionUpdate</a></code>
 - <code><a href="./src/resources/realtime/realtime.ts">TranscriptionSessionUpdatedEvent</a></code>
 
@@ -884,9 +883,7 @@ Types:
 
 - <code><a href="./src/resources/realtime/client-secrets.ts">RealtimeSessionClientSecret</a></code>
 - <code><a href="./src/resources/realtime/client-secrets.ts">RealtimeSessionCreateResponse</a></code>
-- <code><a href="./src/resources/realtime/client-secrets.ts">RealtimeTranscriptionSessionClientSecret</a></code>
 - <code><a href="./src/resources/realtime/client-secrets.ts">RealtimeTranscriptionSessionCreateResponse</a></code>
-- <code><a href="./src/resources/realtime/client-secrets.ts">RealtimeTranscriptionSessionInputAudioTranscription</a></code>
 - <code><a href="./src/resources/realtime/client-secrets.ts">RealtimeTranscriptionSessionTurnDetection</a></code>
 - <code><a href="./src/resources/realtime/client-secrets.ts">ClientSecretCreateResponse</a></code>
 
diff --git a/src/resources/realtime/client-secrets.ts b/src/resources/realtime/client-secrets.ts
@@ -1,6 +1,7 @@
 // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 import { APIResource } from '../../core/resource';
+import * as ClientSecretsAPI from './client-secrets';
 import * as RealtimeAPI from './realtime';
 import * as ResponsesAPI from '../responses/responses';
 import { APIPromise } from '../../core/api-promise';
@@ -39,14 +40,19 @@ export interface RealtimeSessionClientSecret {
  */
 export interface RealtimeSessionCreateResponse {
   /**
-   * Configuration for input and output audio.
+   * Ephemeral key returned by the API.
    */
-  audio?: RealtimeSessionCreateResponse.Audio;
+  client_secret: RealtimeSessionClientSecret;
 
   /**
-   * Ephemeral key returned by the API.
+   * The type of session to create. Always `realtime` for the Realtime API.
+   */
+  type: 'realtime';
+
+  /**
+   * Configuration for input and output audio.
    */
-  client_secret?: RealtimeSessionClientSecret;
+  audio?: RealtimeSessionCreateResponse.Audio;
 
   /**
    * Additional fields to include in server outputs.
@@ -115,7 +121,7 @@ export interface RealtimeSessionCreateResponse {
   /**
    * Tools available to the model.
    */
-  tools?: Array<RealtimeAPI.Models | RealtimeSessionCreateResponse.McpTool>;
+  tools?: Array<RealtimeAPI.RealtimeFunctionTool | RealtimeSessionCreateResponse.McpTool>;
 
   /**
    * Realtime API can write session traces to the
@@ -132,11 +138,6 @@ export interface RealtimeSessionCreateResponse {
    * The default is `auto`.
    */
   truncation?: RealtimeAPI.RealtimeTruncation;
-
-  /**
-   * The type of session to create. Always `realtime` for the Realtime API.
-   */
-  type?: 'realtime';
 }
 
 export namespace RealtimeSessionCreateResponse {
@@ -238,7 +239,7 @@ export namespace RealtimeSessionCreateResponse {
 
         /**
          * Optional idle timeout after which turn detection will auto-timeout when no
-         * additional audio is received.
+         * additional audio is received, emitting a `timeout_triggered` event.
          */
         idle_timeout_ms?: number | null;
 
@@ -491,87 +492,90 @@ export namespace RealtimeSessionCreateResponse {
 }
 
 /**
- * Ephemeral key returned by the API. Only present when the session is created on
- * the server via REST API.
+ * A Realtime transcription session configuration object.
  */
-export interface RealtimeTranscriptionSessionClientSecret {
-  /**
-   * Timestamp for when the token expires. Currently, all tokens expire after one
-   * minute.
-   */
-  expires_at: number;
-
+export interface RealtimeTranscriptionSessionCreateResponse {
   /**
-   * Ephemeral key usable in client environments to authenticate connections to the
-   * Realtime API. Use this in client-side environments rather than a standard API
-   * token, which should only be used server-side.
+   * Unique identifier for the session that looks like `sess_1234567890abcdef`.
    */
-  value: string;
-}
+  id: string;
 
-/**
- * A new Realtime transcription session configuration.
- *
- * When a session is created on the server via REST API, the session object also
- * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
- * not present when a session is updated via the WebSocket API.
- */
-export interface RealtimeTranscriptionSessionCreateResponse {
   /**
-   * Ephemeral key returned by the API. Only present when the session is created on
-   * the server via REST API.
+   * The object type. Always `realtime.transcription_session`.
    */
-  client_secret: RealtimeTranscriptionSessionClientSecret;
+  object: string;
 
   /**
-   * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+   * The type of session. Always `transcription` for transcription sessions.
    */
-  input_audio_format?: string;
+  type: 'transcription';
 
   /**
-   * Configuration of the transcription model.
+   * Configuration for input audio for the session.
    */
-  input_audio_transcription?: RealtimeTranscriptionSessionInputAudioTranscription;
+  audio?: RealtimeTranscriptionSessionCreateResponse.Audio;
 
   /**
-   * The set of modalities the model can respond with. To disable audio, set this to
-   * ["text"].
+   * Expiration timestamp for the session, in seconds since epoch.
    */
-  modalities?: Array<'text' | 'audio'>;
+  expires_at?: number;
 
   /**
-   * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
-   * means that the model will detect the start and end of speech based on audio
-   * volume and respond at the end of user speech.
+   * Additional fields to include in server outputs.
+   *
+   * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+   *   transcription.
    */
-  turn_detection?: RealtimeTranscriptionSessionTurnDetection;
+  include?: Array<'item.input_audio_transcription.logprobs'>;
 }
 
-/**
- * Configuration of the transcription model.
- */
-export interface RealtimeTranscriptionSessionInputAudioTranscription {
+export namespace RealtimeTranscriptionSessionCreateResponse {
   /**
-   * The language of the input audio. Supplying the input language in
-   * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
-   * format will improve accuracy and latency.
+   * Configuration for input audio for the session.
    */
-  language?: string;
+  export interface Audio {
+    input?: Audio.Input;
+  }
 
-  /**
-   * The model to use for transcription. Current options are `whisper-1`,
-   * `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
-   */
-  model?: 'whisper-1' | 'gpt-4o-transcribe-latest' | 'gpt-4o-mini-transcribe' | 'gpt-4o-transcribe';
+  export namespace Audio {
+    export interface Input {
+      /**
+       * The PCM audio format. Only a 24kHz sample rate is supported.
+       */
+      format?: RealtimeAPI.RealtimeAudioFormats;
 
-  /**
-   * An optional text to guide the model's style or continue a previous audio
-   * segment. For `whisper-1`, the
-   * [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
-   * For `gpt-4o-transcribe` models, the prompt is a free text string, for example
-   * "expect words related to technology".
-   */
-  prompt?: string;
+      /**
+       * Configuration for input audio noise reduction.
+       */
+      noise_reduction?: Input.NoiseReduction;
+
+      /**
+       * Configuration of the transcription model.
+       */
+      transcription?: RealtimeAPI.AudioTranscription;
+
+      /**
+       * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+       * means that the model will detect the start and end of speech based on audio
+       * volume and respond at the end of user speech.
+       */
+      turn_detection?: ClientSecretsAPI.RealtimeTranscriptionSessionTurnDetection;
+    }
+
+    export namespace Input {
+      /**
+       * Configuration for input audio noise reduction.
+       */
+      export interface NoiseReduction {
+        /**
+         * Type of noise reduction. `near_field` is for close-talking microphones such as
+         * headphones, `far_field` is for far-field microphones such as laptop or
+         * conference room microphones.
+         */
+        type?: RealtimeAPI.NoiseReductionType;
+      }
+    }
+  }
 }
 
 /**
@@ -670,9 +674,7 @@ export declare namespace ClientSecrets {
   export {
     type RealtimeSessionClientSecret as RealtimeSessionClientSecret,
     type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse,
-    type RealtimeTranscriptionSessionClientSecret as RealtimeTranscriptionSessionClientSecret,
     type RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse,
-    type RealtimeTranscriptionSessionInputAudioTranscription as RealtimeTranscriptionSessionInputAudioTranscription,
     type RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection,
     type ClientSecretCreateResponse as ClientSecretCreateResponse,
     type ClientSecretCreateParams as ClientSecretCreateParams,
diff --git a/src/resources/realtime/index.ts b/src/resources/realtime/index.ts
@@ -4,9 +4,7 @@ export {
   ClientSecrets,
   type RealtimeSessionClientSecret,
   type RealtimeSessionCreateResponse,
-  type RealtimeTranscriptionSessionClientSecret,
   type RealtimeTranscriptionSessionCreateResponse,
-  type RealtimeTranscriptionSessionInputAudioTranscription,
   type RealtimeTranscriptionSessionTurnDetection,
   type ClientSecretCreateResponse,
   type ClientSecretCreateParams,
diff --git a/src/resources/realtime/realtime.ts b/src/resources/realtime/realtime.ts