feat(api): add new realtime and audio models, realtime session options

stainless-app[bot] · stainless-app[bot] · commit 1219f090b02b · 2025-06-03T16:54:57.000Z
diff --git a/.stats.yml b/.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 111
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-2bcc845d8635bf93ddcf9ee723af4d7928248412a417bee5fc10d863a1e13867.yml
-openapi_spec_hash: 865230cb3abeb01bd85de05891af23c4
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-04213ea42074f52b8e7e60e101ed7d7ae47b8abcc233c7e8eae310bba544454d.yml
+openapi_spec_hash: 5fb148608764103ba3700cd6bda4f22e
 config_hash: ed1e6b3c5f93d12b80d31167f55c557c
diff --git a/src/resources/beta/realtime/realtime.ts b/src/resources/beta/realtime/realtime.ts
@@ -2162,6 +2162,7 @@ export namespace SessionUpdateEvent {
       | 'gpt-4o-realtime-preview'
       | 'gpt-4o-realtime-preview-2024-10-01'
       | 'gpt-4o-realtime-preview-2024-12-17'
+      | 'gpt-4o-realtime-preview-2025-06-03'
       | 'gpt-4o-mini-realtime-preview'
       | 'gpt-4o-mini-realtime-preview-2024-12-17';
 
@@ -2171,6 +2172,13 @@ export namespace SessionUpdateEvent {
      */
     output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw';
 
+    /**
+     * The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the
+     * minimum speed. 1.5 is the maximum speed. This value can only be changed in
+     * between model turns, not while a response is in progress.
+     */
+    speed?: number;
+
     /**
      * Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
      * temperature of 0.8 is highly recommended for best performance.
@@ -2188,6 +2196,15 @@ export namespace SessionUpdateEvent {
      */
     tools?: Array<Session.Tool>;
 
+    /**
+     * Configuration options for tracing. Set to `null` to disable tracing. Once tracing
+     * is enabled for a session, the configuration cannot be modified.
+     *
+     * `auto` will create a trace for the session with default values for the workflow
+     * name, group id, and metadata.
+     */
+    tracing?: 'auto' | Session.UnionMember1 | null;
+
     /**
      * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
      * set to `null` to turn off, in which case the client must manually trigger model
@@ -2326,6 +2343,29 @@ export namespace SessionUpdateEvent {
       type?: 'function';
     }
 
+    /**
+     * Granular tracing configuration, the object alternative to `'auto'`. All fields are optional.
+     */
+    export interface UnionMember1 {
+      /**
+       * The group ID to attach to this trace, enabling filtering and grouping in the
+       * traces dashboard.
+       */
+      group_id?: string;
+
+      /**
+       * Arbitrary metadata to attach to this trace to enable filtering in the traces
+       * dashboard. Typed as `unknown`, so no particular shape is enforced here.
+       */
+      metadata?: unknown;
+
+      /**
+       * The workflow name to attach to this trace; it is used to name the trace in the
+       * traces dashboard.
+       */
+      workflow_name?: string;
+    }
+
     /**
      * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
      * set to `null` to turn off, in which case the client must manually trigger model
diff --git a/src/resources/beta/realtime/sessions.ts b/src/resources/beta/realtime/sessions.ts
@@ -102,6 +102,7 @@ export interface Session {
     | 'gpt-4o-realtime-preview'
     | 'gpt-4o-realtime-preview-2024-10-01'
     | 'gpt-4o-realtime-preview-2024-12-17'
+    | 'gpt-4o-realtime-preview-2025-06-03'
     | 'gpt-4o-mini-realtime-preview'
     | 'gpt-4o-mini-realtime-preview-2024-12-17';
 
@@ -111,6 +112,13 @@ export interface Session {
    */
   output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw';
 
+  /**
+   * The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the
+   * minimum speed. 1.5 is the maximum speed. This value can only be changed in
+   * between model turns, not while a response is in progress.
+   */
+  speed?: number;
+
   /**
    * Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
    * temperature of 0.8 is highly recommended for best performance.
@@ -128,6 +136,15 @@ export interface Session {
    */
   tools?: Array<Session.Tool>;
 
+  /**
+   * Configuration options for tracing. Set to null to disable tracing. Once tracing
+   * is enabled for a session, the configuration cannot be modified.
+   *
+   * `auto` will create a trace for the session with default values for the workflow
+   * name, group id, and metadata.
+   */
+  tracing?: 'auto' | Session.UnionMember1;
+
   /**
    * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
    * set to `null` to turn off, in which case the client must manually trigger model
@@ -145,7 +162,8 @@ export interface Session {
   /**
    * The voice the model uses to respond. Voice cannot be changed during the session
    * once the model has responded with audio at least once. Current voice options are
-   * `alloy`, `ash`, `ballad`, `coral`, `echo` `sage`, `shimmer` and `verse`.
+   * `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`,
+   * `shimmer`, and `verse`.
    */
   voice?:
     | (string & {})
@@ -236,6 +254,29 @@ export namespace Session {
     type?: 'function';
   }
 
+  /**
+   * Granular tracing configuration, the object alternative to `'auto'`. All fields are optional.
+   */
+  export interface UnionMember1 {
+    /**
+     * The group ID to attach to this trace, enabling filtering and grouping in the
+     * traces dashboard.
+     */
+    group_id?: string;
+
+    /**
+     * Arbitrary metadata to attach to this trace to enable filtering in the traces
+     * dashboard. Typed as `unknown`, so no particular shape is enforced here.
+     */
+    metadata?: unknown;
+
+    /**
+     * The workflow name to attach to this trace; it is used to name the trace in the
+     * traces dashboard.
+     */
+    workflow_name?: string;
+  }
+
   /**
    * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
    * set to `null` to turn off, in which case the client must manually trigger model
@@ -353,6 +394,13 @@ export interface SessionCreateResponse {
    */
   output_audio_format?: string;
 
+  /**
+   * The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the
+   * minimum speed. 1.5 is the maximum speed. This value can only be changed in
+   * between model turns, not while a response is in progress.
+   */
+  speed?: number;
+
   /**
    * Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
    */
@@ -369,6 +417,15 @@ export interface SessionCreateResponse {
    */
   tools?: Array<SessionCreateResponse.Tool>;
 
+  /**
+   * Configuration options for tracing. Set to null to disable tracing. Once tracing
+   * is enabled for a session, the configuration cannot be modified.
+   *
+   * `auto` will create a trace for the session with default values for the workflow
+   * name, group id, and metadata.
+   */
+  tracing?: 'auto' | SessionCreateResponse.UnionMember1;
+
   /**
    * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
    * means that the model will detect the start and end of speech based on audio
@@ -453,6 +510,29 @@ export namespace SessionCreateResponse {
     type?: 'function';
   }
 
+  /**
+   * Granular tracing configuration, the object alternative to `'auto'`. All fields are optional.
+   */
+  export interface UnionMember1 {
+    /**
+     * The group ID to attach to this trace, enabling filtering and grouping in the
+     * traces dashboard.
+     */
+    group_id?: string;
+
+    /**
+     * Arbitrary metadata to attach to this trace to enable filtering in the traces
+     * dashboard. Typed as `unknown`, so no particular shape is enforced here.
+     */
+    metadata?: unknown;
+
+    /**
+     * The workflow name to attach to this trace; it is used to name the trace in the
+     * traces dashboard.
+     */
+    workflow_name?: string;
+  }
+
   /**
    * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
    * means that the model will detect the start and end of speech based on audio
@@ -555,6 +635,7 @@ export interface SessionCreateParams {
     | 'gpt-4o-realtime-preview'
     | 'gpt-4o-realtime-preview-2024-10-01'
     | 'gpt-4o-realtime-preview-2024-12-17'
+    | 'gpt-4o-realtime-preview-2025-06-03'
     | 'gpt-4o-mini-realtime-preview'
     | 'gpt-4o-mini-realtime-preview-2024-12-17';
 
@@ -564,6 +645,13 @@ export interface SessionCreateParams {
    */
   output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw';
 
+  /**
+   * The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the
+   * minimum speed. 1.5 is the maximum speed. This value can only be changed in
+   * between model turns, not while a response is in progress.
+   */
+  speed?: number;
+
   /**
    * Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
    * temperature of 0.8 is highly recommended for best performance.
@@ -581,6 +669,15 @@ export interface SessionCreateParams {
    */
   tools?: Array<SessionCreateParams.Tool>;
 
+  /**
+   * Configuration options for tracing. Set to `null` to disable tracing. Once tracing
+   * is enabled for a session, the configuration cannot be modified.
+   *
+   * `auto` will create a trace for the session with default values for the workflow
+   * name, group id, and metadata.
+   */
+  tracing?: 'auto' | SessionCreateParams.UnionMember1 | null;
+
   /**
    * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
    * set to `null` to turn off, in which case the client must manually trigger model
@@ -719,6 +816,29 @@ export namespace SessionCreateParams {
     type?: 'function';
   }
 
+  /**
+   * Granular tracing configuration, the object alternative to `'auto'`. All fields are optional.
+   */
+  export interface UnionMember1 {
+    /**
+     * The group ID to attach to this trace, enabling filtering and grouping in the
+     * traces dashboard.
+     */
+    group_id?: string;
+
+    /**
+     * Arbitrary metadata to attach to this trace to enable filtering in the traces
+     * dashboard. Typed as `unknown`, so no particular shape is enforced here.
+     */
+    metadata?: unknown;
+
+    /**
+     * The workflow name to attach to this trace; it is used to name the trace in the
+     * traces dashboard.
+     */
+    workflow_name?: string;
+  }
+
   /**
    * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
    * set to `null` to turn off, in which case the client must manually trigger model
diff --git a/src/resources/shared.ts b/src/resources/shared.ts
@@ -34,6 +34,7 @@ export type ChatModel =
   | 'gpt-4o-audio-preview'
   | 'gpt-4o-audio-preview-2024-10-01'
   | 'gpt-4o-audio-preview-2024-12-17'
+  | 'gpt-4o-audio-preview-2025-06-03'
   | 'gpt-4o-mini-audio-preview'
   | 'gpt-4o-mini-audio-preview-2024-12-17'
   | 'gpt-4o-search-preview'