openai · stainless-app · Sep 12, 2025 · Sep 11, 2025 · Sep 11, 2025 · Sep 11, 2025
@@ -1,3 +1,3 @@
 {
-  ".": "5.20.1"
+  ".": "5.20.2"
 }
@@ -1,4 +1,4 @@
 configured_endpoints: 118
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml
-openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-94b1e3cb0bdc616ff0c2f267c33dadd95f133b1f64e647aab6c64afb292b2793.yml
+openapi_spec_hash: 2395319ac9befd59b6536ae7f9564a05
 config_hash: 930dac3aa861344867e4ac84f037b5df
@@ -1,5 +1,18 @@
 # Changelog
 
+## 5.20.2 (2025-09-12)
+
+Full Changelog: [v5.20.1...v5.20.2](https://github.com/openai/openai-node/compare/v5.20.1...v5.20.2)
+
+### Bug Fixes
+
+* coerce nullable values to undefined ([836d1b4](https://github.com/openai/openai-node/commit/836d1b4cdd077c206e1c647c762f4c16e9db444c))
+
+
+### Chores
+
+* **api:** Minor docs and type updates for realtime ([ccb00dc](https://github.com/openai/openai-node/commit/ccb00dcbd1466976045aafee152cbc038bb293b9))
+
 ## 5.20.1 (2025-09-10)
 
 Full Changelog: [v5.20.0...v5.20.1](https://github.com/openai/openai-node/compare/v5.20.0...v5.20.1)

@@ -1,6 +1,6 @@
 {
   "name": "@openai/openai",
-  "version": "5.20.1",
+  "version": "5.20.2",
   "exports": {
     ".": "./index.ts",
     "./helpers/zod": "./helpers/zod.ts",

@@ -1,6 +1,6 @@
 {
   "name": "openai",
-  "version": "5.20.1",
+  "version": "5.20.2",
   "description": "The official TypeScript library for the OpenAI API",
   "author": "OpenAI <support@openai.com>",
   "types": "dist/index.d.ts",

@@ -76,21 +76,21 @@ export const coerceBoolean = (value: unknown): boolean => {
 };
 
 export const maybeCoerceInteger = (value: unknown): number | undefined => {
-  if (value === undefined) {
+  if (value == null) {
     return undefined;
   }
   return coerceInteger(value);
 };
 
 export const maybeCoerceFloat = (value: unknown): number | undefined => {
-  if (value === undefined) {
+  if (value == null) {
     return undefined;
   }
   return coerceFloat(value);
 };
 
 export const maybeCoerceBoolean = (value: unknown): boolean | undefined => {
-  if (value === undefined) {
+  if (value == null) {
     return undefined;
   }
   return coerceBoolean(value);

@@ -181,16 +181,19 @@ export namespace RealtimeSessionCreateResponse {
       /**
        * Configuration for turn detection, either Server VAD or Semantic VAD. This can be
        * set to `null` to turn off, in which case the client must manually trigger model
-       * response. Server VAD means that the model will detect the start and end of
-       * speech based on audio volume and respond at the end of user speech. Semantic VAD
-       * is more advanced and uses a turn detection model (in conjunction with VAD) to
-       * semantically estimate whether the user has finished speaking, then dynamically
-       * sets a timeout based on this probability. For example, if user audio trails off
-       * with "uhhm", the model will score a low probability of turn end and wait longer
-       * for the user to continue speaking. This can be useful for more natural
-       * conversations, but may have a higher latency.
+       * response.
+       *
+       * Server VAD means that the model will detect the start and end of speech based on
+       * audio volume and respond at the end of user speech.
+       *
+       * Semantic VAD is more advanced and uses a turn detection model (in conjunction
+       * with VAD) to semantically estimate whether the user has finished speaking, then
+       * dynamically sets a timeout based on this probability. For example, if user audio
+       * trails off with "uhhm", the model will score a low probability of turn end and
+       * wait longer for the user to continue speaking. This can be useful for more
+       * natural conversations, but may have a higher latency.
        */
-      turn_detection?: Input.TurnDetection;
+      turn_detection?: Input.ServerVad | Input.SemanticVad | null;
     }
 
     export namespace Input {
@@ -211,35 +214,34 @@ export namespace RealtimeSessionCreateResponse {
       }
 
       /**
-       * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
-       * set to `null` to turn off, in which case the client must manually trigger model
-       * response. Server VAD means that the model will detect the start and end of
-       * speech based on audio volume and respond at the end of user speech. Semantic VAD
-       * is more advanced and uses a turn detection model (in conjunction with VAD) to
-       * semantically estimate whether the user has finished speaking, then dynamically
-       * sets a timeout based on this probability. For example, if user audio trails off
-       * with "uhhm", the model will score a low probability of turn end and wait longer
-       * for the user to continue speaking. This can be useful for more natural
-       * conversations, but may have a higher latency.
+       * Server-side voice activity detection (VAD) which flips on when user speech is
+       * detected and off after a period of silence.
        */
-      export interface TurnDetection {
+      export interface ServerVad {
         /**
-         * Whether or not to automatically generate a response when a VAD stop event
-         * occurs.
+         * Type of turn detection, `server_vad` to turn on simple Server VAD.
          */
-        create_response?: boolean;
+        type: 'server_vad';
 
         /**
-         * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
-         * will wait longer for the user to continue speaking, `high` will respond more
-         * quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
-         * and `high` have max timeouts of 8s, 4s, and 2s respectively.
+         * Whether or not to automatically generate a response when a VAD stop event
+         * occurs.
          */
-        eagerness?: 'low' | 'medium' | 'high' | 'auto';
+        create_response?: boolean;
 
         /**
-         * Optional idle timeout after which turn detection will auto-timeout when no
-         * additional audio is received and emits a `timeout_triggered` event.
+         * Optional timeout after which a model response will be triggered automatically.
+         * This is useful for situations in which a long pause from the user is unexpected,
+         * such as a phone call. The model will effectively prompt the user to continue the
+         * conversation based on the current context.
+         *
+         * The timeout value will be applied after the last model response's audio has
+         * finished playing, i.e. it's set to the `response.done` time plus audio playback
+         * duration.
+         *
+         * An `input_audio_buffer.timeout_triggered` event (plus events associated with the
+         * Response) will be emitted when the timeout is reached. Idle timeout is currently
+         * only supported for `server_vad` mode.
          */
         idle_timeout_ms?: number | null;
 
@@ -269,11 +271,38 @@ export namespace RealtimeSessionCreateResponse {
          * model, and thus might perform better in noisy environments.
          */
         threshold?: number;
+      }
 
+      /**
+       * Server-side semantic turn detection which uses a model to determine when the
+       * user has finished speaking.
+       */
+      export interface SemanticVad {
         /**
-         * Type of turn detection.
+         * Type of turn detection, `semantic_vad` to turn on Semantic VAD.
          */
-        type?: 'server_vad' | 'semantic_vad';
+        type: 'semantic_vad';
+
+        /**
+         * Whether or not to automatically generate a response when a VAD stop event
+         * occurs.
+         */
+        create_response?: boolean;
+
+        /**
+         * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+         * will wait longer for the user to continue speaking, `high` will respond more
+         * quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
+         * and `high` have max timeouts of 8s, 4s, and 2s respectively.
+         */
+        eagerness?: 'low' | 'medium' | 'high' | 'auto';
+
+        /**
+         * Whether or not to automatically interrupt any ongoing response with output to
+         * the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+         * occurs.
+         */
+        interrupt_response?: boolean;
       }
     }