Commit a14660f

chore(api): Minor docs and type updates for realtime
1 parent d934689

27 files changed: +2448 -1767 lines

.stats.yml

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
 configured_endpoints: 118
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml
-openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-c829f9e7f51d4946dae7b02eb37eb857b538a464cf54c7ced5eff1b1c93e07db.yml
+openapi_spec_hash: 1b2eaba46b264bcec8831bc496543649
 config_hash: 930dac3aa861344867e4ac84f037b5df

lib/openai/models/realtime/input_audio_buffer_timeout_triggered.rb

Lines changed: 25 additions & 5 deletions
@@ -5,13 +5,15 @@ module Models
     module Realtime
       class InputAudioBufferTimeoutTriggered < OpenAI::Internal::Type::BaseModel
         # @!attribute audio_end_ms
-        #   Millisecond offset where speech ended within the buffered audio.
+        #   Millisecond offset of audio written to the input audio buffer at the time the
+        #   timeout was triggered.
         #
         #   @return [Integer]
         required :audio_end_ms, Integer

         # @!attribute audio_start_ms
-        #   Millisecond offset where speech started within the buffered audio.
+        #   Millisecond offset of audio written to the input audio buffer that was after the
+        #   playback time of the last model response.
         #
         #   @return [Integer]
         required :audio_start_ms, Integer
@@ -35,11 +37,29 @@ class InputAudioBufferTimeoutTriggered < OpenAI::Internal::Type::BaseModel
         required :type, const: :"input_audio_buffer.timeout_triggered"

         # @!method initialize(audio_end_ms:, audio_start_ms:, event_id:, item_id:, type: :"input_audio_buffer.timeout_triggered")
-        #   Returned when the server VAD timeout is triggered for the input audio buffer.
+        #   Some parameter documentations has been truncated, see
+        #   {OpenAI::Models::Realtime::InputAudioBufferTimeoutTriggered} for more details.
         #
-        #   @param audio_end_ms [Integer] Millisecond offset where speech ended within the buffered audio.
+        #   Returned when the Server VAD timeout is triggered for the input audio buffer.
+        #   This is configured with `idle_timeout_ms` in the `turn_detection` settings of
+        #   the session, and it indicates that there hasn't been any speech detected for the
+        #   configured duration.
         #
-        #   @param audio_start_ms [Integer] Millisecond offset where speech started within the buffered audio.
+        #   The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio
+        #   after the last model response up to the triggering time, as an offset from the
+        #   beginning of audio written to the input audio buffer. This means it demarcates
+        #   the segment of audio that was silent and the difference between the start and
+        #   end values will roughly match the configured timeout.
+        #
+        #   The empty audio will be committed to the conversation as an `input_audio` item
+        #   (there will be a `input_audio_buffer.committed` event) and a model response will
+        #   be generated. There may be speech that didn't trigger VAD but is still detected
+        #   by the model, so the model may respond with something relevant to the
+        #   conversation or a prompt to continue speaking.
+        #
+        #   @param audio_end_ms [Integer] Millisecond offset of audio written to the input audio buffer at the time the ti
+        #
+        #   @param audio_start_ms [Integer] Millisecond offset of audio written to the input audio buffer that was after the
         #
         #   @param event_id [String] The unique ID of the server event.
         #
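The new doc comment above says the `audio_start_ms`/`audio_end_ms` difference demarcates the silent segment and roughly matches the configured timeout. A minimal Ruby sketch of that arithmetic, assuming a hypothetical event payload shaped like the model above (all concrete values are illustrative, not from the commit):

```ruby
# Hypothetical server event shaped like `input_audio_buffer.timeout_triggered`
# (field names from the model above; the concrete values are illustrative).
event = {
  type: "input_audio_buffer.timeout_triggered",
  event_id: "event_123",
  item_id: "item_456",
  audio_start_ms: 4_500, # offset of audio written after the last model response
  audio_end_ms: 7_500    # offset at the moment the timeout fired
}

# Per the doc comment, the start/end difference is the silent segment and
# should roughly match the session's `idle_timeout_ms` (assumed 3000 ms here).
silent_ms = event[:audio_end_ms] - event[:audio_start_ms]
puts "silent for #{silent_ms} ms before timeout" # => silent for 3000 ms before timeout
```

Handlers can use this difference as a sanity check before reacting to the committed empty `input_audio` item.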

lib/openai/models/realtime/realtime_audio_config_input.rb

Lines changed: 14 additions & 11 deletions
@@ -36,17 +36,20 @@ class RealtimeAudioConfigInput < OpenAI::Internal::Type::BaseModel
         # @!attribute turn_detection
         #   Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
         #   set to `null` to turn off, in which case the client must manually trigger model
-        #   response. Server VAD means that the model will detect the start and end of
-        #   speech based on audio volume and respond at the end of user speech. Semantic VAD
-        #   is more advanced and uses a turn detection model (in conjunction with VAD) to
-        #   semantically estimate whether the user has finished speaking, then dynamically
-        #   sets a timeout based on this probability. For example, if user audio trails off
-        #   with "uhhm", the model will score a low probability of turn end and wait longer
-        #   for the user to continue speaking. This can be useful for more natural
-        #   conversations, but may have a higher latency.
+        #   response.
         #
-        #   @return [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection, nil]
-        optional :turn_detection, -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection }
+        #   Server VAD means that the model will detect the start and end of speech based on
+        #   audio volume and respond at the end of user speech.
+        #
+        #   Semantic VAD is more advanced and uses a turn detection model (in conjunction
+        #   with VAD) to semantically estimate whether the user has finished speaking, then
+        #   dynamically sets a timeout based on this probability. For example, if user audio
+        #   trails off with "uhhm", the model will score a low probability of turn end and
+        #   wait longer for the user to continue speaking. This can be useful for more
+        #   natural conversations, but may have a higher latency.
+        #
+        #   @return [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::ServerVad, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad, nil]
+        optional :turn_detection, union: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection }, nil?: true

         # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil)
         #   Some parameter documentations has been truncated, see
@@ -58,7 +61,7 @@ class RealtimeAudioConfigInput < OpenAI::Internal::Type::BaseModel
         #
         #   @param transcription [OpenAI::Models::Realtime::AudioTranscription] Configuration for input audio transcription, defaults to off and can be set to `
         #
-        #   @param turn_detection [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection] Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
+        #   @param turn_detection [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::ServerVad, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad, nil] Configuration for turn detection, ether Server VAD or Semantic VAD. This can be

         # @see OpenAI::Models::Realtime::RealtimeAudioConfigInput#noise_reduction
         class NoiseReduction < OpenAI::Internal::Type::BaseModel
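The diff above retypes `turn_detection` as a nullable union of `ServerVad` and `SemanticVad`. As a hedged sketch of what the three accepted shapes might look like as plain hashes, assuming key names from the Realtime API's `turn_detection` settings (`silence_duration_ms`, `idle_timeout_ms`, `eagerness` are assumptions here; check them against the API reference before relying on them):

```ruby
# Illustrative turn_detection settings, one per union variant. Key names
# are assumed from the Realtime API docs; values are examples only.
server_vad = {
  type: "server_vad",
  silence_duration_ms: 500, # how long a pause ends the user's turn
  idle_timeout_ms: 5_000    # fires input_audio_buffer.timeout_triggered on silence
}

semantic_vad = {
  type: "semantic_vad",
  eagerness: "auto"         # how aggressively the model decides the turn is over
}

# `nil` disables turn detection entirely; the client must then trigger
# model responses manually (this is what the `nil?: true` change permits).
configs = [server_vad, semantic_vad, nil]
labels = configs.map { |td| td.nil? ? "manual" : td[:type] }
# labels => ["server_vad", "semantic_vad", "manual"]
```

Modeling the field as a tagged union means callers discriminate on `type` rather than a single catch-all class, which is what the narrower `@return` type in the diff reflects.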
