diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 3f63a672..7f3f5c84 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.22.1" + ".": "0.23.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index c41be6ee..36a3c7f5 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 118 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-51afd6abbcb18c3086f62993f9379c18443b9e516cbc0548ddfb932e835657f8.yml -openapi_spec_hash: dae6afeaefa15cb8700c7a870531e06f -config_hash: b854932c0ea24b400bdd64e4376936bd +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-7807ec6037efcee1af7decbfd3974a42b761fb6c6a71b4050fe43484d7fcbac4.yml +openapi_spec_hash: da6851e3891ad2659a50ed6a736fd32a +config_hash: 74d955cdc2377213f5268ea309090f6c diff --git a/CHANGELOG.md b/CHANGELOG.md index ad19411a..b997b4c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 0.23.0 (2025-09-08) + +Full Changelog: [v0.22.1...v0.23.0](https://github.com/openai/openai-ruby/compare/v0.22.1...v0.23.0) + +### Features + +* **api:** ship the RealtimeGA API shape ([6c59e2c](https://github.com/openai/openai-ruby/commit/6c59e2c78ea130b626442e2230676afcca3a906f)) + ## 0.22.1 (2025-09-05) Full Changelog: [v0.22.0...v0.22.1](https://github.com/openai/openai-ruby/compare/v0.22.0...v0.22.1) diff --git a/Gemfile.lock b/Gemfile.lock index 53d94610..8884cf10 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,7 +11,7 @@ GIT PATH remote: . specs: - openai (0.22.1) + openai (0.23.0) connection_pool GEM diff --git a/README.md b/README.md index fa04f2b9..886f17cc 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ To use this gem, install via Bundler by adding the following to your application ```ruby -gem "openai", "~> 0.22.1" +gem "openai", "~> 0.23.0" ``` diff --git a/lib/openai.rb b/lib/openai.rb index 2cf5f537..62c0ab89 100644 --- a/lib/openai.rb +++ b/lib/openai.rb @@ -380,6 +380,7 @@ require_relative "openai/models/moderation_multi_modal_input" require_relative "openai/models/moderation_text_input" require_relative "openai/models/other_file_chunking_strategy_object" +require_relative "openai/models/realtime/audio_transcription" require_relative "openai/models/realtime/client_secret_create_params" require_relative "openai/models/realtime/client_secret_create_response" require_relative "openai/models/realtime/conversation_created_event" @@ -410,11 +411,16 @@ require_relative "openai/models/realtime/mcp_list_tools_completed" require_relative "openai/models/realtime/mcp_list_tools_failed" require_relative "openai/models/realtime/mcp_list_tools_in_progress" +require_relative "openai/models/realtime/models" +require_relative "openai/models/realtime/noise_reduction_type" require_relative "openai/models/realtime/output_audio_buffer_clear_event" require_relative "openai/models/realtime/rate_limits_updated_event" require_relative "openai/models/realtime/realtime_audio_config" +require_relative "openai/models/realtime/realtime_audio_config_input" +require_relative "openai/models/realtime/realtime_audio_config_output" +require_relative "openai/models/realtime/realtime_audio_formats" +require_relative "openai/models/realtime/realtime_audio_input_turn_detection" require_relative "openai/models/realtime/realtime_client_event" -require_relative "openai/models/realtime/realtime_client_secret_config" require_relative 
"openai/models/realtime/realtime_conversation_item_assistant_message" require_relative "openai/models/realtime/realtime_conversation_item_function_call" require_relative "openai/models/realtime/realtime_conversation_item_function_call_output" @@ -430,20 +436,32 @@ require_relative "openai/models/realtime/realtime_mcp_tool_call" require_relative "openai/models/realtime/realtime_mcp_tool_execution_error" require_relative "openai/models/realtime/realtime_response" +require_relative "openai/models/realtime/realtime_response_create_audio_output" +require_relative "openai/models/realtime/realtime_response_create_mcp_tool" +require_relative "openai/models/realtime/realtime_response_create_params" require_relative "openai/models/realtime/realtime_response_status" require_relative "openai/models/realtime/realtime_response_usage" require_relative "openai/models/realtime/realtime_response_usage_input_token_details" require_relative "openai/models/realtime/realtime_response_usage_output_token_details" require_relative "openai/models/realtime/realtime_server_event" require_relative "openai/models/realtime/realtime_session" +require_relative "openai/models/realtime/realtime_session_client_secret" require_relative "openai/models/realtime/realtime_session_create_request" require_relative "openai/models/realtime/realtime_session_create_response" require_relative "openai/models/realtime/realtime_tool_choice_config" require_relative "openai/models/realtime/realtime_tools_config" require_relative "openai/models/realtime/realtime_tools_config_union" require_relative "openai/models/realtime/realtime_tracing_config" +require_relative "openai/models/realtime/realtime_transcription_session_audio" +require_relative "openai/models/realtime/realtime_transcription_session_audio_input" +require_relative "openai/models/realtime/realtime_transcription_session_audio_input_turn_detection" +require_relative "openai/models/realtime/realtime_transcription_session_client_secret" require_relative "openai/models/realtime/realtime_transcription_session_create_request" +require_relative "openai/models/realtime/realtime_transcription_session_create_response" +require_relative "openai/models/realtime/realtime_transcription_session_input_audio_transcription" +require_relative "openai/models/realtime/realtime_transcription_session_turn_detection" require_relative "openai/models/realtime/realtime_truncation" +require_relative "openai/models/realtime/realtime_truncation_retention_ratio" require_relative "openai/models/realtime/response_audio_delta_event" require_relative "openai/models/realtime/response_audio_done_event" require_relative "openai/models/realtime/response_audio_transcript_delta_event" diff --git a/lib/openai/models/realtime/audio_transcription.rb b/lib/openai/models/realtime/audio_transcription.rb new file mode 100644 index 00000000..cf3d6698 --- /dev/null +++ b/lib/openai/models/realtime/audio_transcription.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class AudioTranscription < OpenAI::Internal::Type::BaseModel + # @!attribute language + # The language of the input audio. Supplying the input language in + # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + # format will improve accuracy and latency. + # + # @return [String, nil] + optional :language, String + + # @!attribute model + # The model to use for transcription. 
Current options are `whisper-1`, + # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. + # + # @return [Symbol, OpenAI::Models::Realtime::AudioTranscription::Model, nil] + optional :model, enum: -> { OpenAI::Realtime::AudioTranscription::Model } + + # @!attribute prompt + # An optional text to guide the model's style or continue a previous audio + # segment. For `whisper-1`, the + # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + # For `gpt-4o-transcribe` models, the prompt is a free text string, for example + # "expect words related to technology". + # + # @return [String, nil] + optional :prompt, String + + # @!method initialize(language: nil, model: nil, prompt: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::AudioTranscription} for more details. + # + # @param language [String] The language of the input audio. Supplying the input language in + # + # @param model [Symbol, OpenAI::Models::Realtime::AudioTranscription::Model] The model to use for transcription. Current options are `whisper-1`, `gpt-4o-tra + # + # @param prompt [String] An optional text to guide the model's style or continue a previous audio + + # The model to use for transcription. Current options are `whisper-1`, + # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. + # + # @see OpenAI::Models::Realtime::AudioTranscription#model + module Model + extend OpenAI::Internal::Type::Enum + + WHISPER_1 = :"whisper-1" + GPT_4O_TRANSCRIBE_LATEST = :"gpt-4o-transcribe-latest" + GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" + GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" + + # @!method self.values + # @return [Array] + end + end + end + end +end diff --git a/lib/openai/models/realtime/client_secret_create_params.rb b/lib/openai/models/realtime/client_secret_create_params.rb index 96695c65..f48f85c5 100644 --- a/lib/openai/models/realtime/client_secret_create_params.rb +++ b/lib/openai/models/realtime/client_secret_create_params.rb @@ -9,7 +9,10 @@ class ClientSecretCreateParams < OpenAI::Internal::Type::BaseModel include OpenAI::Internal::Type::RequestParameters # @!attribute expires_after - # Configuration for the ephemeral token expiration. + # Configuration for the client secret expiration. Expiration refers to the time + # after which a client secret will no longer be valid for creating sessions. The + # session itself may continue after that time once started. A secret can be used + # to create multiple sessions until it expires. # # @return [OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter, nil] optional :expires_after, -> { OpenAI::Realtime::ClientSecretCreateParams::ExpiresAfter } @@ -25,7 +28,7 @@ class ClientSecretCreateParams < OpenAI::Internal::Type::BaseModel # Some parameter documentations has been truncated, see # {OpenAI::Models::Realtime::ClientSecretCreateParams} for more details. # - # @param expires_after [OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter] Configuration for the ephemeral token expiration. + # @param expires_after [OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter] Configuration for the client secret expiration. Expiration refers to the time af # # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] Session configuration to use for the client secret. 
Choose either a realtime # @@ -33,15 +36,17 @@ class ClientSecretCreateParams < OpenAI::Internal::Type::BaseModel class ExpiresAfter < OpenAI::Internal::Type::BaseModel # @!attribute anchor - # The anchor point for the ephemeral token expiration. Only `created_at` is - # currently supported. + # The anchor point for the client secret expiration, meaning that `seconds` will + # be added to the `created_at` time of the client secret to produce an expiration + # timestamp. Only `created_at` is currently supported. # # @return [Symbol, OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor, nil] optional :anchor, enum: -> { OpenAI::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor } # @!attribute seconds # The number of seconds from the anchor point to the expiration. Select a value - # between `10` and `7200`. + # between `10` and `7200` (2 hours). This defaults to 600 seconds (10 minutes) if + # not specified. # # @return [Integer, nil] optional :seconds, Integer @@ -51,14 +56,18 @@ class ExpiresAfter < OpenAI::Internal::Type::BaseModel # {OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter} for more # details. # - # Configuration for the ephemeral token expiration. + # Configuration for the client secret expiration. Expiration refers to the time + # after which a client secret will no longer be valid for creating sessions. The + # session itself may continue after that time once started. A secret can be used + # to create multiple sessions until it expires. # - # @param anchor [Symbol, OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor] The anchor point for the ephemeral token expiration. Only `created_at` is curren + # @param anchor [Symbol, OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor] The anchor point for the client secret expiration, meaning that `seconds` will b # # @param seconds [Integer] The number of seconds from the anchor point to the expiration. Select a value be - The anchor point for the ephemeral token expiration. Only `created_at` is - # currently supported. + The anchor point for the client secret expiration, meaning that `seconds` will + # be added to the `created_at` time of the client secret to produce an expiration + # timestamp. Only `created_at` is currently supported. # # @see OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter#anchor module Anchor diff --git a/lib/openai/models/realtime/client_secret_create_response.rb b/lib/openai/models/realtime/client_secret_create_response.rb index cce2a91b..47e4a0ba 100644 --- a/lib/openai/models/realtime/client_secret_create_response.rb +++ b/lib/openai/models/realtime/client_secret_create_response.rb @@ -14,7 +14,7 @@ class ClientSecretCreateResponse < OpenAI::Internal::Type::BaseModel # @!attribute session # The session configuration for either a realtime or transcription session. # - # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse] + # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse] required :session, union: -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session } # @!attribute value @@ -31,7 +31,7 @@ class ClientSecretCreateResponse < OpenAI::Internal::Type::BaseModel # # @param expires_at [Integer] Expiration timestamp for the client secret, in seconds since epoch.
# - # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse] The session configuration for either a realtime or transcription session. + # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse] The session configuration for either a realtime or transcription session. # # @param value [String] The generated client secret value. @@ -41,258 +41,19 @@ class ClientSecretCreateResponse < OpenAI::Internal::Type::BaseModel module Session extend OpenAI::Internal::Type::Union - # A Realtime session configuration object. + # A new Realtime session configuration, with an ephemeral key. Default TTL + # for keys is one minute. variant -> { OpenAI::Realtime::RealtimeSessionCreateResponse } - # A Realtime transcription session configuration object. - variant -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse } - - class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel - # @!attribute id - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - # - # @return [String, nil] - optional :id, String - - # @!attribute audio - # Configuration for input audio for the session. - # - # @return [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio, nil] - optional :audio, - -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio } - - # @!attribute expires_at - # Expiration timestamp for the session, in seconds since epoch. - # - # @return [Integer, nil] - optional :expires_at, Integer - - # @!attribute include - # Additional fields to include in server outputs. - # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. - # - # @return [Array, nil] - optional :include, - -> do - OpenAI::Internal::Type::ArrayOf[ - enum: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include - ] - end - - # @!attribute object - # The object type. Always `realtime.transcription_session`. - # - # @return [String, nil] - optional :object, String - - # @!method initialize(id: nil, audio: nil, expires_at: nil, include: nil, object: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse} - # for more details. - # - # A Realtime transcription session configuration object. - # - # @param id [String] Unique identifier for the session that looks like `sess_1234567890abcdef`. - # - # @param audio [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio] Configuration for input audio for the session. - # - # @param expires_at [Integer] Expiration timestamp for the session, in seconds since epoch. - # - # @param include [Array] Additional fields to include in server outputs. - # - # @param object [String] The object type. Always `realtime.transcription_session`. 
- - # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse#audio - class Audio < OpenAI::Internal::Type::BaseModel - # @!attribute input - # - # @return [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input, nil] - optional :input, - -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input } - - # @!method initialize(input: nil) - # Configuration for input audio for the session. - # - # @param input [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input] - - # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio#input - class Input < OpenAI::Internal::Type::BaseModel - # @!attribute format_ - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @return [String, nil] - optional :format_, String, api_name: :format - - # @!attribute noise_reduction - # Configuration for input audio noise reduction. - # - # @return [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction, nil] - optional :noise_reduction, - -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction } - - # @!attribute transcription - # Configuration of the transcription model. - # - # @return [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription, nil] - optional :transcription, - -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription } - - # @!attribute turn_detection - # Configuration for turn detection. - # - # @return [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection, nil] - optional :turn_detection, - -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection } - - # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input} - # for more details. - # - # @param format_ [String] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @param noise_reduction [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction] Configuration for input audio noise reduction. - # - # @param transcription [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription] Configuration of the transcription model. - # - # @param turn_detection [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection] Configuration for turn detection. 
- - # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input#noise_reduction - class NoiseReduction < OpenAI::Internal::Type::BaseModel - # @!attribute type - # - # @return [Symbol, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type, nil] - optional :type, - enum: -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type } - - # @!method initialize(type: nil) - # Configuration for input audio noise reduction. - # - # @param type [Symbol, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type] - - # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction#type - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD = :near_field - FAR_FIELD = :far_field - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input#transcription - class Transcription < OpenAI::Internal::Type::BaseModel - # @!attribute language - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - # - # @return [String, nil] - optional :language, String - - # @!attribute model - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - # - # @return [Symbol, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model, nil] - optional :model, - enum: -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model } - - # @!attribute prompt - # An optional text to guide the model's style or continue a previous audio - # segment. The - # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) - # should match the audio language. - # - # @return [String, nil] - optional :prompt, String - - # @!method initialize(language: nil, model: nil, prompt: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription} - # for more details. - # - # Configuration of the transcription model. - # - # @param language [String] The language of the input audio. Supplying the input language in - # - # @param model [Symbol, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model] The model to use for transcription. Can be `gpt-4o-transcribe`, `gpt-4o-mini-tra - # - # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment - - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. 
- # - # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription#model - module Model - extend OpenAI::Internal::Type::Enum - - GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" - GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" - WHISPER_1 = :"whisper-1" - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input#turn_detection - class TurnDetection < OpenAI::Internal::Type::BaseModel - # @!attribute prefix_padding_ms - # - # @return [Integer, nil] - optional :prefix_padding_ms, Integer - - # @!attribute silence_duration_ms - # - # @return [Integer, nil] - optional :silence_duration_ms, Integer - - # @!attribute threshold - # - # @return [Float, nil] - optional :threshold, Float - - # @!attribute type - # Type of turn detection, only `server_vad` is currently supported. - # - # @return [String, nil] - optional :type, String - - # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection} - # for more details. - # - # Configuration for turn detection. - # - # @param prefix_padding_ms [Integer] - # - # @param silence_duration_ms [Integer] - # - # @param threshold [Float] - # - # @param type [String] Type of turn detection, only `server_vad` is currently supported. - end - end - end - - module Include - extend OpenAI::Internal::Type::Enum - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs" - - # @!method self.values - # @return [Array] - end - end + # A new Realtime transcription session configuration. + # + # When a session is created on the server via REST API, the session object + # also contains an ephemeral key. Default TTL for keys is 10 minutes. This + # property is not present when a session is updated via the WebSocket API. + variant -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse } # @!method self.variants - # @return [Array(OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse)] + # @return [Array(OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse)] end end end diff --git a/lib/openai/models/realtime/conversation_item.rb b/lib/openai/models/realtime/conversation_item.rb index 8b583b35..5aba025d 100644 --- a/lib/openai/models/realtime/conversation_item.rb +++ b/lib/openai/models/realtime/conversation_item.rb @@ -9,7 +9,7 @@ module ConversationItem discriminator :type - # A system message item in a Realtime conversation. + # A system message in a Realtime conversation can be used to provide additional context or instructions to the model. This is similar but distinct from the instruction prompt provided at the start of a conversation, as system messages can be added at any point in the conversation. For major changes to the conversation's behavior, use instructions, but for smaller updates (e.g. "the user is now asking about a different topic"), use system messages. variant :message, -> { OpenAI::Realtime::RealtimeConversationItemSystemMessage } # A user message item in a Realtime conversation. 
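The client secret changes above are the core of the new flow: `expires_after.seconds` is added to the secret's `created_at` to produce an expiry, and the secret can mint multiple sessions until then. A minimal sketch of exercising it, assuming the generated `client.realtime.client_secrets.create` resource method and the `gpt-realtime` model name (both assumptions, not confirmed by this diff):

```ruby
require "openai"

client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

# Mint a client secret that expires 600 seconds after its `created_at`
# time (the documented default when `seconds` is omitted). Until then,
# the same secret can be used to create multiple sessions; a session
# that is already running may continue past the expiry.
secret = client.realtime.client_secrets.create(
  expires_after: {anchor: :created_at, seconds: 600},
  session: {
    type: :realtime,      # or a transcription session request
    model: "gpt-realtime" # assumed model name, not taken from this diff
  }
)

secret.value      # => the ephemeral client secret string
secret.expires_at # => expiration timestamp, in seconds since epoch
```

The returned object carries the `value`, `expires_at`, and resolved `session` fields that `ClientSecretCreateResponse` declares above.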
diff --git a/lib/openai/models/realtime/conversation_item_added.rb b/lib/openai/models/realtime/conversation_item_added.rb index 94c60d2e..2e218b74 100644 --- a/lib/openai/models/realtime/conversation_item_added.rb +++ b/lib/openai/models/realtime/conversation_item_added.rb @@ -33,7 +33,20 @@ class ConversationItemAdded < OpenAI::Internal::Type::BaseModel # Some parameter documentations has been truncated, see # {OpenAI::Models::Realtime::ConversationItemAdded} for more details. # - # Returned when a conversation item is added. + # Sent by the server when an Item is added to the default Conversation. This can + # happen in several cases: + # + # - When the client sends a `conversation.item.create` event. + # - When the input audio buffer is committed. In this case the item will be a user + # message containing the audio from the buffer. + # - When the model is generating a Response. In this case the + # `conversation.item.added` event will be sent when the model starts generating + # a specific Item, and thus it will not yet have any content (and `status` will + # be `in_progress`). + # + # The event will include the full content of the Item (except when the model is + # generating a Response) except for audio data, which can be retrieved separately + # with a `conversation.item.retrieve` event if necessary. # # @param event_id [String] The unique ID of the server event. # diff --git a/lib/openai/models/realtime/conversation_item_done.rb b/lib/openai/models/realtime/conversation_item_done.rb index a296f737..471c73d6 100644 --- a/lib/openai/models/realtime/conversation_item_done.rb +++ b/lib/openai/models/realtime/conversation_item_done.rb @@ -35,6 +35,9 @@ class ConversationItemDone < OpenAI::Internal::Type::BaseModel # # Returned when a conversation item is finalized. # + # The event will include the full content of the Item except for audio data, which + # can be retrieved separately with a `conversation.item.retrieve` event if needed. + # # @param event_id [String] The unique ID of the server event. # # @param item [OpenAI::Models::Realtime::RealtimeConversationItemSystemMessage, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCallOutput, OpenAI::Models::Realtime::RealtimeMcpApprovalResponse, OpenAI::Models::Realtime::RealtimeMcpListTools, OpenAI::Models::Realtime::RealtimeMcpToolCall, OpenAI::Models::Realtime::RealtimeMcpApprovalRequest] A single item within a Realtime conversation. diff --git a/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb b/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb index 627c5a23..02bb7d39 100644 --- a/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +++ b/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb @@ -17,7 +17,7 @@ class ConversationItemInputAudioTranscriptionCompletedEvent < OpenAI::Internal:: required :event_id, String # @!attribute item_id - # The ID of the user message item containing the audio. + # The ID of the item containing the audio that is being transcribed.
# # @return [String] required :item_id, String @@ -35,7 +35,8 @@ class ConversationItemInputAudioTranscriptionCompletedEvent < OpenAI::Internal:: required :type, const: :"conversation.item.input_audio_transcription.completed" # @!attribute usage - # Usage statistics for the transcription. + # Usage statistics for the transcription; this is billed according to the ASR + # model's pricing rather than the realtime model's pricing. # # @return [OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageTokens, OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageDuration] required :usage, @@ -56,9 +57,9 @@ class ConversationItemInputAudioTranscriptionCompletedEvent < OpenAI::Internal:: # # This event is the output of audio transcription for user audio written to the # user audio buffer. Transcription begins when the input audio buffer is committed - # by the client or server (in `server_vad` mode). Transcription runs - # asynchronously with Response creation, so this event may come before or after - # the Response events. + # by the client or server (when VAD is enabled). Transcription runs asynchronously + # with Response creation, so this event may come before or after the Response + # events. # # Realtime API models accept audio natively, and thus input transcription is a # separate process run on a separate ASR (Automatic Speech Recognition) model. The @@ -69,17 +70,18 @@ # # @param event_id [String] The unique ID of the server event. # - # @param item_id [String] The ID of the user message item containing the audio. + # @param item_id [String] The ID of the item containing the audio that is being transcribed. # # @param transcript [String] The transcribed text. # - # @param usage [OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageTokens, OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageDuration] Usage statistics for the transcription. + # @param usage [OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageTokens, OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageDuration] Usage statistics for the transcription; this is billed according to the ASR mode # # @param logprobs [Array, nil] The log probabilities of the transcription. # # @param type [Symbol, :"conversation.item.input_audio_transcription.completed"] The event type, must be - # Usage statistics for the transcription. + # Usage statistics for the transcription; this is billed according to the ASR + # model's pricing rather than the realtime model's pricing. # # @see OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent#usage module Usage diff --git a/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb b/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb index 4c82cd63..c22d02d3 100644 --- a/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +++ b/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb @@ -11,7 +11,7 @@ class ConversationItemInputAudioTranscriptionDeltaEvent < OpenAI::Internal::Type required :event_id, String # @!attribute item_id - # The ID of the item.
+ # The ID of the item containing the audio that is being transcribed. # # @return [String] required :item_id, String @@ -35,7 +35,12 @@ class ConversationItemInputAudioTranscriptionDeltaEvent < OpenAI::Internal::Type optional :delta, String # @!attribute logprobs - # The log probabilities of the transcription. + # The log probabilities of the transcription. These can be enabled by + # configuring the session with + # `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the + # array corresponds to a log probability of which token would be selected for this + # chunk of transcription. This can help to identify if there were + # multiple valid options for a given chunk of transcription. # # @return [Array, nil] optional :logprobs, @@ -43,18 +48,22 @@ class ConversationItemInputAudioTranscriptionDeltaEvent < OpenAI::Internal::Type nil?: true # @!method initialize(event_id:, item_id:, content_index: nil, delta: nil, logprobs: nil, type: :"conversation.item.input_audio_transcription.delta") + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionDeltaEvent} + # for more details. + # # Returned when the text value of an input audio transcription content part is - # updated. + # updated with incremental transcription results. # # @param event_id [String] The unique ID of the server event. # - # @param item_id [String] The ID of the item. + # @param item_id [String] The ID of the item containing the audio that is being transcribed. # # @param content_index [Integer] The index of the content part in the item's content array. # # @param delta [String] The text delta. # - # @param logprobs [Array, nil] The log probabilities of the transcription. + # @param logprobs [Array, nil] The log probabilities of the transcription. These can be enabled by configuring # # @param type [Symbol, :"conversation.item.input_audio_transcription.delta"] The event type, must be `conversation.item.input_audio_transcription.delta`. end diff --git a/lib/openai/models/realtime/conversation_item_truncate_event.rb b/lib/openai/models/realtime/conversation_item_truncate_event.rb index 640e1b45..78cb368e 100644 --- a/lib/openai/models/realtime/conversation_item_truncate_event.rb +++ b/lib/openai/models/realtime/conversation_item_truncate_event.rb @@ -13,7 +13,7 @@ class ConversationItemTruncateEvent < OpenAI::Internal::Type::BaseModel required :audio_end_ms, Integer # @!attribute content_index - # The index of the content part to truncate. Set this to 0. + # The index of the content part to truncate. Set this to `0`. # # @return [Integer] required :content_index, Integer @@ -55,7 +55,7 @@ class ConversationItemTruncateEvent < OpenAI::Internal::Type::BaseModel # # @param audio_end_ms [Integer] Inclusive duration up to which audio is truncated, in milliseconds. If # - # @param content_index [Integer] The index of the content part to truncate. Set this to 0. + # @param content_index [Integer] The index of the content part to truncate. Set this to `0`. # # @param item_id [String] The ID of the assistant message item to truncate.
Only assistant message # diff --git a/lib/openai/models/realtime/input_audio_buffer_append_event.rb b/lib/openai/models/realtime/input_audio_buffer_append_event.rb index 9ab6446f..968d313e 100644 --- a/lib/openai/models/realtime/input_audio_buffer_append_event.rb +++ b/lib/openai/models/realtime/input_audio_buffer_append_event.rb @@ -28,14 +28,19 @@ class InputAudioBufferAppendEvent < OpenAI::Internal::Type::BaseModel # {OpenAI::Models::Realtime::InputAudioBufferAppendEvent} for more details. # # Send this event to append audio bytes to the input audio buffer. The audio - # buffer is temporary storage you can write to and later commit. In Server VAD - # mode, the audio buffer is used to detect speech and the server will decide when - # to commit. When Server VAD is disabled, you must commit the audio buffer - # manually. + # buffer is temporary storage you can write to and later commit. A "commit" will + # create a new user message item in the conversation history from the buffer + # content and clear the buffer. Input audio transcription (if enabled) will be + # generated when the buffer is committed. + # + # If VAD is enabled, the audio buffer is used to detect speech and the server will + # decide when to commit. When Server VAD is disabled, you must commit the audio + # buffer manually. Input audio noise reduction operates on writes to the audio + # buffer. # # The client may choose how much audio to place in each event up to a maximum of # 15 MiB, for example streaming smaller chunks from the client may allow the VAD - # to be more responsive. Unlike made other client events, the server will not send + # to be more responsive. Unlike most other client events, the server will not send # a confirmation response to this event. # # @param audio [String] Base64-encoded audio bytes. This must be in the format specified by the diff --git a/lib/openai/models/realtime/models.rb b/lib/openai/models/realtime/models.rb new file mode 100644 index 00000000..332349c1 --- /dev/null +++ b/lib/openai/models/realtime/models.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class Models < OpenAI::Internal::Type::BaseModel + # @!attribute description + # The description of the function, including guidance on when and how to call it, + # and guidance about what to tell the user when calling (if anything). + # + # @return [String, nil] + optional :description, String + + # @!attribute name + # The name of the function. + # + # @return [String, nil] + optional :name, String + + # @!attribute parameters + # Parameters of the function in JSON Schema. + # + # @return [Object, nil] + optional :parameters, OpenAI::Internal::Type::Unknown + + # @!attribute type + # The type of the tool, i.e. `function`. + # + # @return [Symbol, OpenAI::Models::Realtime::Models::Type, nil] + optional :type, enum: -> { OpenAI::Realtime::Models::Type } + + # @!method initialize(description: nil, name: nil, parameters: nil, type: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::Models} for more details. + # + # @param description [String] The description of the function, including guidance on when and how + # + # @param name [String] The name of the function. + # + # @param parameters [Object] Parameters of the function in JSON Schema. + # + # @param type [Symbol, OpenAI::Models::Realtime::Models::Type] The type of the tool, i.e. `function`. + + # The type of the tool, i.e. `function`.
+ # + # @see OpenAI::Models::Realtime::Models#type + module Type + extend OpenAI::Internal::Type::Enum + + FUNCTION = :function + + # @!method self.values + # @return [Array] + end + end + end + end +end diff --git a/lib/openai/models/realtime/noise_reduction_type.rb b/lib/openai/models/realtime/noise_reduction_type.rb new file mode 100644 index 00000000..423a379e --- /dev/null +++ b/lib/openai/models/realtime/noise_reduction_type.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. + module NoiseReductionType + extend OpenAI::Internal::Type::Enum + + NEAR_FIELD = :near_field + FAR_FIELD = :far_field + + # @!method self.values + # @return [Array] + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_audio_config.rb b/lib/openai/models/realtime/realtime_audio_config.rb index bd72db15..4233250a 100644 --- a/lib/openai/models/realtime/realtime_audio_config.rb +++ b/lib/openai/models/realtime/realtime_audio_config.rb @@ -6,440 +6,19 @@ module Realtime class RealtimeAudioConfig < OpenAI::Internal::Type::BaseModel # @!attribute input # - # @return [OpenAI::Models::Realtime::RealtimeAudioConfig::Input, nil] - optional :input, -> { OpenAI::Realtime::RealtimeAudioConfig::Input } + # @return [OpenAI::Models::Realtime::RealtimeAudioConfigInput, nil] + optional :input, -> { OpenAI::Realtime::RealtimeAudioConfigInput } # @!attribute output # - # @return [OpenAI::Models::Realtime::RealtimeAudioConfig::Output, nil] - optional :output, -> { OpenAI::Realtime::RealtimeAudioConfig::Output } + # @return [OpenAI::Models::Realtime::RealtimeAudioConfigOutput, nil] + optional :output, -> { OpenAI::Realtime::RealtimeAudioConfigOutput } # @!method initialize(input: nil, output: nil) # Configuration for input and output audio. # - # @param input [OpenAI::Models::Realtime::RealtimeAudioConfig::Input] - # @param output [OpenAI::Models::Realtime::RealtimeAudioConfig::Output] - - # @see OpenAI::Models::Realtime::RealtimeAudioConfig#input - class Input < OpenAI::Internal::Type::BaseModel - # @!attribute format_ - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For - # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel - # (mono), and little-endian byte order. - # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Format, nil] - optional :format_, - enum: -> { - OpenAI::Realtime::RealtimeAudioConfig::Input::Format - }, - api_name: :format - - # @!attribute noise_reduction - # Configuration for input audio noise reduction. This can be set to `null` to turn - # off. Noise reduction filters audio added to the input audio buffer before it is - # sent to VAD and the model. Filtering the audio can improve VAD and turn - # detection accuracy (reducing false positives) and model performance by improving - # perception of the input audio. - # - # @return [OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction, nil] - optional :noise_reduction, -> { OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction } - - # @!attribute transcription - # Configuration for input audio transcription, defaults to off and can be set to - # `null` to turn off once on. Input audio transcription is not native to the - # model, since the model consumes audio directly. 
Transcription runs - # asynchronously through - # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) - # and should be treated as guidance of input audio content rather than precisely - # what the model heard. The client can optionally set the language and prompt for - # transcription, these offer additional guidance to the transcription service. - # - # @return [OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription, nil] - optional :transcription, -> { OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription } - - # @!attribute turn_detection - # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be - # set to `null` to turn off, in which case the client must manually trigger model - # response. Server VAD means that the model will detect the start and end of - # speech based on audio volume and respond at the end of user speech. Semantic VAD - # is more advanced and uses a turn detection model (in conjunction with VAD) to - # semantically estimate whether the user has finished speaking, then dynamically - # sets a timeout based on this probability. For example, if user audio trails off - # with "uhhm", the model will score a low probability of turn end and wait longer - # for the user to continue speaking. This can be useful for more natural - # conversations, but may have a higher latency. - # - # @return [OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection, nil] - optional :turn_detection, -> { OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection } - - # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeAudioConfig::Input} for more details. - # - # @param format_ [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Format] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @param noise_reduction [OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn - # - # @param transcription [OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription] Configuration for input audio transcription, defaults to off and can be set to ` - # - # @param turn_detection [OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection] Configuration for turn detection, ether Server VAD or Semantic VAD. This can be - - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For - # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel - # (mono), and little-endian byte order. - # - # @see OpenAI::Models::Realtime::RealtimeAudioConfig::Input#format_ - module Format - extend OpenAI::Internal::Type::Enum - - PCM16 = :pcm16 - G711_ULAW = :g711_ulaw - G711_ALAW = :g711_alaw - - # @!method self.values - # @return [Array] - end - - # @see OpenAI::Models::Realtime::RealtimeAudioConfig::Input#noise_reduction - class NoiseReduction < OpenAI::Internal::Type::BaseModel - # @!attribute type - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. 
- # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type, nil] - optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type } - - # @!method initialize(type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction} for more - # details. - # - # Configuration for input audio noise reduction. This can be set to `null` to turn - # off. Noise reduction filters audio added to the input audio buffer before it is - # sent to VAD and the model. Filtering the audio can improve VAD and turn - # detection accuracy (reducing false positives) and model performance by improving - # perception of the input audio. - # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type] Type of noise reduction. `near_field` is for close-talking microphones such as h - - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. - # - # @see OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction#type - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD = :near_field - FAR_FIELD = :far_field - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::RealtimeAudioConfig::Input#transcription - class Transcription < OpenAI::Internal::Type::BaseModel - # @!attribute language - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - # - # @return [String, nil] - optional :language, String - - # @!attribute model - # The model to use for transcription. Current options are `whisper-1`, - # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and - # `gpt-4o-transcribe-diarize`. - # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription::Model, nil] - optional :model, enum: -> { OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model } - - # @!attribute prompt - # An optional text to guide the model's style or continue a previous audio - # segment. For `whisper-1`, the - # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - # For `gpt-4o-transcribe` models, the prompt is a free text string, for example - # "expect words related to technology". - # - # @return [String, nil] - optional :prompt, String - - # @!method initialize(language: nil, model: nil, prompt: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription} for more - # details. - # - # Configuration for input audio transcription, defaults to off and can be set to - # `null` to turn off once on. Input audio transcription is not native to the - # model, since the model consumes audio directly. Transcription runs - # asynchronously through - # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) - # and should be treated as guidance of input audio content rather than precisely - # what the model heard. The client can optionally set the language and prompt for - # transcription, these offer additional guidance to the transcription service. 
- # - # @param language [String] The language of the input audio. Supplying the input language in - # - # @param model [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription::Model] The model to use for transcription. Current options are - # - # @param prompt [String] An optional text to guide the model's style or continue a previous audio - - # The model to use for transcription. Current options are `whisper-1`, - # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and - # `gpt-4o-transcribe-diarize`. - # - # @see OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription#model - module Model - extend OpenAI::Internal::Type::Enum - - WHISPER_1 = :"whisper-1" - GPT_4O_TRANSCRIBE_LATEST = :"gpt-4o-transcribe-latest" - GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" - GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" - GPT_4O_TRANSCRIBE_DIARIZE = :"gpt-4o-transcribe-diarize" - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::RealtimeAudioConfig::Input#turn_detection - class TurnDetection < OpenAI::Internal::Type::BaseModel - # @!attribute create_response - # Whether or not to automatically generate a response when a VAD stop event - # occurs. - # - # @return [Boolean, nil] - optional :create_response, OpenAI::Internal::Type::Boolean - - # @!attribute eagerness - # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` - # will wait longer for the user to continue speaking, `high` will respond more - # quickly. `auto` is the default and is equivalent to `medium`. - # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness, nil] - optional :eagerness, enum: -> { OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness } - - # @!attribute idle_timeout_ms - # Optional idle timeout after which turn detection will auto-timeout when no - # additional audio is received. - # - # @return [Integer, nil] - optional :idle_timeout_ms, Integer, nil?: true - - # @!attribute interrupt_response - # Whether or not to automatically interrupt any ongoing response with output to - # the default conversation (i.e. `conversation` of `auto`) when a VAD start event - # occurs. - # - # @return [Boolean, nil] - optional :interrupt_response, OpenAI::Internal::Type::Boolean - - # @!attribute prefix_padding_ms - # Used only for `server_vad` mode. Amount of audio to include before the VAD - # detected speech (in milliseconds). Defaults to 300ms. - # - # @return [Integer, nil] - optional :prefix_padding_ms, Integer - - # @!attribute silence_duration_ms - # Used only for `server_vad` mode. Duration of silence to detect speech stop (in - # milliseconds). Defaults to 500ms. With shorter values the model will respond - # more quickly, but may jump in on short pauses from the user. - # - # @return [Integer, nil] - optional :silence_duration_ms, Integer - - # @!attribute threshold - # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this - # defaults to 0.5. A higher threshold will require louder audio to activate the - # model, and thus might perform better in noisy environments. - # - # @return [Float, nil] - optional :threshold, Float - - # @!attribute type - # Type of turn detection. 
- # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type, nil] - optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type } - - # @!method initialize(create_response: nil, eagerness: nil, idle_timeout_ms: nil, interrupt_response: nil, prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection} for more - # details. - # - # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be - # set to `null` to turn off, in which case the client must manually trigger model - # response. Server VAD means that the model will detect the start and end of - # speech based on audio volume and respond at the end of user speech. Semantic VAD - # is more advanced and uses a turn detection model (in conjunction with VAD) to - # semantically estimate whether the user has finished speaking, then dynamically - # sets a timeout based on this probability. For example, if user audio trails off - # with "uhhm", the model will score a low probability of turn end and wait longer - # for the user to continue speaking. This can be useful for more natural - # conversations, but may have a higher latency. - # - # @param create_response [Boolean] Whether or not to automatically generate a response when a VAD stop event occurs - # - # @param eagerness [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness] Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` - # - # @param idle_timeout_ms [Integer, nil] Optional idle timeout after which turn detection will auto-timeout when - # - # @param interrupt_response [Boolean] Whether or not to automatically interrupt any ongoing response with output to th - # - # @param prefix_padding_ms [Integer] Used only for `server_vad` mode. Amount of audio to include before the VAD detec - # - # @param silence_duration_ms [Integer] Used only for `server_vad` mode. Duration of silence to detect speech stop (in m - # - # @param threshold [Float] Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this - # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type] Type of turn detection. - - # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` - # will wait longer for the user to continue speaking, `high` will respond more - # quickly. `auto` is the default and is equivalent to `medium`. - # - # @see OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection#eagerness - module Eagerness - extend OpenAI::Internal::Type::Enum - - LOW = :low - MEDIUM = :medium - HIGH = :high - AUTO = :auto - - # @!method self.values - # @return [Array] - end - - # Type of turn detection. - # - # @see OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection#type - module Type - extend OpenAI::Internal::Type::Enum - - SERVER_VAD = :server_vad - SEMANTIC_VAD = :semantic_vad - - # @!method self.values - # @return [Array] - end - end - end - - # @see OpenAI::Models::Realtime::RealtimeAudioConfig#output - class Output < OpenAI::Internal::Type::BaseModel - # @!attribute format_ - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # For `pcm16`, output audio is sampled at a rate of 24kHz. 
- # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Format, nil] - optional :format_, - enum: -> { - OpenAI::Realtime::RealtimeAudioConfig::Output::Format - }, - api_name: :format - - # @!attribute speed - # The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the - # minimum speed. 1.5 is the maximum speed. This value can only be changed in - # between model turns, not while a response is in progress. - # - # @return [Float, nil] - optional :speed, Float - - # @!attribute voice - # The voice the model uses to respond. Voice cannot be changed during the session - # once the model has responded with audio at least once. Current voice options are - # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, - # and `cedar`. - # - # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice, nil] - optional :voice, union: -> { OpenAI::Realtime::RealtimeAudioConfig::Output::Voice } - - # @!method initialize(format_: nil, speed: nil, voice: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeAudioConfig::Output} for more details. - # - # @param format_ [Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Format] The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @param speed [Float] The speed of the model's spoken response. 1.0 is the default speed. 0.25 is - # - # @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice] The voice the model uses to respond. Voice cannot be changed during the - - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # For `pcm16`, output audio is sampled at a rate of 24kHz. - # - # @see OpenAI::Models::Realtime::RealtimeAudioConfig::Output#format_ - module Format - extend OpenAI::Internal::Type::Enum - - PCM16 = :pcm16 - G711_ULAW = :g711_ulaw - G711_ALAW = :g711_alaw - - # @!method self.values - # @return [Array] - end - - # The voice the model uses to respond. Voice cannot be changed during the session - # once the model has responded with audio at least once. Current voice options are - # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, - # and `cedar`. 
- # - # @see OpenAI::Models::Realtime::RealtimeAudioConfig::Output#voice - module Voice - extend OpenAI::Internal::Type::Union - - variant String - - variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice::ALLOY } - - variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice::ASH } - - variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice::BALLAD } - - variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice::CORAL } - - variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice::ECHO } - - variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice::SAGE } - - variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice::SHIMMER } - - variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice::VERSE } - - variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice::MARIN } - - variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfig::Output::Voice::CEDAR } - - # @!method self.variants - # @return [Array(String, Symbol)] - - define_sorbet_constant!(:Variants) do - T.type_alias { T.any(String, OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol) } - end - - # @!group - - ALLOY = :alloy - ASH = :ash - BALLAD = :ballad - CORAL = :coral - ECHO = :echo - SAGE = :sage - SHIMMER = :shimmer - VERSE = :verse - MARIN = :marin - CEDAR = :cedar - - # @!endgroup - end - end + # @param input [OpenAI::Models::Realtime::RealtimeAudioConfigInput] + # @param output [OpenAI::Models::Realtime::RealtimeAudioConfigOutput] end end end diff --git a/lib/openai/models/realtime/realtime_audio_config_input.rb b/lib/openai/models/realtime/realtime_audio_config_input.rb new file mode 100644 index 00000000..89f70507 --- /dev/null +++ b/lib/openai/models/realtime/realtime_audio_config_input.rb @@ -0,0 +1,89 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeAudioConfigInput < OpenAI::Internal::Type::BaseModel + # @!attribute format_ + # The format of the input audio. + # + # @return [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA, nil] + optional :format_, union: -> { OpenAI::Realtime::RealtimeAudioFormats }, api_name: :format + + # @!attribute noise_reduction + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + # + # @return [OpenAI::Models::Realtime::RealtimeAudioConfigInput::NoiseReduction, nil] + optional :noise_reduction, -> { OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction } + + # @!attribute transcription + # Configuration for input audio transcription, defaults to off and can be set to + # `null` to turn off once on. Input audio transcription is not native to the + # model, since the model consumes audio directly. Transcription runs + # asynchronously through + # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + # and should be treated as guidance of input audio content rather than precisely + # what the model heard. 
The client can optionally set the language and prompt for + # transcription; these offer additional guidance to the transcription service. + # + # @return [OpenAI::Models::Realtime::AudioTranscription, nil] + optional :transcription, -> { OpenAI::Realtime::AudioTranscription } + + # @!attribute turn_detection + # Configuration for turn detection, either Server VAD or Semantic VAD. This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech. Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency. + # + # @return [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection, nil] + optional :turn_detection, -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection } + + # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeAudioConfigInput} for more details. + # + # @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The format of the input audio. + # + # @param noise_reduction [OpenAI::Models::Realtime::RealtimeAudioConfigInput::NoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn + # + # @param transcription [OpenAI::Models::Realtime::AudioTranscription] Configuration for input audio transcription, defaults to off and can be set to ` + # + # @param turn_detection [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection] Configuration for turn detection, either Server VAD or Semantic VAD. This can be + + # @see OpenAI::Models::Realtime::RealtimeAudioConfigInput#noise_reduction + class NoiseReduction < OpenAI::Internal::Type::BaseModel + # @!attribute type + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. + # + # @return [Symbol, OpenAI::Models::Realtime::NoiseReductionType, nil] + optional :type, enum: -> { OpenAI::Realtime::NoiseReductionType } + + # @!method initialize(type: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeAudioConfigInput::NoiseReduction} for more + # details. + # + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + # + # @param type [Symbol, OpenAI::Models::Realtime::NoiseReductionType] Type of noise reduction.
`near_field` is for close-talking microphones such as h + end + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_audio_config_output.rb b/lib/openai/models/realtime/realtime_audio_config_output.rb new file mode 100644 index 00000000..90cec154 --- /dev/null +++ b/lib/openai/models/realtime/realtime_audio_config_output.rb @@ -0,0 +1,100 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeAudioConfigOutput < OpenAI::Internal::Type::BaseModel + # @!attribute format_ + # The format of the output audio. + # + # @return [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA, nil] + optional :format_, union: -> { OpenAI::Realtime::RealtimeAudioFormats }, api_name: :format + + # @!attribute speed + # The speed of the model's spoken response as a multiple of the original speed. + # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. + # This value can only be changed in between model turns, not while a response is + # in progress. + # + # This parameter is a post-processing adjustment to the audio after it is + # generated, it's also possible to prompt the model to speak faster or slower. + # + # @return [Float, nil] + optional :speed, Float + + # @!attribute voice + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. + # + # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice, nil] + optional :voice, union: -> { OpenAI::Realtime::RealtimeAudioConfigOutput::Voice } + + # @!method initialize(format_: nil, speed: nil, voice: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeAudioConfigOutput} for more details. + # + # @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The format of the output audio. + # + # @param speed [Float] The speed of the model's spoken response as a multiple of the original speed. + # + # @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice] The voice the model uses to respond. Voice cannot be changed during the + + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. 
+ # + # @see OpenAI::Models::Realtime::RealtimeAudioConfigOutput#voice + module Voice + extend OpenAI::Internal::Type::Union + + variant String + + variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ALLOY } + + variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ASH } + + variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::BALLAD } + + variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::CORAL } + + variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ECHO } + + variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::SAGE } + + variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::SHIMMER } + + variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::VERSE } + + variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::MARIN } + + variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::CEDAR } + + # @!method self.variants + # @return [Array(String, Symbol)] + + define_sorbet_constant!(:Variants) do + T.type_alias { T.any(String, OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol) } + end + + # @!group + + ALLOY = :alloy + ASH = :ash + BALLAD = :ballad + CORAL = :coral + ECHO = :echo + SAGE = :sage + SHIMMER = :shimmer + VERSE = :verse + MARIN = :marin + CEDAR = :cedar + + # @!endgroup + end + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_audio_formats.rb b/lib/openai/models/realtime/realtime_audio_formats.rb new file mode 100644 index 00000000..471b63e2 --- /dev/null +++ b/lib/openai/models/realtime/realtime_audio_formats.rb @@ -0,0 +1,121 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + # The PCM audio format. Only a 24kHz sample rate is supported. + module RealtimeAudioFormats + extend OpenAI::Internal::Type::Union + + discriminator :type + + # The PCM audio format. Only a 24kHz sample rate is supported. + variant :"audio/pcm", -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCM } + + # The G.711 μ-law format. + variant :"audio/pcmu", -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU } + + # The G.711 A-law format. + variant :"audio/pcma", -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA } + + class AudioPCM < OpenAI::Internal::Type::BaseModel + # @!attribute rate + # The sample rate of the audio. Always `24000`. + # + # @return [Integer, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Rate, nil] + optional :rate, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate } + + # @!attribute type + # The audio format. Always `audio/pcm`. + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Type, nil] + optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type } + + # @!method initialize(rate: nil, type: nil) + # The PCM audio format. Only a 24kHz sample rate is supported. + # + # @param rate [Integer, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Rate] The sample rate of the audio. Always `24000`. + # + # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Type] The audio format. Always `audio/pcm`. + + # The sample rate of the audio. Always `24000`. 
+ # + # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM#rate + module Rate + extend OpenAI::Internal::Type::Enum + + RATE_24000 = 24_000 + + # @!method self.values + # @return [Array] + end + + # The audio format. Always `audio/pcm`. + # + # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM#type + module Type + extend OpenAI::Internal::Type::Enum + + AUDIO_PCM = :"audio/pcm" + + # @!method self.values + # @return [Array] + end + end + + class AudioPCMU < OpenAI::Internal::Type::BaseModel + # @!attribute type + # The audio format. Always `audio/pcmu`. + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::Type, nil] + optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type } + + # @!method initialize(type: nil) + # The G.711 μ-law format. + # + # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::Type] The audio format. Always `audio/pcmu`. + + # The audio format. Always `audio/pcmu`. + # + # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU#type + module Type + extend OpenAI::Internal::Type::Enum + + AUDIO_PCMU = :"audio/pcmu" + + # @!method self.values + # @return [Array] + end + end + + class AudioPCMA < OpenAI::Internal::Type::BaseModel + # @!attribute type + # The audio format. Always `audio/pcma`. + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::Type, nil] + optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type } + + # @!method initialize(type: nil) + # The G.711 A-law format. + # + # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::Type] The audio format. Always `audio/pcma`. + + # The audio format. Always `audio/pcma`. + # + # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA#type + module Type + extend OpenAI::Internal::Type::Enum + + AUDIO_PCMA = :"audio/pcma" + + # @!method self.values + # @return [Array] + end + end + + # @!method self.variants + # @return [Array(OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA)] + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb b/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb new file mode 100644 index 00000000..c2e87f19 --- /dev/null +++ b/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb @@ -0,0 +1,131 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel + # @!attribute create_response + # Whether or not to automatically generate a response when a VAD stop event + # occurs. + # + # @return [Boolean, nil] + optional :create_response, OpenAI::Internal::Type::Boolean + + # @!attribute eagerness + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, + # and `high` have max timeouts of 8s, 4s, and 2s respectively. 
+ # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Eagerness, nil] + optional :eagerness, enum: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness } + + # @!attribute idle_timeout_ms + # Optional idle timeout after which turn detection will auto-timeout when no + # additional audio is received. + # + # @return [Integer, nil] + optional :idle_timeout_ms, Integer, nil?: true + + # @!attribute interrupt_response + # Whether or not to automatically interrupt any ongoing response with output to + # the default conversation (i.e. `conversation` of `auto`) when a VAD start event + # occurs. + # + # @return [Boolean, nil] + optional :interrupt_response, OpenAI::Internal::Type::Boolean + + # @!attribute prefix_padding_ms + # Used only for `server_vad` mode. Amount of audio to include before the VAD + # detected speech (in milliseconds). Defaults to 300ms. + # + # @return [Integer, nil] + optional :prefix_padding_ms, Integer + + # @!attribute silence_duration_ms + # Used only for `server_vad` mode. Duration of silence to detect speech stop (in + # milliseconds). Defaults to 500ms. With shorter values the model will respond + # more quickly, but may jump in on short pauses from the user. + # + # @return [Integer, nil] + optional :silence_duration_ms, Integer + + # @!attribute threshold + # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + # defaults to 0.5. A higher threshold will require louder audio to activate the + # model, and thus might perform better in noisy environments. + # + # @return [Float, nil] + optional :threshold, Float + + # @!attribute type + # Type of turn detection. + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Type, nil] + optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type } + + # @!method initialize(create_response: nil, eagerness: nil, idle_timeout_ms: nil, interrupt_response: nil, prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection} for more details. + # + # Configuration for turn detection, either Server VAD or Semantic VAD. This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech. Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency. + # + # @param create_response [Boolean] Whether or not to automatically generate a response when a VAD stop event occurs + # + # @param eagerness [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Eagerness] Used only for `semantic_vad` mode. The eagerness of the model to respond.
`low` + # + # @param idle_timeout_ms [Integer, nil] Optional idle timeout after which turn detection will auto-timeout when + # + # @param interrupt_response [Boolean] Whether or not to automatically interrupt any ongoing response with output to th + # + # @param prefix_padding_ms [Integer] Used only for `server_vad` mode. Amount of audio to include before the VAD detec + # + # @param silence_duration_ms [Integer] Used only for `server_vad` mode. Duration of silence to detect speech stop (in m + # + # @param threshold [Float] Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + # + # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Type] Type of turn detection. + + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, + # and `high` have max timeouts of 8s, 4s, and 2s respectively. + # + # @see OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection#eagerness + module Eagerness + extend OpenAI::Internal::Type::Enum + + LOW = :low + MEDIUM = :medium + HIGH = :high + AUTO = :auto + + # @!method self.values + # @return [Array] + end + + # Type of turn detection. + # + # @see OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection#type + module Type + extend OpenAI::Internal::Type::Enum + + SERVER_VAD = :server_vad + SEMANTIC_VAD = :semantic_vad + + # @!method self.values + # @return [Array] + end + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_client_event.rb b/lib/openai/models/realtime/realtime_client_event.rb index ed3eebd9..bd5a4ca0 100644 --- a/lib/openai/models/realtime/realtime_client_event.rb +++ b/lib/openai/models/realtime/realtime_client_event.rb @@ -44,14 +44,17 @@ module RealtimeClientEvent variant :"conversation.item.truncate", -> { OpenAI::Realtime::ConversationItemTruncateEvent } # Send this event to append audio bytes to the input audio buffer. The audio - # buffer is temporary storage you can write to and later commit. In Server VAD - # mode, the audio buffer is used to detect speech and the server will decide + # buffer is temporary storage you can write to and later commit. A "commit" will create a new + # user message item in the conversation history from the buffer content and clear the buffer. + # Input audio transcription (if enabled) will be generated when the buffer is committed. + # + # If VAD is enabled the audio buffer is used to detect speech and the server will decide # when to commit. When Server VAD is disabled, you must commit the audio buffer - # manually. + # manually. Input audio noise reduction operates on writes to the audio buffer. # # The client may choose how much audio to place in each event up to a maximum # of 15 MiB, for example streaming smaller chunks from the client may allow the - # VAD to be more responsive. Unlike made other client events, the server will + # VAD to be more responsive. Unlike most other client events, the server will # not send a confirmation response to this event. variant :"input_audio_buffer.append", -> { OpenAI::Realtime::InputAudioBufferAppendEvent } @@ -66,21 +69,16 @@ module RealtimeClientEvent # [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). 
variant :"output_audio_buffer.clear", -> { OpenAI::Realtime::OutputAudioBufferClearEvent } - # Send this event to commit the user input audio buffer, which will create a - # new user message item in the conversation. This event will produce an error - # if the input audio buffer is empty. When in Server VAD mode, the client does - # not need to send this event, the server will commit the audio buffer - # automatically. + # Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically. # - # Committing the input audio buffer will trigger input audio transcription - # (if enabled in session configuration), but it will not create a response - # from the model. The server will respond with an `input_audio_buffer.committed` - # event. + # Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event. variant :"input_audio_buffer.commit", -> { OpenAI::Realtime::InputAudioBufferCommitEvent } # Send this event to cancel an in-progress response. The server will respond # with a `response.done` event with a status of `response.status=cancelled`. If - # there is no response to cancel, the server will respond with an error. + # there is no response to cancel, the server will respond with an error. It's safe + # to call `response.cancel` even if no response is in progress, an error will be + # returned the session will remain unaffected. variant :"response.cancel", -> { OpenAI::Realtime::ResponseCancelEvent } # This event instructs the server to create a Response, which means triggering @@ -89,27 +87,37 @@ module RealtimeClientEvent # # A Response will include at least one Item, and may have two, in which case # the second will be a function call. These Items will be appended to the - # conversation history. + # conversation history by default. # # The server will respond with a `response.created` event, events for Items # and content created, and finally a `response.done` event to indicate the # Response is complete. # # The `response.create` event includes inference configuration like - # `instructions`, and `temperature`. These fields will override the Session's + # `instructions` and `tools`. If these are set, they will override the Session's # configuration for this Response only. + # + # Responses can be created out-of-band of the default Conversation, meaning that they can + # have arbitrary input, and it's possible to disable writing the output to the Conversation. + # Only one Response can write to the default Conversation at a time, but otherwise multiple + # Responses can be created in parallel. The `metadata` field is a good way to disambiguate + # multiple simultaneous Responses. + # + # Clients can set `conversation` to `none` to create a Response that does not write to the default + # Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting + # raw Items and references to existing Items. variant :"response.create", -> { OpenAI::Realtime::ResponseCreateEvent } - # Send this event to update the session’s default configuration. - # The client may send this event at any time to update any field, - # except for `voice`. 
However, note that once a session has been - # initialized with a particular `model`, it can’t be changed to - # another model using `session.update`. + # Send this event to update the session’s configuration. + # The client may send this event at any time to update any field + # except for `voice` and `model`. `voice` can be updated only if there have been no other + # audio outputs yet. # # When the server receives a `session.update`, it will respond # with a `session.updated` event showing the full, effective configuration. - # Only the fields that are present are updated. To clear a field like - # `instructions`, pass an empty string. + # Only the fields that are present in the `session.update` are updated. To clear a field like + # `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array. + # To clear a field like `turn_detection`, pass `null`. variant :"session.update", -> { OpenAI::Realtime::SessionUpdateEvent } # Send this event to update a transcription session. diff --git a/lib/openai/models/realtime/realtime_client_secret_config.rb b/lib/openai/models/realtime/realtime_client_secret_config.rb deleted file mode 100644 index ba080e71..00000000 --- a/lib/openai/models/realtime/realtime_client_secret_config.rb +++ /dev/null @@ -1,64 +0,0 @@ -# frozen_string_literal: true - -module OpenAI - module Models - module Realtime - class RealtimeClientSecretConfig < OpenAI::Internal::Type::BaseModel - # @!attribute expires_after - # Configuration for the ephemeral token expiration. - # - # @return [OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter, nil] - optional :expires_after, -> { OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter } - - # @!method initialize(expires_after: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeClientSecretConfig} for more details. - # - # Configuration options for the generated client secret. - # - # @param expires_after [OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter] Configuration for the ephemeral token expiration. - - # @see OpenAI::Models::Realtime::RealtimeClientSecretConfig#expires_after - class ExpiresAfter < OpenAI::Internal::Type::BaseModel - # @!attribute anchor - # The anchor point for the ephemeral token expiration. Only `created_at` is - # currently supported. - # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter::Anchor] - required :anchor, enum: -> { OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter::Anchor } - - # @!attribute seconds - # The number of seconds from the anchor point to the expiration. Select a value - # between `10` and `7200`. - # - # @return [Integer, nil] - optional :seconds, Integer - - # @!method initialize(anchor:, seconds: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter} for more - # details. - # - # Configuration for the ephemeral token expiration. - # - # @param anchor [Symbol, OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter::Anchor] The anchor point for the ephemeral token expiration. Only `created_at` is curren - # - # @param seconds [Integer] The number of seconds from the anchor point to the expiration. Select a value be - - # The anchor point for the ephemeral token expiration. Only `created_at` is - # currently supported. 
- # - # @see OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter#anchor - module Anchor - extend OpenAI::Internal::Type::Enum - - CREATED_AT = :created_at - - # @!method self.values - # @return [Array] - end - end - end - end - end -end diff --git a/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb b/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb index dbc6883f..de169787 100644 --- a/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +++ b/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb @@ -24,13 +24,15 @@ class RealtimeConversationItemAssistantMessage < OpenAI::Internal::Type::BaseMod required :type, const: :message # @!attribute id - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. # # @return [String, nil] optional :id, String # @!attribute object - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. # # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Object, nil] optional :object, enum: -> { OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Object } @@ -42,13 +44,17 @@ class RealtimeConversationItemAssistantMessage < OpenAI::Internal::Type::BaseMod optional :status, enum: -> { OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Status } # @!method initialize(content:, id: nil, object: nil, status: nil, role: :assistant, type: :message) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage} for more + # details. + # # An assistant message item in a Realtime conversation. # # @param content [Array] The content of the message. # - # @param id [String] The unique ID of the item. + # @param id [String] The unique ID of the item. This may be provided by the client or generated by th # - # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Object] Identifier for the API object being returned - always `realtime.item`. + # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Object] Identifier for the API object being returned - always `realtime.item`. Optional # # @param status [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Status] The status of the item. Has no effect on the conversation. # @@ -57,37 +63,64 @@ class RealtimeConversationItemAssistantMessage < OpenAI::Internal::Type::BaseMod # @param type [Symbol, :message] The type of the item. Always `message`. class Content < OpenAI::Internal::Type::BaseModel + # @!attribute audio + # Base64-encoded audio bytes, these will be parsed as the format specified in the + # session output audio type configuration. This defaults to PCM 16-bit 24kHz mono + # if not specified. + # + # @return [String, nil] + optional :audio, String + # @!attribute text # The text content. # # @return [String, nil] optional :text, String + # @!attribute transcript + # The transcript of the audio content, this will always be present if the output + # type is `audio`. + # + # @return [String, nil] + optional :transcript, String + # @!attribute type - # The content type. Always `text` for assistant messages. 
+ # The content type, `output_text` or `output_audio` depending on the session + # `output_modalities` configuration. # # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::Type, nil] optional :type, enum: -> { OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type } - # @!method initialize(text: nil, type: nil) + # @!method initialize(audio: nil, text: nil, transcript: nil, type: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content} + # for more details. + # + # @param audio [String] Base64-encoded audio bytes, these will be parsed as the format specified in the + # # @param text [String] The text content. # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::Type] The content type. Always `text` for assistant messages. + # @param transcript [String] The transcript of the audio content, this will always be present if the output t + # + # @param type [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::Type] The content type, `output_text` or `output_audio` depending on the session `outp - # The content type. Always `text` for assistant messages. + # The content type, `output_text` or `output_audio` depending on the session + # `output_modalities` configuration. # # @see OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content#type module Type extend OpenAI::Internal::Type::Enum - TEXT = :text + OUTPUT_TEXT = :output_text + OUTPUT_AUDIO = :output_audio # @!method self.values # @return [Array] end end - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. # # @see OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage#object module Object diff --git a/lib/openai/models/realtime/realtime_conversation_item_function_call.rb b/lib/openai/models/realtime/realtime_conversation_item_function_call.rb index a05be1db..a014e36d 100644 --- a/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +++ b/lib/openai/models/realtime/realtime_conversation_item_function_call.rb @@ -5,7 +5,9 @@ module Models module Realtime class RealtimeConversationItemFunctionCall < OpenAI::Internal::Type::BaseModel # @!attribute arguments - # The arguments of the function call. + # The arguments of the function call. This is a JSON-encoded string representing + # the arguments passed to the function, for example + # `{"arg1": "value1", "arg2": 42}`. # # @return [String] required :arguments, String @@ -23,7 +25,8 @@ class RealtimeConversationItemFunctionCall < OpenAI::Internal::Type::BaseModel required :type, const: :function_call # @!attribute id - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. # # @return [String, nil] optional :id, String @@ -35,7 +38,8 @@ class RealtimeConversationItemFunctionCall < OpenAI::Internal::Type::BaseModel optional :call_id, String # @!attribute object - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. 
# # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Object, nil] optional :object, enum: -> { OpenAI::Realtime::RealtimeConversationItemFunctionCall::Object } @@ -47,23 +51,28 @@ class RealtimeConversationItemFunctionCall < OpenAI::Internal::Type::BaseModel optional :status, enum: -> { OpenAI::Realtime::RealtimeConversationItemFunctionCall::Status } # @!method initialize(arguments:, name:, id: nil, call_id: nil, object: nil, status: nil, type: :function_call) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall} for more + # details. + # # A function call item in a Realtime conversation. # - # @param arguments [String] The arguments of the function call. + # @param arguments [String] The arguments of the function call. This is a JSON-encoded string representing t # # @param name [String] The name of the function being called. # - # @param id [String] The unique ID of the item. + # @param id [String] The unique ID of the item. This may be provided by the client or generated by th # # @param call_id [String] The ID of the function call. # - # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Object] Identifier for the API object being returned - always `realtime.item`. + # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Object] Identifier for the API object being returned - always `realtime.item`. Optional # # @param status [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Status] The status of the item. Has no effect on the conversation. # # @param type [Symbol, :function_call] The type of the item. Always `function_call`. - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. # # @see OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall#object module Object diff --git a/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb b/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb index 8c9d4055..927d0431 100644 --- a/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +++ b/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb @@ -11,7 +11,8 @@ class RealtimeConversationItemFunctionCallOutput < OpenAI::Internal::Type::BaseM required :call_id, String # @!attribute output - # The output of the function call. + # The output of the function call, this is free text and can contain any + # information or simply be empty. # # @return [String] required :output, String @@ -23,13 +24,15 @@ class RealtimeConversationItemFunctionCallOutput < OpenAI::Internal::Type::BaseM required :type, const: :function_call_output # @!attribute id - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. # # @return [String, nil] optional :id, String # @!attribute object - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. 
# # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCallOutput::Object, nil] optional :object, enum: -> { OpenAI::Realtime::RealtimeConversationItemFunctionCallOutput::Object } @@ -41,21 +44,26 @@ class RealtimeConversationItemFunctionCallOutput < OpenAI::Internal::Type::BaseM optional :status, enum: -> { OpenAI::Realtime::RealtimeConversationItemFunctionCallOutput::Status } # @!method initialize(call_id:, output:, id: nil, object: nil, status: nil, type: :function_call_output) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeConversationItemFunctionCallOutput} for more + # details. + # # A function call output item in a Realtime conversation. # # @param call_id [String] The ID of the function call this output is for. # - # @param output [String] The output of the function call. + # @param output [String] The output of the function call, this is free text and can contain any informati # - # @param id [String] The unique ID of the item. + # @param id [String] The unique ID of the item. This may be provided by the client or generated by th # - # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCallOutput::Object] Identifier for the API object being returned - always `realtime.item`. + # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCallOutput::Object] Identifier for the API object being returned - always `realtime.item`. Optional # # @param status [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCallOutput::Status] The status of the item. Has no effect on the conversation. # # @param type [Symbol, :function_call_output] The type of the item. Always `function_call_output`. - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. # # @see OpenAI::Models::Realtime::RealtimeConversationItemFunctionCallOutput#object module Object diff --git a/lib/openai/models/realtime/realtime_conversation_item_system_message.rb b/lib/openai/models/realtime/realtime_conversation_item_system_message.rb index 00297e86..e9594728 100644 --- a/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +++ b/lib/openai/models/realtime/realtime_conversation_item_system_message.rb @@ -24,13 +24,15 @@ class RealtimeConversationItemSystemMessage < OpenAI::Internal::Type::BaseModel required :type, const: :message # @!attribute id - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. # # @return [String, nil] optional :id, String # @!attribute object - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. # # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemSystemMessage::Object, nil] optional :object, enum: -> { OpenAI::Realtime::RealtimeConversationItemSystemMessage::Object } @@ -42,13 +44,22 @@ class RealtimeConversationItemSystemMessage < OpenAI::Internal::Type::BaseModel optional :status, enum: -> { OpenAI::Realtime::RealtimeConversationItemSystemMessage::Status } # @!method initialize(content:, id: nil, object: nil, status: nil, role: :system, type: :message) - # A system message item in a Realtime conversation. 
+ # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeConversationItemSystemMessage} for more + # details. + # + # A system message in a Realtime conversation can be used to provide additional + # context or instructions to the model. This is similar but distinct from the + # instruction prompt provided at the start of a conversation, as system messages + # can be added at any point in the conversation. For major changes to the + # conversation's behavior, use instructions, but for smaller updates (e.g. "the + # user is now asking about a different topic"), use system messages. # # @param content [Array] The content of the message. # - # @param id [String] The unique ID of the item. + # @param id [String] The unique ID of the item. This may be provided by the client or generated by th # - # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemSystemMessage::Object] Identifier for the API object being returned - always `realtime.item`. + # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemSystemMessage::Object] Identifier for the API object being returned - always `realtime.item`. Optional # # @param status [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemSystemMessage::Status] The status of the item. Has no effect on the conversation. # @@ -87,7 +98,8 @@ module Type end end - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. # # @see OpenAI::Models::Realtime::RealtimeConversationItemSystemMessage#object module Object diff --git a/lib/openai/models/realtime/realtime_conversation_item_user_message.rb b/lib/openai/models/realtime/realtime_conversation_item_user_message.rb index 1df5ce9b..6723a09c 100644 --- a/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +++ b/lib/openai/models/realtime/realtime_conversation_item_user_message.rb @@ -24,13 +24,15 @@ class RealtimeConversationItemUserMessage < OpenAI::Internal::Type::BaseModel required :type, const: :message # @!attribute id - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. # # @return [String, nil] optional :id, String # @!attribute object - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. # # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Object, nil] optional :object, enum: -> { OpenAI::Realtime::RealtimeConversationItemUserMessage::Object } @@ -42,13 +44,17 @@ class RealtimeConversationItemUserMessage < OpenAI::Internal::Type::BaseModel optional :status, enum: -> { OpenAI::Realtime::RealtimeConversationItemUserMessage::Status } # @!method initialize(content:, id: nil, object: nil, status: nil, role: :user, type: :message) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeConversationItemUserMessage} for more + # details. + # # A user message item in a Realtime conversation. # # @param content [Array] The content of the message. # - # @param id [String] The unique ID of the item. + # @param id [String] The unique ID of the item. 
This may be provided by the client or generated by th # - # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Object] Identifier for the API object being returned - always `realtime.item`. + # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Object] Identifier for the API object being returned - always `realtime.item`. Optional # # @param status [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Status] The status of the item. Has no effect on the conversation. # @@ -58,11 +64,28 @@ class RealtimeConversationItemUserMessage < OpenAI::Internal::Type::BaseModel class Content < OpenAI::Internal::Type::BaseModel # @!attribute audio - # Base64-encoded audio bytes (for `input_audio`). + # Base64-encoded audio bytes (for `input_audio`), these will be parsed as the + # format specified in the session input audio type configuration. This defaults to + # PCM 16-bit 24kHz mono if not specified. # # @return [String, nil] optional :audio, String + # @!attribute detail + # The detail level of the image (for `input_image`). `auto` will default to + # `high`. + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::Detail, nil] + optional :detail, enum: -> { OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail } + + # @!attribute image_url + # Base64-encoded image bytes (for `input_image`) as a data URI. For example + # `data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...`. Supported formats are PNG + # and JPEG. + # + # @return [String, nil] + optional :image_url, String + # @!attribute text # The text content (for `input_text`). # @@ -70,27 +93,51 @@ class Content < OpenAI::Internal::Type::BaseModel optional :text, String # @!attribute transcript - # Transcript of the audio (for `input_audio`). + # Transcript of the audio (for `input_audio`). This is not sent to the model, but + # will be attached to the message item for reference. # # @return [String, nil] optional :transcript, String # @!attribute type - # The content type (`input_text` or `input_audio`). + # The content type (`input_text`, `input_audio`, or `input_image`). # # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::Type, nil] optional :type, enum: -> { OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Type } - # @!method initialize(audio: nil, text: nil, transcript: nil, type: nil) - # @param audio [String] Base64-encoded audio bytes (for `input_audio`). + # @!method initialize(audio: nil, detail: nil, image_url: nil, text: nil, transcript: nil, type: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content} for + # more details. + # + # @param audio [String] Base64-encoded audio bytes (for `input_audio`), these will be parsed as the form + # + # @param detail [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::Detail] The detail level of the image (for `input_image`). `auto` will default to `high` + # + # @param image_url [String] Base64-encoded image bytes (for `input_image`) as a data URI. For example `data: # # @param text [String] The text content (for `input_text`). # - # @param transcript [String] Transcript of the audio (for `input_audio`). + # @param transcript [String] Transcript of the audio (for `input_audio`). 
This is not sent to the model, but # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::Type] The content type (`input_text` or `input_audio`). + # @param type [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::Type] The content type (`input_text`, `input_audio`, or `input_image`). + + # The detail level of the image (for `input_image`). `auto` will default to + # `high`. + # + # @see OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content#detail + module Detail + extend OpenAI::Internal::Type::Enum + + AUTO = :auto + LOW = :low + HIGH = :high + + # @!method self.values + # @return [Array] + end - # The content type (`input_text` or `input_audio`). + # The content type (`input_text`, `input_audio`, or `input_image`). # # @see OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content#type module Type @@ -98,13 +145,15 @@ module Type INPUT_TEXT = :input_text INPUT_AUDIO = :input_audio + INPUT_IMAGE = :input_image # @!method self.values # @return [Array] end end - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. # # @see OpenAI::Models::Realtime::RealtimeConversationItemUserMessage#object module Object diff --git a/lib/openai/models/realtime/realtime_response.rb b/lib/openai/models/realtime/realtime_response.rb index f59b6fdd..86c474c5 100644 --- a/lib/openai/models/realtime/realtime_response.rb +++ b/lib/openai/models/realtime/realtime_response.rb @@ -5,19 +5,24 @@ module Models module Realtime class RealtimeResponse < OpenAI::Internal::Type::BaseModel # @!attribute id - # The unique ID of the response. + # The unique ID of the response, will look like `resp_1234`. # # @return [String, nil] optional :id, String + # @!attribute audio + # Configuration for audio output. + # + # @return [OpenAI::Models::Realtime::RealtimeResponse::Audio, nil] + optional :audio, -> { OpenAI::Realtime::RealtimeResponse::Audio } + # @!attribute conversation_id # Which conversation the response is added to, determined by the `conversation` # field in the `response.create` event. If `auto`, the response will be added to # the default conversation and the value of `conversation_id` will be an id like # `conv_1234`. If `none`, the response will not be added to any conversation and # the value of `conversation_id` will be `null`. If responses are being triggered - # by server VAD, the response will be added to the default conversation, thus the - # `conversation_id` will be an id like `conv_1234`. + # automatically by VAD, the response will be added to the default conversation. # # @return [String, nil] optional :conversation_id, String @@ -40,15 +45,6 @@ class RealtimeResponse < OpenAI::Internal::Type::BaseModel # @return [Hash{Symbol=>String}, nil] optional :metadata, OpenAI::Internal::Type::HashOf[String], nil?: true - # @!attribute modalities - # The set of modalities the model used to respond. If there are multiple - # modalities, the model will pick one, for example if `modalities` is - # `["text", "audio"]`, the model could be responding in either text or audio. - # - # @return [Array, nil] - optional :modalities, - -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::RealtimeResponse::Modality] } - # @!attribute object # The object type, must be `realtime.response`.
# @@ -61,11 +57,15 @@ class RealtimeResponse < OpenAI::Internal::Type::BaseModel # @return [Symbol, OpenAI::Models::Realtime::RealtimeResponse::Object, nil] optional :object, enum: -> { OpenAI::Realtime::RealtimeResponse::Object } # @!attribute output # The list of output items generated by the response. # # @return [Array, nil] optional :output, -> { OpenAI::Internal::Type::ArrayOf[union: OpenAI::Realtime::ConversationItem] } - # @!attribute output_audio_format - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # @!attribute output_modalities + # The set of modalities the model used to respond, currently the only possible + # values are `[\"audio\"]`, `[\"text\"]`. Audio output always includes a text + # transcript. Setting the output mode to `text` will disable audio output from the + # model. + # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeResponse::OutputAudioFormat, nil] - optional :output_audio_format, enum: -> { OpenAI::Realtime::RealtimeResponse::OutputAudioFormat } + # @return [Array, nil] + optional :output_modalities, + -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::RealtimeResponse::OutputModality] } # @!attribute status # The final status of the response (`completed`, `cancelled`, `failed`, or @@ -80,12 +80,6 @@ class RealtimeResponse < OpenAI::Internal::Type::BaseModel # @return [OpenAI::Models::Realtime::RealtimeResponseStatus, nil] optional :status_details, -> { OpenAI::Realtime::RealtimeResponseStatus } - # @!attribute temperature - # Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. - # - # @return [Float, nil] - optional :temperature, Float - # @!attribute usage # Usage statistics for the Response, this will correspond to billing. A Realtime # API session will maintain a conversation context and append new Items to the @@ -95,20 +89,15 @@ class RealtimeResponse < OpenAI::Internal::Type::BaseModel # @return [OpenAI::Models::Realtime::RealtimeResponseUsage, nil] optional :usage, -> { OpenAI::Realtime::RealtimeResponseUsage } - # @!attribute voice - # The voice the model used to respond. Current voice options are `alloy`, `ash`, - # `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. - # - # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeResponse::Voice, nil] - optional :voice, union: -> { OpenAI::Realtime::RealtimeResponse::Voice } - - # @!method initialize(id: nil, conversation_id: nil, max_output_tokens: nil, metadata: nil, modalities: nil, object: nil, output: nil, output_audio_format: nil, status: nil, status_details: nil, temperature: nil, usage: nil, voice: nil) + # @!method initialize(id: nil, audio: nil, conversation_id: nil, max_output_tokens: nil, metadata: nil, object: nil, output: nil, output_modalities: nil, status: nil, status_details: nil, usage: nil) # Some parameter documentations has been truncated, see # {OpenAI::Models::Realtime::RealtimeResponse} for more details. # # The response resource. # - # @param id [String] The unique ID of the response. + # @param id [String] The unique ID of the response, will look like `resp_1234`. + # + # @param audio [OpenAI::Models::Realtime::RealtimeResponse::Audio] Configuration for audio output. # # @param conversation_id [String] Which conversation the response is added to, determined by the `conversation` # # @param max_output_tokens [Integer, Symbol, :inf] Maximum number of output tokens for a single assistant response, inclusive of to # # @param metadata [Hash{Symbol=>String}, nil] Set of 16 key-value pairs that can be attached to an object. This can be # - # @param modalities [Array] The set of modalities the model used to respond. If there are multiple modalitie - # # @param object [Symbol, OpenAI::Models::Realtime::RealtimeResponse::Object] The object type, must be `realtime.response`.
# # @param output [Array] The list of output items generated by the response. # - # @param output_audio_format [Symbol, OpenAI::Models::Realtime::RealtimeResponse::OutputAudioFormat] The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # @param output_modalities [Array] The set of modalities the model used to respond, currently the only possible val # # @param status [Symbol, OpenAI::Models::Realtime::RealtimeResponse::Status] The final status of the response (`completed`, `cancelled`, `failed`, or # # @param status_details [OpenAI::Models::Realtime::RealtimeResponseStatus] Additional details about the status. # - # @param temperature [Float] Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. - # # @param usage [OpenAI::Models::Realtime::RealtimeResponseUsage] Usage statistics for the Response, this will correspond to billing. A - # - # @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeResponse::Voice] The voice the model used to respond. + + # @see OpenAI::Models::Realtime::RealtimeResponse#audio + class Audio < OpenAI::Internal::Type::BaseModel + # @!attribute output + # + # @return [OpenAI::Models::Realtime::RealtimeResponse::Audio::Output, nil] + optional :output, -> { OpenAI::Realtime::RealtimeResponse::Audio::Output } + + # @!method initialize(output: nil) + # Configuration for audio output. + # + # @param output [OpenAI::Models::Realtime::RealtimeResponse::Audio::Output] + + # @see OpenAI::Models::Realtime::RealtimeResponse::Audio#output + class Output < OpenAI::Internal::Type::BaseModel + # @!attribute format_ + # The format of the output audio. + # + # @return [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA, nil] + optional :format_, union: -> { OpenAI::Realtime::RealtimeAudioFormats }, api_name: :format + + # @!attribute voice + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. + # + # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice, nil] + optional :voice, union: -> { OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice } + + # @!method initialize(format_: nil, voice: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeResponse::Audio::Output} for more details. + # + # @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The format of the output audio. + # + # @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice] The voice the model uses to respond. Voice cannot be changed during the + + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. 
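Because the new `voice` field is a union of `String` and tagged symbols, either spelling below should type-check; a minimal sketch using the classes defined in this diff:

```ruby
# Known voices can be passed as symbols, unknown or future ones as raw strings.
known  = OpenAI::Realtime::RealtimeResponse::Audio::Output.new(voice: :marin)
custom = OpenAI::Realtime::RealtimeResponse::Audio::Output.new(voice: "future-voice")
```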
+ # + # @see OpenAI::Models::Realtime::RealtimeResponse::Audio::Output#voice + module Voice + extend OpenAI::Internal::Type::Union + + variant String + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice::ALLOY } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice::ASH } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice::BALLAD } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice::CORAL } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice::ECHO } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice::SAGE } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice::SHIMMER } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice::VERSE } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice::MARIN } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::Voice::CEDAR } + + # @!method self.variants + # @return [Array(String, Symbol)] + + define_sorbet_constant!(:Variants) do + T.type_alias { T.any(String, OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol) } + end + + # @!group + + ALLOY = :alloy + ASH = :ash + BALLAD = :ballad + CORAL = :coral + ECHO = :echo + SAGE = :sage + SHIMMER = :shimmer + VERSE = :verse + MARIN = :marin + CEDAR = :cedar + + # @!endgroup + end + end + end # Maximum number of output tokens for a single assistant response, inclusive of # tool calls, that was used in this response. @@ -149,16 +225,6 @@ module MaxOutputTokens # @return [Array(Integer, Symbol, :inf)] end - module Modality - extend OpenAI::Internal::Type::Enum - - TEXT = :text - AUDIO = :audio - - # @!method self.values - # @return [Array] - end - # The object type, must be `realtime.response`. # # @see OpenAI::Models::Realtime::RealtimeResponse#object @@ -171,15 +237,11 @@ module Object # @return [Array] end - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @see OpenAI::Models::Realtime::RealtimeResponse#output_audio_format - module OutputAudioFormat + module OutputModality extend OpenAI::Internal::Type::Enum - PCM16 = :pcm16 - G711_ULAW = :g711_ulaw - G711_ALAW = :g711_alaw + TEXT = :text + AUDIO = :audio # @!method self.values # @return [Array] @@ -201,58 +263,6 @@ module Status # @!method self.values # @return [Array] end - - # The voice the model used to respond. Current voice options are `alloy`, `ash`, - # `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. 
- # - # @see OpenAI::Models::Realtime::RealtimeResponse#voice - module Voice - extend OpenAI::Internal::Type::Union - - variant String - - variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Voice::ALLOY } - - variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Voice::ASH } - - variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Voice::BALLAD } - - variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Voice::CORAL } - - variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Voice::ECHO } - - variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Voice::SAGE } - - variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Voice::SHIMMER } - - variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Voice::VERSE } - - variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Voice::MARIN } - - variant const: -> { OpenAI::Models::Realtime::RealtimeResponse::Voice::CEDAR } - - # @!method self.variants - # @return [Array(String, Symbol)] - - define_sorbet_constant!(:Variants) do - T.type_alias { T.any(String, OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol) } - end - - # @!group - - ALLOY = :alloy - ASH = :ash - BALLAD = :ballad - CORAL = :coral - ECHO = :echo - SAGE = :sage - SHIMMER = :shimmer - VERSE = :verse - MARIN = :marin - CEDAR = :cedar - - # @!endgroup - end end end end diff --git a/lib/openai/models/realtime/realtime_response_create_audio_output.rb b/lib/openai/models/realtime/realtime_response_create_audio_output.rb new file mode 100644 index 00000000..13d726bb --- /dev/null +++ b/lib/openai/models/realtime/realtime_response_create_audio_output.rb @@ -0,0 +1,100 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeResponseCreateAudioOutput < OpenAI::Internal::Type::BaseModel + # @!attribute output + # + # @return [OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output, nil] + optional :output, -> { OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output } + + # @!method initialize(output: nil) + # Configuration for audio input and output. + # + # @param output [OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output] + + # @see OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput#output + class Output < OpenAI::Internal::Type::BaseModel + # @!attribute format_ + # The format of the output audio. + # + # @return [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA, nil] + optional :format_, union: -> { OpenAI::Realtime::RealtimeAudioFormats }, api_name: :format + + # @!attribute voice + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. + # + # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice, nil] + optional :voice, union: -> { OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice } + + # @!method initialize(format_: nil, voice: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output} for more + # details. 
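A hedged sketch of building this configuration for a `response.create` event; whether `AudioPCM` requires constructor arguments is not shown in this diff, so it is constructed bare here:

```ruby
audio = OpenAI::Realtime::RealtimeResponseCreateAudioOutput.new(
  output: OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output.new(
    format_: OpenAI::Realtime::RealtimeAudioFormats::AudioPCM.new, # serialized as `format`
    voice: :cedar
  )
)
```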
+ # + # @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The format of the output audio. + # + # @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice] The voice the model uses to respond. Voice cannot be changed during the + + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. + # + # @see OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output#voice + module Voice + extend OpenAI::Internal::Type::Union + + variant String + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::ALLOY } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::ASH } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::BALLAD } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::CORAL } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::ECHO } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::SAGE } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::SHIMMER } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::VERSE } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::MARIN } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::CEDAR } + + # @!method self.variants + # @return [Array(String, Symbol)] + + define_sorbet_constant!(:Variants) do + T.type_alias { T.any(String, OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol) } + end + + # @!group + + ALLOY = :alloy + ASH = :ash + BALLAD = :ballad + CORAL = :coral + ECHO = :echo + SAGE = :sage + SHIMMER = :shimmer + VERSE = :verse + MARIN = :marin + CEDAR = :cedar + + # @!endgroup + end + end + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb b/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb new file mode 100644 index 00000000..5abcd72a --- /dev/null +++ b/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb @@ -0,0 +1,310 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeResponseCreateMcpTool < OpenAI::Internal::Type::BaseModel + # @!attribute server_label + # A label for this MCP server, used to identify it in tool calls. + # + # @return [String] + required :server_label, String + + # @!attribute type + # The type of the MCP tool. Always `mcp`. + # + # @return [Symbol, :mcp] + required :type, const: :mcp + + # @!attribute allowed_tools + # List of allowed tool names or a filter object. 
+ # + # @return [Array, OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter, nil] + optional :allowed_tools, + union: -> { OpenAI::Realtime::RealtimeResponseCreateMcpTool::AllowedTools }, + nil?: true + + # @!attribute authorization + # An OAuth access token that can be used with a remote MCP server, either with a + # custom MCP server URL or a service connector. Your application must handle the + # OAuth authorization flow and provide the token here. + # + # @return [String, nil] + optional :authorization, String + + # @!attribute connector_id + # Identifier for service connectors, like those available in ChatGPT. One of + # `server_url` or `connector_id` must be provided. Learn more about service + # connectors + # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + # + # Currently supported `connector_id` values are: + # + # - Dropbox: `connector_dropbox` + # - Gmail: `connector_gmail` + # - Google Calendar: `connector_googlecalendar` + # - Google Drive: `connector_googledrive` + # - Microsoft Teams: `connector_microsoftteams` + # - Outlook Calendar: `connector_outlookcalendar` + # - Outlook Email: `connector_outlookemail` + # - SharePoint: `connector_sharepoint` + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::ConnectorID, nil] + optional :connector_id, enum: -> { OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID } + + # @!attribute headers + # Optional HTTP headers to send to the MCP server. Use for authentication or other + # purposes. + # + # @return [Hash{Symbol=>String}, nil] + optional :headers, OpenAI::Internal::Type::HashOf[String], nil?: true + + # @!attribute require_approval + # Specify which of the MCP server's tools require approval. + # + # @return [OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting, nil] + optional :require_approval, + union: -> { OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval }, + nil?: true + + # @!attribute server_description + # Optional description of the MCP server, used to provide more context. + # + # @return [String, nil] + optional :server_description, String + + # @!attribute server_url + # The URL for the MCP server. One of `server_url` or `connector_id` must be + # provided. + # + # @return [String, nil] + optional :server_url, String + + # @!method initialize(server_label:, allowed_tools: nil, authorization: nil, connector_id: nil, headers: nil, require_approval: nil, server_description: nil, server_url: nil, type: :mcp) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool} for more details. + # + # Give the model access to additional tools via remote Model Context Protocol + # (MCP) servers. + # [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + # + # @param server_label [String] A label for this MCP server, used to identify it in tool calls. + # + # @param allowed_tools [Array, OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter, nil] List of allowed tool names or a filter object. 
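Given the documented `initialize` signature, a sketch of a connector-backed MCP tool; the environment variable is a placeholder for a token obtained by your own OAuth flow:

```ruby
mcp_tool = OpenAI::Realtime::RealtimeResponseCreateMcpTool.new(
  server_label: "gmail",                         # required; names the server in tool calls
  connector_id: :connector_gmail,                # or pass server_url: instead
  authorization: ENV.fetch("GMAIL_OAUTH_TOKEN"), # placeholder OAuth token
  require_approval: :always                      # every tool call needs approval
)
```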
+ # + # @param authorization [String] An OAuth access token that can be used with a remote MCP server, either + # + # @param connector_id [Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::ConnectorID] Identifier for service connectors, like those available in ChatGPT. One of + # + # @param headers [Hash{Symbol=>String}, nil] Optional HTTP headers to send to the MCP server. Use for authentication + # + # @param require_approval [OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting, nil] Specify which of the MCP server's tools require approval. + # + # @param server_description [String] Optional description of the MCP server, used to provide more context. + # + # @param server_url [String] The URL for the MCP server. One of `server_url` or `connector_id` must be + # + # @param type [Symbol, :mcp] The type of the MCP tool. Always `mcp`. + + # List of allowed tool names or a filter object. + # + # @see OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool#allowed_tools + module AllowedTools + extend OpenAI::Internal::Type::Union + + # A string array of allowed tool names + variant -> { OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::StringArray } + + # A filter object to specify which tools are allowed. + variant -> { OpenAI::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter } + + class McpToolFilter < OpenAI::Internal::Type::BaseModel + # @!attribute read_only + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + # + # @return [Boolean, nil] + optional :read_only, OpenAI::Internal::Type::Boolean + + # @!attribute tool_names + # List of allowed tool names. + # + # @return [Array, nil] + optional :tool_names, OpenAI::Internal::Type::ArrayOf[String] + + # @!method initialize(read_only: nil, tool_names: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter} + # for more details. + # + # A filter object to specify which tools are allowed. + # + # @param read_only [Boolean] Indicates whether or not a tool modifies data or is read-only. If an + # + # @param tool_names [Array] List of allowed tool names. + end + + # @!method self.variants + # @return [Array(Array, OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter)] + + # @type [OpenAI::Internal::Type::Converter] + StringArray = OpenAI::Internal::Type::ArrayOf[String] + end + + # Identifier for service connectors, like those available in ChatGPT. One of + # `server_url` or `connector_id` must be provided. Learn more about service + # connectors + # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). 
+ # + # Currently supported `connector_id` values are: + # + # - Dropbox: `connector_dropbox` + # - Gmail: `connector_gmail` + # - Google Calendar: `connector_googlecalendar` + # - Google Drive: `connector_googledrive` + # - Microsoft Teams: `connector_microsoftteams` + # - Outlook Calendar: `connector_outlookcalendar` + # - Outlook Email: `connector_outlookemail` + # - SharePoint: `connector_sharepoint` + # + # @see OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool#connector_id + module ConnectorID + extend OpenAI::Internal::Type::Enum + + CONNECTOR_DROPBOX = :connector_dropbox + CONNECTOR_GMAIL = :connector_gmail + CONNECTOR_GOOGLECALENDAR = :connector_googlecalendar + CONNECTOR_GOOGLEDRIVE = :connector_googledrive + CONNECTOR_MICROSOFTTEAMS = :connector_microsoftteams + CONNECTOR_OUTLOOKCALENDAR = :connector_outlookcalendar + CONNECTOR_OUTLOOKEMAIL = :connector_outlookemail + CONNECTOR_SHAREPOINT = :connector_sharepoint + + # @!method self.values + # @return [Array] + end + + # Specify which of the MCP server's tools require approval. + # + # @see OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool#require_approval + module RequireApproval + extend OpenAI::Internal::Type::Union + + # Specify which of the MCP server's tools require approval. Can be + # `always`, `never`, or a filter object associated with tools + # that require approval. + variant -> { OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter } + + # Specify a single approval policy for all tools. One of `always` or + # `never`. When set to `always`, all tools will require approval. When + # set to `never`, all tools will not require approval. + variant enum: -> { OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting } + + class McpToolApprovalFilter < OpenAI::Internal::Type::BaseModel + # @!attribute always + # A filter object to specify which tools are allowed. + # + # @return [OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always, nil] + optional :always, + -> { OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always } + + # @!attribute never + # A filter object to specify which tools are allowed. + # + # @return [OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never, nil] + optional :never, + -> { OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never } + + # @!method initialize(always: nil, never: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter} + # for more details. + # + # Specify which of the MCP server's tools require approval. Can be `always`, + # `never`, or a filter object associated with tools that require approval. + # + # @param always [OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always] A filter object to specify which tools are allowed. + # + # @param never [OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never] A filter object to specify which tools are allowed. + + # @see OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter#always + class Always < OpenAI::Internal::Type::BaseModel + # @!attribute read_only + # Indicates whether or not a tool modifies data or is read-only. 
If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + # + # @return [Boolean, nil] + optional :read_only, OpenAI::Internal::Type::Boolean + + # @!attribute tool_names + # List of allowed tool names. + # + # @return [Array, nil] + optional :tool_names, OpenAI::Internal::Type::ArrayOf[String] + + # @!method initialize(read_only: nil, tool_names: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always} + # for more details. + # + # A filter object to specify which tools are allowed. + # + # @param read_only [Boolean] Indicates whether or not a tool modifies data or is read-only. If an + # + # @param tool_names [Array] List of allowed tool names. + end + + # @see OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter#never + class Never < OpenAI::Internal::Type::BaseModel + # @!attribute read_only + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + # + # @return [Boolean, nil] + optional :read_only, OpenAI::Internal::Type::Boolean + + # @!attribute tool_names + # List of allowed tool names. + # + # @return [Array, nil] + optional :tool_names, OpenAI::Internal::Type::ArrayOf[String] + + # @!method initialize(read_only: nil, tool_names: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never} + # for more details. + # + # A filter object to specify which tools are allowed. + # + # @param read_only [Boolean] Indicates whether or not a tool modifies data or is read-only. If an + # + # @param tool_names [Array] List of allowed tool names. + end + end + + # Specify a single approval policy for all tools. One of `always` or `never`. When + # set to `always`, all tools will require approval. When set to `never`, all tools + # will not require approval. + module McpToolApprovalSetting + extend OpenAI::Internal::Type::Enum + + ALWAYS = :always + NEVER = :never + + # @!method self.values + # @return [Array] + end + + # @!method self.variants + # @return [Array(OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting)] + end + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_response_create_params.rb b/lib/openai/models/realtime/realtime_response_create_params.rb new file mode 100644 index 00000000..c6f9458f --- /dev/null +++ b/lib/openai/models/realtime/realtime_response_create_params.rb @@ -0,0 +1,225 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeResponseCreateParams < OpenAI::Internal::Type::BaseModel + # @!attribute audio + # Configuration for audio input and output. + # + # @return [OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput, nil] + optional :audio, -> { OpenAI::Realtime::RealtimeResponseCreateAudioOutput } + + # @!attribute conversation + # Controls which conversation the response is added to. Currently supports `auto` + # and `none`, with `auto` as the default value. 
The `auto` value means that the + # contents of the response will be added to the default conversation. Set this to + # `none` to create an out-of-band response which will not add items to the default + # conversation. + # + # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateParams::Conversation, nil] + optional :conversation, union: -> { OpenAI::Realtime::RealtimeResponseCreateParams::Conversation } + + # @!attribute input + # Input items to include in the prompt for the model. Using this field creates a + # new context for this Response instead of using the default conversation. An + # empty array `[]` will clear the context for this Response. Note that this can + # include references to items that previously appeared in the session using their + # id. + # + # @return [Array, nil] + optional :input, -> { OpenAI::Internal::Type::ArrayOf[union: OpenAI::Realtime::ConversationItem] } + + # @!attribute instructions + # The default system instructions (i.e. system message) prepended to model calls. + # This field allows the client to guide the model on desired responses. The model + # can be instructed on response content and format (e.g. "be extremely succinct", + # "act friendly", "here are examples of good responses") and on audio behavior + # (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + # instructions are not guaranteed to be followed by the model, but they provide + # guidance to the model on the desired behavior. Note that the server sets default + # instructions which will be used if this field is not set and are visible in the + # `session.created` event at the start of the session. + # + # @return [String, nil] + optional :instructions, String + + # @!attribute max_output_tokens + # Maximum number of output tokens for a single assistant response, inclusive of + # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + # `inf` for the maximum available tokens for a given model. Defaults to `inf`. + # + # @return [Integer, Symbol, :inf, nil] + optional :max_output_tokens, union: -> { OpenAI::Realtime::RealtimeResponseCreateParams::MaxOutputTokens } + + # @!attribute metadata + # Set of 16 key-value pairs that can be attached to an object. This can be useful + # for storing additional information about the object in a structured format, and + # querying for objects via API or the dashboard. + # + # Keys are strings with a maximum length of 64 characters. Values are strings with + # a maximum length of 512 characters. + # + # @return [Hash{Symbol=>String}, nil] + optional :metadata, OpenAI::Internal::Type::HashOf[String], nil?: true + + # @!attribute output_modalities + # The set of modalities the model used to respond, currently the only possible + # values are `[\"audio\"]`, `[\"text\"]`. Audio output always includes a text + # transcript. Setting the output mode to `text` will disable audio output from the + # model. + # + # @return [Array, nil] + optional :output_modalities, + -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality] } + + # @!attribute prompt + # Reference to a prompt template and its variables. + # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + # + # @return [OpenAI::Models::Responses::ResponsePrompt, nil] + optional :prompt, -> { OpenAI::Responses::ResponsePrompt }, nil?: true + + # @!attribute tool_choice + # How the model chooses tools.
Provide one of the string modes or force a specific + # function/MCP tool. + # + # @return [Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp, nil] + optional :tool_choice, union: -> { OpenAI::Realtime::RealtimeResponseCreateParams::ToolChoice } + + # @!attribute tools + # Tools available to the model. + # + # @return [Array, nil] + optional :tools, + -> { OpenAI::Internal::Type::ArrayOf[union: OpenAI::Realtime::RealtimeResponseCreateParams::Tool] } + + # @!method initialize(audio: nil, conversation: nil, input: nil, instructions: nil, max_output_tokens: nil, metadata: nil, output_modalities: nil, prompt: nil, tool_choice: nil, tools: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeResponseCreateParams} for more details. + # + # Create a new Realtime response with these parameters + # + # @param audio [OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput] Configuration for audio input and output. + # + # @param conversation [String, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateParams::Conversation] Controls which conversation the response is added to. Currently supports + # + # @param input [Array] Input items to include in the prompt for the model. Using this field + # + # @param instructions [String] The default system instructions (i.e. system message) prepended to model calls. + # + # @param max_output_tokens [Integer, Symbol, :inf] Maximum number of output tokens for a single assistant response, + # + # @param metadata [Hash{Symbol=>String}, nil] Set of 16 key-value pairs that can be attached to an object. This can be + # + # @param output_modalities [Array] The set of modalities the model used to respond, currently the only possible val + # + # @param prompt [OpenAI::Models::Responses::ResponsePrompt, nil] Reference to a prompt template and its variables. + # + # @param tool_choice [Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp] How the model chooses tools. Provide one of the string modes or force a specific + # + # @param tools [Array] Tools available to the model. + + # Controls which conversation the response is added to. Currently supports `auto` + # and `none`, with `auto` as the default value. The `auto` value means that the + # contents of the response will be added to the default conversation. Set this to + # `none` to create an out-of-band response which will not add items to default + # conversation. + # + # @see OpenAI::Models::Realtime::RealtimeResponseCreateParams#conversation + module Conversation + extend OpenAI::Internal::Type::Union + + variant String + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateParams::Conversation::AUTO } + + variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateParams::Conversation::NONE } + + # @!method self.variants + # @return [Array(String, Symbol)] + + define_sorbet_constant!(:Variants) do + T.type_alias { T.any(String, OpenAI::Realtime::RealtimeResponseCreateParams::Conversation::TaggedSymbol) } + end + + # @!group + + AUTO = :auto + NONE = :none + + # @!endgroup + end + + # Maximum number of output tokens for a single assistant response, inclusive of + # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + # `inf` for the maximum available tokens for a given model. Defaults to `inf`. 
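Tying the parameters together, a sketch of an out-of-band, text-only response request; the surrounding `response.create` client event and transport are assumed:

```ruby
params = OpenAI::Realtime::RealtimeResponseCreateParams.new(
  conversation: :none,         # out-of-band: adds no items to the default conversation
  instructions: "Summarize the call so far in one sentence.",
  output_modalities: [:text],  # text only; disables audio for this response
  metadata: {purpose: "summary"}
)
```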
+ # + # @see OpenAI::Models::Realtime::RealtimeResponseCreateParams#max_output_tokens + module MaxOutputTokens + extend OpenAI::Internal::Type::Union + + variant Integer + + variant const: :inf + + # @!method self.variants + # @return [Array(Integer, Symbol, :inf)] + end + + module OutputModality + extend OpenAI::Internal::Type::Enum + + TEXT = :text + AUDIO = :audio + + # @!method self.values + # @return [Array] + end + + # How the model chooses tools. Provide one of the string modes or force a specific + # function/MCP tool. + # + # @see OpenAI::Models::Realtime::RealtimeResponseCreateParams#tool_choice + module ToolChoice + extend OpenAI::Internal::Type::Union + + # Controls which (if any) tool is called by the model. + # + # `none` means the model will not call any tool and instead generates a message. + # + # `auto` means the model can pick between generating a message or calling one or + # more tools. + # + # `required` means the model must call one or more tools. + variant enum: -> { OpenAI::Responses::ToolChoiceOptions } + + # Use this option to force the model to call a specific function. + variant -> { OpenAI::Responses::ToolChoiceFunction } + + # Use this option to force the model to call a specific tool on a remote MCP server. + variant -> { OpenAI::Responses::ToolChoiceMcp } + + # @!method self.variants + # @return [Array(Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp)] + end + + # Give the model access to additional tools via remote Model Context Protocol + # (MCP) servers. + # [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + module Tool + extend OpenAI::Internal::Type::Union + + variant -> { OpenAI::Realtime::Models } + + # Give the model access to additional tools via remote Model Context Protocol + # (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + variant -> { OpenAI::Realtime::RealtimeResponseCreateMcpTool } + + # @!method self.variants + # @return [Array(OpenAI::Models::Realtime::Models, OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool)] + end + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_response_status.rb b/lib/openai/models/realtime/realtime_response_status.rb index 2c3678b4..47b5e5bf 100644 --- a/lib/openai/models/realtime/realtime_response_status.rb +++ b/lib/openai/models/realtime/realtime_response_status.rb @@ -36,7 +36,7 @@ class RealtimeResponseStatus < OpenAI::Internal::Type::BaseModel # # @param error [OpenAI::Models::Realtime::RealtimeResponseStatus::Error] A description of the error that caused the response to fail, # - # @param reason [Symbol, OpenAI::Models::Realtime::RealtimeResponseStatus::Reason] The reason the Response did not complete. For a `cancelled` Response, + # @param reason [Symbol, OpenAI::Models::Realtime::RealtimeResponseStatus::Reason] The reason the Response did not complete. 
For a `cancelled` Response, one of `t # # @param type [Symbol, OpenAI::Models::Realtime::RealtimeResponseStatus::Type] The type of error that caused the response to fail, corresponding diff --git a/lib/openai/models/realtime/realtime_response_usage.rb b/lib/openai/models/realtime/realtime_response_usage.rb index 8a1a1679..993b1f0e 100644 --- a/lib/openai/models/realtime/realtime_response_usage.rb +++ b/lib/openai/models/realtime/realtime_response_usage.rb @@ -5,7 +5,10 @@ module Models module Realtime class RealtimeResponseUsage < OpenAI::Internal::Type::BaseModel # @!attribute input_token_details - # Details about the input tokens used in the Response. + # Details about the input tokens used in the Response. Cached tokens are tokens + # from previous turns in the conversation that are included as context for the + # current response. Cached tokens here are counted as a subset of input tokens, + # meaning input tokens will include cached and uncached tokens. # # @return [OpenAI::Models::Realtime::RealtimeResponseUsageInputTokenDetails, nil] optional :input_token_details, -> { OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails } @@ -46,7 +49,7 @@ class RealtimeResponseUsage < OpenAI::Internal::Type::BaseModel # Conversation, thus output from previous turns (text and audio tokens) will # become the input for later turns. # - # @param input_token_details [OpenAI::Models::Realtime::RealtimeResponseUsageInputTokenDetails] Details about the input tokens used in the Response. + # @param input_token_details [OpenAI::Models::Realtime::RealtimeResponseUsageInputTokenDetails] Details about the input tokens used in the Response. Cached tokens are tokens fr # # @param input_tokens [Integer] The number of input tokens used in the Response, including text and # diff --git a/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb b/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb index 847056dd..913b5fef 100644 --- a/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +++ b/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb @@ -5,31 +5,81 @@ module Models module Realtime class RealtimeResponseUsageInputTokenDetails < OpenAI::Internal::Type::BaseModel # @!attribute audio_tokens - # The number of audio tokens used in the Response. + # The number of audio tokens used as input for the Response. # # @return [Integer, nil] optional :audio_tokens, Integer # @!attribute cached_tokens - # The number of cached tokens used in the Response. + # The number of cached tokens used as input for the Response. # # @return [Integer, nil] optional :cached_tokens, Integer + # @!attribute cached_tokens_details + # Details about the cached tokens used as input for the Response. + # + # @return [OpenAI::Models::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails, nil] + optional :cached_tokens_details, + -> { OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails } + + # @!attribute image_tokens + # The number of image tokens used as input for the Response. + # + # @return [Integer, nil] + optional :image_tokens, Integer + # @!attribute text_tokens - # The number of text tokens used in the Response. + # The number of text tokens used as input for the Response. # # @return [Integer, nil] optional :text_tokens, Integer - # @!method initialize(audio_tokens: nil, cached_tokens: nil, text_tokens: nil) - # Details about the input tokens used in the Response. 
+ # @!method initialize(audio_tokens: nil, cached_tokens: nil, cached_tokens_details: nil, image_tokens: nil, text_tokens: nil) + # Details about the input tokens used in the Response. Cached tokens are tokens + # from previous turns in the conversation that are included as context for the + # current response. Cached tokens here are counted as a subset of input tokens, + # meaning input tokens will include cached and uncached tokens. + # + # @param audio_tokens [Integer] The number of audio tokens used as input for the Response. # - # @param audio_tokens [Integer] The number of audio tokens used in the Response. + # @param cached_tokens [Integer] The number of cached tokens used as input for the Response. # - # @param cached_tokens [Integer] The number of cached tokens used in the Response. + # @param cached_tokens_details [OpenAI::Models::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails] Details about the cached tokens used as input for the Response. # - # @param text_tokens [Integer] The number of text tokens used in the Response. + # @param image_tokens [Integer] The number of image tokens used as input for the Response. + # + # @param text_tokens [Integer] The number of text tokens used as input for the Response. + + # @see OpenAI::Models::Realtime::RealtimeResponseUsageInputTokenDetails#cached_tokens_details + class CachedTokensDetails < OpenAI::Internal::Type::BaseModel + # @!attribute audio_tokens + # The number of cached audio tokens used as input for the Response. + # + # @return [Integer, nil] + optional :audio_tokens, Integer + + # @!attribute image_tokens + # The number of cached image tokens used as input for the Response. + # + # @return [Integer, nil] + optional :image_tokens, Integer + + # @!attribute text_tokens + # The number of cached text tokens used as input for the Response. + # + # @return [Integer, nil] + optional :text_tokens, Integer + + # @!method initialize(audio_tokens: nil, image_tokens: nil, text_tokens: nil) + # Details about the cached tokens used as input for the Response. + # + # @param audio_tokens [Integer] The number of cached audio tokens used as input for the Response. + # + # @param image_tokens [Integer] The number of cached image tokens used as input for the Response. + # + # @param text_tokens [Integer] The number of cached text tokens used as input for the Response. + end end end end diff --git a/lib/openai/models/realtime/realtime_server_event.rb b/lib/openai/models/realtime/realtime_server_event.rb index 3f400751..33b083c2 100644 --- a/lib/openai/models/realtime/realtime_server_event.rb +++ b/lib/openai/models/realtime/realtime_server_event.rb @@ -30,7 +30,7 @@ module RealtimeServerEvent # This event is the output of audio transcription for user audio written to the # user audio buffer. Transcription begins when the input audio buffer is - # committed by the client or server (in `server_vad` mode). Transcription runs + # committed by the client or server (when VAD is enabled). Transcription runs # asynchronously with Response creation, so this event may come before or after # the Response events. # @@ -41,7 +41,7 @@ module RealtimeServerEvent variant :"conversation.item.input_audio_transcription.completed", -> { OpenAI::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent } - # Returned when the text value of an input audio transcription content part is updated. + # Returned when the text value of an input audio transcription content part is updated with incremental transcription results. 
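A sketch of reading the enriched usage breakdown; `usage` is assumed to be `response.usage` on a finished Response:

```ruby
details  = usage.input_token_details
cached   = details&.cached_tokens_details
uncached = usage.input_tokens.to_i - details&.cached_tokens.to_i
puts "cached audio tokens: #{cached&.audio_tokens.to_i}, uncached input tokens: #{uncached}"
```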
variant :"conversation.item.input_audio_transcription.delta", -> { OpenAI::Realtime::ConversationItemInputAudioTranscriptionDeltaEvent } @@ -51,7 +51,7 @@ module RealtimeServerEvent variant :"conversation.item.input_audio_transcription.failed", -> { OpenAI::Realtime::ConversationItemInputAudioTranscriptionFailedEvent } - # Returned when a conversation item is retrieved with `conversation.item.retrieve`. + # Returned when a conversation item is retrieved with `conversation.item.retrieve`. This is provided as a way to fetch the server's representation of an item, for example to get access to the post-processed audio data after noise cancellation and VAD. It includes the full content of the Item, including audio data. variant :"conversation.item.retrieved", -> { OpenAI::Realtime::RealtimeServerEvent::ConversationItemRetrieved } @@ -133,6 +133,12 @@ module RealtimeServerEvent # Returned when a Response is done streaming. Always emitted, no matter the # final state. The Response object included in the `response.done` event will # include all output Items in the Response but will omit the raw audio data. + # + # Clients should check the `status` field of the Response to determine if it was successful + # (`completed`) or if there was another outcome: `cancelled`, `failed`, or `incomplete`. + # + # A response will contain all output items that were generated during the response, excluding + # any audio content. variant :"response.done", -> { OpenAI::Realtime::ResponseDoneEvent } # Returned when the model-generated function call arguments are updated. @@ -196,10 +202,17 @@ module RealtimeServerEvent variant :"output_audio_buffer.cleared", -> { OpenAI::Realtime::RealtimeServerEvent::OutputAudioBufferCleared } - # Returned when a conversation item is added. + # Sent by the server when an Item is added to the default Conversation. This can happen in several cases: + # - When the client sends a `conversation.item.create` event. + # - When the input audio buffer is committed. In this case the item will be a user message containing the audio from the buffer. + # - When the model is generating a Response. In this case the `conversation.item.added` event will be sent when the model starts generating a specific Item, and thus it will not yet have any content (and `status` will be `in_progress`). + # + # The event will include the full content of the Item (except when model is generating a Response) except for audio data, which can be retrieved separately with a `conversation.item.retrieve` event if necessary. variant :"conversation.item.added", -> { OpenAI::Realtime::ConversationItemAdded } # Returned when a conversation item is finalized. + # + # The event will include the full content of the Item except for audio data, which can be retrieved separately with a `conversation.item.retrieve` event if needed. variant :"conversation.item.done", -> { OpenAI::Realtime::ConversationItemDone } # Returned when the server VAD timeout is triggered for the input audio buffer. @@ -254,7 +267,10 @@ class ConversationItemRetrieved < OpenAI::Internal::Type::BaseModel # @!method initialize(event_id:, item:, type: :"conversation.item.retrieved") # Returned when a conversation item is retrieved with - # `conversation.item.retrieve`. + # `conversation.item.retrieve`. This is provided as a way to fetch the server's + # representation of an item, for example to get access to the post-processed audio + # data after noise cancellation and VAD. It includes the full content of the Item, + # including audio data. 
# # @param event_id [String] The unique ID of the server event. # diff --git a/lib/openai/models/realtime/realtime_session.rb b/lib/openai/models/realtime/realtime_session.rb index a95b4c36..69f3c37c 100644 --- a/lib/openai/models/realtime/realtime_session.rb +++ b/lib/openai/models/realtime/realtime_session.rb @@ -55,10 +55,8 @@ class RealtimeSession < OpenAI::Internal::Type::BaseModel # what the model heard. The client can optionally set the language and prompt for # transcription, these offer additional guidance to the transcription service. # - # @return [OpenAI::Models::Realtime::RealtimeSession::InputAudioTranscription, nil] - optional :input_audio_transcription, - -> { OpenAI::Realtime::RealtimeSession::InputAudioTranscription }, - nil?: true + # @return [OpenAI::Models::Realtime::AudioTranscription, nil] + optional :input_audio_transcription, -> { OpenAI::Realtime::AudioTranscription }, nil?: true # @!attribute instructions # The default system instructions (i.e. system message) prepended to model calls. @@ -144,8 +142,8 @@ class RealtimeSession < OpenAI::Internal::Type::BaseModel # @!attribute tools # Tools (functions) available to the model. # - # @return [Array, nil] - optional :tools, -> { OpenAI::Internal::Type::ArrayOf[OpenAI::Realtime::RealtimeSession::Tool] } + # @return [Array, nil] + optional :tools, -> { OpenAI::Internal::Type::ArrayOf[OpenAI::Realtime::Models] } # @!attribute tracing # Configuration options for tracing. Set to null to disable tracing. Once tracing @@ -196,7 +194,7 @@ class RealtimeSession < OpenAI::Internal::Type::BaseModel # # @param input_audio_noise_reduction [OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn # - # @param input_audio_transcription [OpenAI::Models::Realtime::RealtimeSession::InputAudioTranscription, nil] Configuration for input audio transcription, defaults to off and can be set to ` + # @param input_audio_transcription [OpenAI::Models::Realtime::AudioTranscription, nil] Configuration for input audio transcription, defaults to off and can be set to ` # # @param instructions [String] The default system instructions (i.e. system message) prepended to model # @@ -218,7 +216,7 @@ class RealtimeSession < OpenAI::Internal::Type::BaseModel # # @param tool_choice [String] How the model chooses tools. Options are `auto`, `none`, `required`, or # - # @param tools [Array] Tools (functions) available to the model. + # @param tools [Array] Tools (functions) available to the model. # # @param tracing [Symbol, :auto, OpenAI::Models::Realtime::RealtimeSession::Tracing::TracingConfiguration, nil] Configuration options for tracing. Set to null to disable tracing. Once # @@ -258,8 +256,8 @@ class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel # headphones, `far_field` is for far-field microphones such as laptop or # conference room microphones. 
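A minimal sketch of picking a reduction profile via the new shared enum:

```ruby
noise_reduction = OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction.new(
  type: :near_field # headset or close-talking mic; use :far_field for laptop/room mics
)
```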
# - # @return [Symbol, OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::Type, nil] - optional :type, enum: -> { OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type } + # @return [Symbol, OpenAI::Models::Realtime::NoiseReductionType, nil] + optional :type, enum: -> { OpenAI::Realtime::NoiseReductionType } # @!method initialize(type: nil) # Some parameter documentations has been truncated, see @@ -272,70 +270,7 @@ class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel # detection accuracy (reducing false positives) and model performance by improving # perception of the input audio. # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::Type] Type of noise reduction. `near_field` is for close-talking microphones such as h - - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. - # - # @see OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction#type - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD = :near_field - FAR_FIELD = :far_field - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::RealtimeSession#input_audio_transcription - class InputAudioTranscription < OpenAI::Internal::Type::BaseModel - # @!attribute language - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - # - # @return [String, nil] - optional :language, String - - # @!attribute model - # The model to use for transcription, current options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1`. - # - # @return [String, nil] - optional :model, String - - # @!attribute prompt - # An optional text to guide the model's style or continue a previous audio - # segment. For `whisper-1`, the - # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - # For `gpt-4o-transcribe` models, the prompt is a free text string, for example - # "expect words related to technology". - # - # @return [String, nil] - optional :prompt, String - - # @!method initialize(language: nil, model: nil, prompt: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeSession::InputAudioTranscription} for more - # details. - # - # Configuration for input audio transcription, defaults to off and can be set to - # `null` to turn off once on. Input audio transcription is not native to the - # model, since the model consumes audio directly. Transcription runs - # asynchronously through - # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) - # and should be treated as guidance of input audio content rather than precisely - # what the model heard. The client can optionally set the language and prompt for - # transcription, these offer additional guidance to the transcription service. - # - # @param language [String] The language of the input audio. 
Supplying the input language in - # - # @param model [String] The model to use for transcription, current options are `gpt-4o-transcribe`, `gp - # - # @param prompt [String] An optional text to guide the model's style or continue a previous audio + # @param type [Symbol, OpenAI::Models::Realtime::NoiseReductionType] Type of noise reduction. `near_field` is for close-talking microphones such as h end # Maximum number of output tokens for a single assistant response, inclusive of @@ -410,57 +345,6 @@ module OutputAudioFormat # @return [Array] end - class Tool < OpenAI::Internal::Type::BaseModel - # @!attribute description - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - # - # @return [String, nil] - optional :description, String - - # @!attribute name - # The name of the function. - # - # @return [String, nil] - optional :name, String - - # @!attribute parameters - # Parameters of the function in JSON Schema. - # - # @return [Object, nil] - optional :parameters, OpenAI::Internal::Type::Unknown - - # @!attribute type - # The type of the tool, i.e. `function`. - # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeSession::Tool::Type, nil] - optional :type, enum: -> { OpenAI::Realtime::RealtimeSession::Tool::Type } - - # @!method initialize(description: nil, name: nil, parameters: nil, type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeSession::Tool} for more details. - # - # @param description [String] The description of the function, including guidance on when and how - # - # @param name [String] The name of the function. - # - # @param parameters [Object] Parameters of the function in JSON Schema. - # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeSession::Tool::Type] The type of the tool, i.e. `function`. - - # The type of the tool, i.e. `function`. - # - # @see OpenAI::Models::Realtime::RealtimeSession::Tool#type - module Type - extend OpenAI::Internal::Type::Enum - - FUNCTION = :function - - # @!method self.values - # @return [Array] - end - end - # Configuration options for tracing. Set to null to disable tracing. Once tracing # is enabled for a session, the configuration cannot be modified. # diff --git a/lib/openai/models/realtime/realtime_session_client_secret.rb b/lib/openai/models/realtime/realtime_session_client_secret.rb new file mode 100644 index 00000000..17e6b555 --- /dev/null +++ b/lib/openai/models/realtime/realtime_session_client_secret.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeSessionClientSecret < OpenAI::Internal::Type::BaseModel + # @!attribute expires_at + # Timestamp for when the token expires. Currently, all tokens expire after one + # minute. + # + # @return [Integer] + required :expires_at, Integer + + # @!attribute value + # Ephemeral key usable in client environments to authenticate connections to the + # Realtime API. Use this in client-side environments rather than a standard API + # token, which should only be used server-side. + # + # @return [String] + required :value, String + + # @!method initialize(expires_at:, value:) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeSessionClientSecret} for more details. + # + # Ephemeral key returned by the API. + # + # @param expires_at [Integer] Timestamp for when the token expires. 
Currently, all tokens expire + # + # @param value [String] Ephemeral key usable in client environments to authenticate connections to the R + end + end + + RealtimeSessionClientSecret = Realtime::RealtimeSessionClientSecret + end +end diff --git a/lib/openai/models/realtime/realtime_session_create_request.rb b/lib/openai/models/realtime/realtime_session_create_request.rb index 619c194f..62e0e9e5 100644 --- a/lib/openai/models/realtime/realtime_session_create_request.rb +++ b/lib/openai/models/realtime/realtime_session_create_request.rb @@ -4,12 +4,6 @@ module OpenAI module Models module Realtime class RealtimeSessionCreateRequest < OpenAI::Internal::Type::BaseModel - # @!attribute model - # The Realtime model used for this session. - # - # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model] - required :model, union: -> { OpenAI::Realtime::RealtimeSessionCreateRequest::Model } - # @!attribute type # The type of session to create. Always `realtime` for the Realtime API. # @@ -22,17 +16,11 @@ class RealtimeSessionCreateRequest < OpenAI::Internal::Type::BaseModel # @return [OpenAI::Models::Realtime::RealtimeAudioConfig, nil] optional :audio, -> { OpenAI::Realtime::RealtimeAudioConfig } - # @!attribute client_secret - # Configuration options for the generated client secret. - # - # @return [OpenAI::Models::Realtime::RealtimeClientSecretConfig, nil] - optional :client_secret, -> { OpenAI::Realtime::RealtimeClientSecretConfig } - # @!attribute include # Additional fields to include in server outputs. # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. + # `item.input_audio_transcription.logprobs`: Include logprobs for input audio + # transcription. # # @return [Array, nil] optional :include, @@ -62,9 +50,17 @@ class RealtimeSessionCreateRequest < OpenAI::Internal::Type::BaseModel # @return [Integer, Symbol, :inf, nil] optional :max_output_tokens, union: -> { OpenAI::Realtime::RealtimeSessionCreateRequest::MaxOutputTokens } + # @!attribute model + # The Realtime model used for this session. + # + # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model, nil] + optional :model, union: -> { OpenAI::Realtime::RealtimeSessionCreateRequest::Model } + # @!attribute output_modalities - # The set of modalities the model can respond with. To disable audio, set this to - # ["text"]. + # The set of modalities the model can respond with. It defaults to `["audio"]`, + # indicating that the model will respond with audio plus a transcript. `["text"]` + # can be used to make the model respond with text only. It is not possible to + # request both `text` and `audio` at the same time. # # @return [Array, nil] optional :output_modalities, @@ -77,13 +73,6 @@ class RealtimeSessionCreateRequest < OpenAI::Internal::Type::BaseModel # @return [OpenAI::Models::Responses::ResponsePrompt, nil] optional :prompt, -> { OpenAI::Responses::ResponsePrompt }, nil?: true - # @!attribute temperature - # Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a - # temperature of 0.8 is highly recommended for best performance. - # - # @return [Float, nil] - optional :temperature, Float - # @!attribute tool_choice # How the model chooses tools. Provide one of the string modes or force a specific # function/MCP tool. @@ -94,12 +83,13 @@ class RealtimeSessionCreateRequest < OpenAI::Internal::Type::BaseModel # @!attribute tools # Tools available to the model. 
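A sketch of the intended server/client split; `secret` is assumed to be a parsed `RealtimeSessionClientSecret`, and how `payload` is rendered to the browser is framework-dependent:

```ruby
payload =
  if Time.now.to_i < secret.expires_at
    {realtime_key: secret.value} # ephemeral; safe to expose to client-side code
  else
    {error: "client secret expired; mint a new one"}
  end
```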
# - # @return [Array, nil] + # @return [Array, nil] optional :tools, -> { OpenAI::Internal::Type::ArrayOf[union: OpenAI::Realtime::RealtimeToolsConfigUnion] } # @!attribute tracing - # Configuration options for tracing. Set to null to disable tracing. Once tracing - # is enabled for a session, the configuration cannot be modified. + # Realtime API can write session traces to the + # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + # tracing is enabled for a session, the configuration cannot be modified. # # `auto` will create a trace for the session with default values for the workflow # name, group id, and metadata. @@ -109,46 +99,66 @@ class RealtimeSessionCreateRequest < OpenAI::Internal::Type::BaseModel # @!attribute truncation # Controls how the realtime conversation is truncated prior to model inference. - # The default is `auto`. When set to `retention_ratio`, the server retains a - # fraction of the conversation tokens prior to the instructions. + # The default is `auto`. # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncation::RetentionRatioTruncation, nil] + # @return [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio, nil] optional :truncation, union: -> { OpenAI::Realtime::RealtimeTruncation } - # @!method initialize(model:, audio: nil, client_secret: nil, include: nil, instructions: nil, max_output_tokens: nil, output_modalities: nil, prompt: nil, temperature: nil, tool_choice: nil, tools: nil, tracing: nil, truncation: nil, type: :realtime) + # @!method initialize(audio: nil, include: nil, instructions: nil, max_output_tokens: nil, model: nil, output_modalities: nil, prompt: nil, tool_choice: nil, tools: nil, tracing: nil, truncation: nil, type: :realtime) # Some parameter documentations has been truncated, see # {OpenAI::Models::Realtime::RealtimeSessionCreateRequest} for more details. # # Realtime session object configuration. # - # @param model [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model] The Realtime model used for this session. - # # @param audio [OpenAI::Models::Realtime::RealtimeAudioConfig] Configuration for input and output audio. # - # @param client_secret [OpenAI::Models::Realtime::RealtimeClientSecretConfig] Configuration options for the generated client secret. - # # @param include [Array] Additional fields to include in server outputs. # # @param instructions [String] The default system instructions (i.e. system message) prepended to model calls. # # @param max_output_tokens [Integer, Symbol, :inf] Maximum number of output tokens for a single assistant response, # - # @param output_modalities [Array] The set of modalities the model can respond with. To disable audio, + # @param model [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model] The Realtime model used for this session. + # + # @param output_modalities [Array] The set of modalities the model can respond with. It defaults to `["audio"]`, in # # @param prompt [OpenAI::Models::Responses::ResponsePrompt, nil] Reference to a prompt template and its variables. # - # @param temperature [Float] Sampling temperature for the model, limited to [0.6, 1.2]. 
For audio models a te - # # @param tool_choice [Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp] How the model chooses tools. Provide one of the string modes or force a specific # - # @param tools [Array] Tools available to the model. + # @param tools [Array] Tools available to the model. # - # @param tracing [Symbol, :auto, OpenAI::Models::Realtime::RealtimeTracingConfig::TracingConfiguration, nil] Configuration options for tracing. Set to null to disable tracing. Once + # @param tracing [Symbol, :auto, OpenAI::Models::Realtime::RealtimeTracingConfig::TracingConfiguration, nil] Realtime API can write session traces to the [Traces Dashboard](/logs?api=traces # - # @param truncation [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncation::RetentionRatioTruncation] Controls how the realtime conversation is truncated prior to model inference. + # @param truncation [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio] Controls how the realtime conversation is truncated prior to model inference. # # @param type [Symbol, :realtime] The type of session to create. Always `realtime` for the Realtime API. + module Include + extend OpenAI::Internal::Type::Enum + + ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs" + + # @!method self.values + # @return [Array] + end + + # Maximum number of output tokens for a single assistant response, inclusive of + # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + # `inf` for the maximum available tokens for a given model. Defaults to `inf`. + # + # @see OpenAI::Models::Realtime::RealtimeSessionCreateRequest#max_output_tokens + module MaxOutputTokens + extend OpenAI::Internal::Type::Union + + variant Integer + + variant const: :inf + + # @!method self.variants + # @return [Array(Integer, Symbol, :inf)] + end + # The Realtime model used for this session. # # @see OpenAI::Models::Realtime::RealtimeSessionCreateRequest#model @@ -161,10 +171,6 @@ module Model variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model::GPT_REALTIME_2025_08_28 } - variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model::GPT_4O_REALTIME } - - variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model::GPT_4O_MINI_REALTIME } - variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model::GPT_4O_REALTIME_PREVIEW } variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model::GPT_4O_REALTIME_PREVIEW_2024_10_01 } @@ -188,8 +194,6 @@ module Model GPT_REALTIME = :"gpt-realtime" GPT_REALTIME_2025_08_28 = :"gpt-realtime-2025-08-28" - GPT_4O_REALTIME = :"gpt-4o-realtime" - GPT_4O_MINI_REALTIME = :"gpt-4o-mini-realtime" GPT_4O_REALTIME_PREVIEW = :"gpt-4o-realtime-preview" GPT_4O_REALTIME_PREVIEW_2024_10_01 = :"gpt-4o-realtime-preview-2024-10-01" GPT_4O_REALTIME_PREVIEW_2024_12_17 = :"gpt-4o-realtime-preview-2024-12-17" @@ -200,31 +204,6 @@ module Model # @!endgroup end - module Include - extend OpenAI::Internal::Type::Enum - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs" - - # @!method self.values - # @return [Array] - end - - # Maximum number of output tokens for a single assistant response, inclusive of - # tool calls. 
Provide an integer between 1 and 4096 to limit output tokens, or - # `inf` for the maximum available tokens for a given model. Defaults to `inf`. - # - # @see OpenAI::Models::Realtime::RealtimeSessionCreateRequest#max_output_tokens - module MaxOutputTokens - extend OpenAI::Internal::Type::Union - - variant Integer - - variant const: :inf - - # @!method self.variants - # @return [Array(Integer, Symbol, :inf)] - end - module OutputModality extend OpenAI::Internal::Type::Enum diff --git a/lib/openai/models/realtime/realtime_session_create_response.rb b/lib/openai/models/realtime/realtime_session_create_response.rb index 7612327a..9824439c 100644 --- a/lib/openai/models/realtime/realtime_session_create_response.rb +++ b/lib/openai/models/realtime/realtime_session_create_response.rb @@ -4,29 +4,23 @@ module OpenAI module Models module Realtime class RealtimeSessionCreateResponse < OpenAI::Internal::Type::BaseModel - # @!attribute id - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - # - # @return [String, nil] - optional :id, String - # @!attribute audio - # Configuration for input and output audio for the session. + # Configuration for input and output audio. # # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio, nil] optional :audio, -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Audio } - # @!attribute expires_at - # Expiration timestamp for the session, in seconds since epoch. + # @!attribute client_secret + # Ephemeral key returned by the API. # - # @return [Integer, nil] - optional :expires_at, Integer + # @return [OpenAI::Models::Realtime::RealtimeSessionClientSecret, nil] + optional :client_secret, -> { OpenAI::Realtime::RealtimeSessionClientSecret } # @!attribute include # Additional fields to include in server outputs. # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. + # `item.input_audio_transcription.logprobs`: Include logprobs for input audio + # transcription. # # @return [Array, nil] optional :include, @@ -60,86 +54,96 @@ class RealtimeSessionCreateResponse < OpenAI::Internal::Type::BaseModel # @!attribute model # The Realtime model used for this session. # - # @return [String, nil] - optional :model, String - - # @!attribute object - # The object type. Always `realtime.session`. - # - # @return [String, nil] - optional :object, String + # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Model, nil] + optional :model, union: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Model } # @!attribute output_modalities - # The set of modalities the model can respond with. To disable audio, set this to - # ["text"]. + # The set of modalities the model can respond with. It defaults to `["audio"]`, + # indicating that the model will respond with audio plus a transcript. `["text"]` + # can be used to make the model respond with text only. It is not possible to + # request both `text` and `audio` at the same time. # # @return [Array, nil] optional :output_modalities, -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality] } + # @!attribute prompt + # Reference to a prompt template and its variables. + # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + # + # @return [OpenAI::Models::Responses::ResponsePrompt, nil] + optional :prompt, -> { OpenAI::Responses::ResponsePrompt }, nil?: true + # @!attribute tool_choice - # How the model chooses tools. 
Options are `auto`, `none`, `required`, or specify - # a function. + # How the model chooses tools. Provide one of the string modes or force a specific + # function/MCP tool. # - # @return [String, nil] - optional :tool_choice, String + # @return [Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp, nil] + optional :tool_choice, union: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::ToolChoice } # @!attribute tools - # Tools (functions) available to the model. + # Tools available to the model. # - # @return [Array, nil] + # @return [Array, nil] optional :tools, - -> { OpenAI::Internal::Type::ArrayOf[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool] } + -> { OpenAI::Internal::Type::ArrayOf[union: OpenAI::Realtime::RealtimeSessionCreateResponse::Tool] } # @!attribute tracing - # Configuration options for tracing. Set to null to disable tracing. Once tracing - # is enabled for a session, the configuration cannot be modified. + # Realtime API can write session traces to the + # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + # tracing is enabled for a session, the configuration cannot be modified. # # `auto` will create a trace for the session with default values for the workflow # name, group id, and metadata. # # @return [Symbol, :auto, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration, nil] - optional :tracing, union: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing } + optional :tracing, union: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing }, nil?: true + + # @!attribute truncation + # Controls how the realtime conversation is truncated prior to model inference. + # The default is `auto`. + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio, nil] + optional :truncation, union: -> { OpenAI::Realtime::RealtimeTruncation } - # @!attribute turn_detection - # Configuration for turn detection. Can be set to `null` to turn off. Server VAD - # means that the model will detect the start and end of speech based on audio - # volume and respond at the end of user speech. + # @!attribute type + # The type of session to create. Always `realtime` for the Realtime API. # - # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::TurnDetection, nil] - optional :turn_detection, -> { OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection } + # @return [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Type, nil] + optional :type, enum: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Type } - # @!method initialize(id: nil, audio: nil, expires_at: nil, include: nil, instructions: nil, max_output_tokens: nil, model: nil, object: nil, output_modalities: nil, tool_choice: nil, tools: nil, tracing: nil, turn_detection: nil) + # @!method initialize(audio: nil, client_secret: nil, include: nil, instructions: nil, max_output_tokens: nil, model: nil, output_modalities: nil, prompt: nil, tool_choice: nil, tools: nil, tracing: nil, truncation: nil, type: nil) # Some parameter documentations has been truncated, see # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse} for more details. # - # A Realtime session configuration object. + # A new Realtime session configuration, with an ephemeral key. Default TTL for + # keys is one minute. 
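A quick, hedged sketch of how the session-plus-ephemeral-key shape described here is typically obtained with this gem — it assumes the `client.realtime.client_secrets.create` resource shipped in this release and top-level `value`/`expires_at` accessors on the response; names and parameters are illustrative, not authoritative:

```ruby
require "openai"

client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

# Mint an ephemeral client secret for a Realtime session (assumed resource name).
secret = client.realtime.client_secrets.create(
  session: {type: :realtime, model: "gpt-realtime"}
)

secret.value      # ephemeral key for client-side connections to the Realtime API
secret.expires_at # Unix timestamp; these keys currently expire after one minute
```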
# - # @param id [String] Unique identifier for the session that looks like `sess_1234567890abcdef`. + # @param audio [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio] Configuration for input and output audio. # - # @param audio [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio] Configuration for input and output audio for the session. - # - # @param expires_at [Integer] Expiration timestamp for the session, in seconds since epoch. + # @param client_secret [OpenAI::Models::Realtime::RealtimeSessionClientSecret] Ephemeral key returned by the API. # # @param include [Array] Additional fields to include in server outputs. # - # @param instructions [String] The default system instructions (i.e. system message) prepended to model + # @param instructions [String] The default system instructions (i.e. system message) prepended to model calls. # # @param max_output_tokens [Integer, Symbol, :inf] Maximum number of output tokens for a single assistant response, # - # @param model [String] The Realtime model used for this session. + # @param model [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Model] The Realtime model used for this session. + # + # @param output_modalities [Array] The set of modalities the model can respond with. It defaults to `["audio"]`, in # - # @param object [String] The object type. Always `realtime.session`. + # @param prompt [OpenAI::Models::Responses::ResponsePrompt, nil] Reference to a prompt template and its variables. # - # @param output_modalities [Array] The set of modalities the model can respond with. To disable audio, + # @param tool_choice [Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp] How the model chooses tools. Provide one of the string modes or force a specific # - # @param tool_choice [String] How the model chooses tools. Options are `auto`, `none`, `required`, or + # @param tools [Array] Tools available to the model. # - # @param tools [Array] Tools (functions) available to the model. + # @param tracing [Symbol, :auto, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration, nil] Realtime API can write session traces to the [Traces Dashboard](/logs?api=traces # - # @param tracing [Symbol, :auto, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration] Configuration options for tracing. Set to null to disable tracing. Once + # @param truncation [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio] Controls how the realtime conversation is truncated prior to model inference. # - # @param turn_detection [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::TurnDetection] Configuration for turn detection. Can be set to `null` to turn off. Server + # @param type [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Type] The type of session to create. Always `realtime` for the Realtime API. # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse#audio class Audio < OpenAI::Internal::Type::BaseModel @@ -154,7 +158,7 @@ class Audio < OpenAI::Internal::Type::BaseModel optional :output, -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Output } # @!method initialize(input: nil, output: nil) - # Configuration for input and output audio for the session. + # Configuration for input and output audio. 
# # @param input [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input] # @param output [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Output] @@ -162,27 +166,46 @@ class Audio < OpenAI::Internal::Type::BaseModel # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio#input class Input < OpenAI::Internal::Type::BaseModel # @!attribute format_ - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # The format of the input audio. # - # @return [String, nil] - optional :format_, String, api_name: :format + # @return [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA, nil] + optional :format_, union: -> { OpenAI::Realtime::RealtimeAudioFormats }, api_name: :format # @!attribute noise_reduction - # Configuration for input audio noise reduction. + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. # # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction, nil] optional :noise_reduction, -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction } # @!attribute transcription - # Configuration for input audio transcription. + # Configuration for input audio transcription, defaults to off and can be set to + # `null` to turn off once on. Input audio transcription is not native to the + # model, since the model consumes audio directly. Transcription runs + # asynchronously through + # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + # and should be treated as guidance of input audio content rather than precisely + # what the model heard. The client can optionally set the language and prompt for + # transcription; these offer additional guidance to the transcription service. + # - # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription, nil] - optional :transcription, - -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription } + # @return [OpenAI::Models::Realtime::AudioTranscription, nil] + optional :transcription, -> { OpenAI::Realtime::AudioTranscription } # @!attribute turn_detection - # Configuration for turn detection. + # Configuration for turn detection, either Server VAD or Semantic VAD. This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech. Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency.
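Read concretely, the turn-detection modes documented above admit payloads like the following — a sketch of the `audio.input.turn_detection` value only, with field names taken from this model and everything else assumed:

```ruby
# Server VAD: turns are detected from audio volume alone.
server_vad = {
  type: "server_vad",
  threshold: 0.5,            # 0.0..1.0; higher requires louder audio to trigger
  prefix_padding_ms: 300,    # audio retained before detected speech
  silence_duration_ms: 500   # silence that ends the user's turn
}

# Semantic VAD: a turn-detection model estimates whether the user has finished
# speaking and sets the response timeout dynamically.
semantic_vad = {
  type: "semantic_vad",
  eagerness: "low" # wait longer before assuming the turn has ended
}

# JSON null turns detection off; the client must then trigger responses manually.
turn_detection_off = nil
```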
# # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection, nil] optional :turn_detection, @@ -193,127 +216,192 @@ class Input < OpenAI::Internal::Type::BaseModel # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input} for more # details. # - # @param format_ [String] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The format of the input audio. # - # @param noise_reduction [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction] Configuration for input audio noise reduction. + # @param noise_reduction [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn # - # @param transcription [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription] Configuration for input audio transcription. + # @param transcription [OpenAI::Models::Realtime::AudioTranscription] Configuration for input audio transcription, defaults to off and can be set to ` # - # @param turn_detection [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection] Configuration for turn detection. + # @param turn_detection [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection] Configuration for turn detection, either Server VAD or Semantic VAD. This can be # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input#noise_reduction class NoiseReduction < OpenAI::Internal::Type::BaseModel # @!attribute type + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type, nil] - optional :type, - enum: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type } + # @return [Symbol, OpenAI::Models::Realtime::NoiseReductionType, nil] + optional :type, enum: -> { OpenAI::Realtime::NoiseReductionType } # @!method initialize(type: nil) - # Configuration for input audio noise reduction. + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction} + # for more details. # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type] - - # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction#type - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD = :near_field - FAR_FIELD = :far_field - - # @!method self.values - # @return [Array] - end + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + # + # @param type [Symbol, OpenAI::Models::Realtime::NoiseReductionType] Type of noise reduction.
`near_field` is for close-talking microphones such as h end - # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input#transcription - class Transcription < OpenAI::Internal::Type::BaseModel - # @!attribute language - # The language of the input audio. + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input#turn_detection + class TurnDetection < OpenAI::Internal::Type::BaseModel + # @!attribute create_response + # Whether or not to automatically generate a response when a VAD stop event + # occurs. # - # @return [String, nil] - optional :language, String - - # @!attribute model - # The model to use for transcription. + # @return [Boolean, nil] + optional :create_response, OpenAI::Internal::Type::Boolean + + # @!attribute eagerness + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, + # and `high` have max timeouts of 8s, 4s, and 2s respectively. # - # @return [String, nil] - optional :model, String + # @return [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness, nil] + optional :eagerness, + enum: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness } - # @!attribute prompt - # Optional text to guide the model's style or continue a previous audio segment. + # @!attribute idle_timeout_ms + # Optional idle timeout after which turn detection will auto-timeout when no + # additional audio is received. # - # @return [String, nil] - optional :prompt, String + # @return [Integer, nil] + optional :idle_timeout_ms, Integer, nil?: true - # @!method initialize(language: nil, model: nil, prompt: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription} - # for more details. - # - # Configuration for input audio transcription. + # @!attribute interrupt_response + # Whether or not to automatically interrupt any ongoing response with output to + # the default conversation (i.e. `conversation` of `auto`) when a VAD start event + # occurs. # - # @param language [String] The language of the input audio. - # - # @param model [String] The model to use for transcription. - # - # @param prompt [String] Optional text to guide the model's style or continue a previous audio segment. - end + # @return [Boolean, nil] + optional :interrupt_response, OpenAI::Internal::Type::Boolean - # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input#turn_detection - class TurnDetection < OpenAI::Internal::Type::BaseModel # @!attribute prefix_padding_ms + # Used only for `server_vad` mode. Amount of audio to include before the VAD + # detected speech (in milliseconds). Defaults to 300ms. # # @return [Integer, nil] optional :prefix_padding_ms, Integer # @!attribute silence_duration_ms + # Used only for `server_vad` mode. Duration of silence to detect speech stop (in + # milliseconds). Defaults to 500ms. With shorter values the model will respond + # more quickly, but may jump in on short pauses from the user. # # @return [Integer, nil] optional :silence_duration_ms, Integer # @!attribute threshold + # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + # defaults to 0.5. 
A higher threshold will require louder audio to activate the + # model, and thus might perform better in noisy environments. # # @return [Float, nil] optional :threshold, Float # @!attribute type - # Type of turn detection, only `server_vad` is currently supported. + # Type of turn detection. # - # @return [String, nil] - optional :type, String + # @return [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type, nil] + optional :type, + enum: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type } - # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) + # @!method initialize(create_response: nil, eagerness: nil, idle_timeout_ms: nil, interrupt_response: nil, prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) # Some parameter documentations has been truncated, see # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection} # for more details. # - # Configuration for turn detection. + # Configuration for turn detection, either Server VAD or Semantic VAD. This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech. Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency. + # + # @param create_response [Boolean] Whether or not to automatically generate a response when a VAD stop event occurs + # + # @param eagerness [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness] Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # + # @param idle_timeout_ms [Integer, nil] Optional idle timeout after which turn detection will auto-timeout when + # + # @param interrupt_response [Boolean] Whether or not to automatically interrupt any ongoing response with output to th + # + # @param prefix_padding_ms [Integer] Used only for `server_vad` mode. Amount of audio to include before the VAD detec # - # @param prefix_padding_ms [Integer] + # @param silence_duration_ms [Integer] Used only for `server_vad` mode. Duration of silence to detect speech stop (in m # - # @param silence_duration_ms [Integer] + # @param threshold [Float] Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this # - # @param threshold [Float] + # @param type [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type] Type of turn detection. + + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, + # and `high` have max timeouts of 8s, 4s, and 2s respectively.
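Spelled out, the eagerness levels above imply roughly these maximum waits before the model replies (a plain-Ruby aide-mémoire based on the doc comment, not SDK code):

```ruby
# Maximum wait per `eagerness` level, per the doc comment above
# (`auto` is the default and behaves like `medium`).
EAGERNESS_MAX_TIMEOUT_S = {
  low: 8,    # waits longest for the user to continue speaking
  medium: 4,
  auto: 4,
  high: 2    # responds most quickly
}.freeze

EAGERNESS_MAX_TIMEOUT_S.fetch(:low) # => 8
```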
+ # + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection#eagerness + module Eagerness + extend OpenAI::Internal::Type::Enum + + LOW = :low + MEDIUM = :medium + HIGH = :high + AUTO = :auto + + # @!method self.values + # @return [Array] + end + + # Type of turn detection. # - # @param type [String] Type of turn detection, only `server_vad` is currently supported. + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection#type + module Type + extend OpenAI::Internal::Type::Enum + + SERVER_VAD = :server_vad + SEMANTIC_VAD = :semantic_vad + + # @!method self.values + # @return [Array] + end end end # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio#output class Output < OpenAI::Internal::Type::BaseModel # @!attribute format_ - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # The format of the output audio. # - # @return [String, nil] - optional :format_, String, api_name: :format + # @return [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA, nil] + optional :format_, union: -> { OpenAI::Realtime::RealtimeAudioFormats }, api_name: :format # @!attribute speed + # The speed of the model's spoken response as a multiple of the original speed. + # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. + # This value can only be changed in between model turns, not while a response is + # in progress. + # + # This parameter is a post-processing adjustment to the audio after it is + # generated; it's also possible to prompt the model to speak faster or slower. # # @return [Float, nil] optional :speed, Float # @!attribute voice + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. # # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Output::Voice, nil] optional :voice, union: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Output::Voice } @@ -323,12 +411,17 @@ class Output < OpenAI::Internal::Type::BaseModel # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Output} for # more details. # - # @param format_ [String] The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The format of the output audio. # - # @param speed [Float] + # @param speed [Float] The speed of the model's spoken response as a multiple of the original speed. # - # @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Output::Voice] + # @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Output::Voice] The voice the model uses to respond. Voice cannot be changed during the + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. + # # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Output#voice module Voice extend OpenAI::Internal::Type::Union @@ -405,6 +498,51 @@ module MaxOutputTokens # @return [Array(Integer, Symbol, :inf)] end + # The Realtime model used for this session. + # + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse#model + module Model + extend OpenAI::Internal::Type::Union + + variant String + + variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Model::GPT_REALTIME } + + variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Model::GPT_REALTIME_2025_08_28 } + + variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Model::GPT_4O_REALTIME_PREVIEW } + + variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Model::GPT_4O_REALTIME_PREVIEW_2024_10_01 } + + variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Model::GPT_4O_REALTIME_PREVIEW_2024_12_17 } + + variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Model::GPT_4O_REALTIME_PREVIEW_2025_06_03 } + + variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Model::GPT_4O_MINI_REALTIME_PREVIEW } + + variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Model::GPT_4O_MINI_REALTIME_PREVIEW_2024_12_17 } + + # @!method self.variants + # @return [Array(String, Symbol)] + + define_sorbet_constant!(:Variants) do + T.type_alias { T.any(String, OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol) } + end + + # @!group + + GPT_REALTIME = :"gpt-realtime" + GPT_REALTIME_2025_08_28 = :"gpt-realtime-2025-08-28" + GPT_4O_REALTIME_PREVIEW = :"gpt-4o-realtime-preview" + GPT_4O_REALTIME_PREVIEW_2024_10_01 = :"gpt-4o-realtime-preview-2024-10-01" + GPT_4O_REALTIME_PREVIEW_2024_12_17 = :"gpt-4o-realtime-preview-2024-12-17" + GPT_4O_REALTIME_PREVIEW_2025_06_03 = :"gpt-4o-realtime-preview-2025-06-03" + GPT_4O_MINI_REALTIME_PREVIEW = :"gpt-4o-mini-realtime-preview" + GPT_4O_MINI_REALTIME_PREVIEW_2024_12_17 = :"gpt-4o-mini-realtime-preview-2024-12-17" + + # @!endgroup + end + module OutputModality extend OpenAI::Internal::Type::Enum @@ -415,60 +553,361 @@ module OutputModality # @return [Array] end - class Tool < OpenAI::Internal::Type::BaseModel - # @!attribute description - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - # - # @return [String, nil] - optional :description, String + # How the model chooses tools. Provide one of the string modes or force a specific + # function/MCP tool. + # + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse#tool_choice + module ToolChoice + extend OpenAI::Internal::Type::Union - # @!attribute name - # The name of the function. + # Controls which (if any) tool is called by the model. # - # @return [String, nil] - optional :name, String - - # @!attribute parameters - # Parameters of the function in JSON Schema. + # `none` means the model will not call any tool and instead generates a message. # - # @return [Object, nil] - optional :parameters, OpenAI::Internal::Type::Unknown - - # @!attribute type - # The type of the tool, i.e. `function`.
+ # `auto` means the model can pick between generating a message or calling one or + # more tools. # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::Type, nil] - optional :type, enum: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type } + # `required` means the model must call one or more tools. + variant enum: -> { OpenAI::Responses::ToolChoiceOptions } - # @!method initialize(description: nil, name: nil, parameters: nil, type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool} for more - # details. - # - # @param description [String] The description of the function, including guidance on when and how - # - # @param name [String] The name of the function. - # - # @param parameters [Object] Parameters of the function in JSON Schema. - # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::Type] The type of the tool, i.e. `function`. + # Use this option to force the model to call a specific function. + variant -> { OpenAI::Responses::ToolChoiceFunction } - # The type of the tool, i.e. `function`. - # - # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool#type - module Type - extend OpenAI::Internal::Type::Enum + # Use this option to force the model to call a specific tool on a remote MCP server. + variant -> { OpenAI::Responses::ToolChoiceMcp } + + # @!method self.variants + # @return [Array(Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp)] + end + + # Give the model access to additional tools via remote Model Context Protocol + # (MCP) servers. + # [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + module Tool + extend OpenAI::Internal::Type::Union - FUNCTION = :function + variant -> { OpenAI::Realtime::Models } + + # Give the model access to additional tools via remote Model Context Protocol + # (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + variant -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool } + + class McpTool < OpenAI::Internal::Type::BaseModel + # @!attribute server_label + # A label for this MCP server, used to identify it in tool calls. + # + # @return [String] + required :server_label, String - # @!method self.values - # @return [Array] + # @!attribute type + # The type of the MCP tool. Always `mcp`. + # + # @return [Symbol, :mcp] + required :type, const: :mcp + + # @!attribute allowed_tools + # List of allowed tool names or a filter object. + # + # @return [Array, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter, nil] + optional :allowed_tools, + union: -> { + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools + }, + nil?: true + + # @!attribute authorization + # An OAuth access token that can be used with a remote MCP server, either with a + # custom MCP server URL or a service connector. Your application must handle the + # OAuth authorization flow and provide the token here. + # + # @return [String, nil] + optional :authorization, String + + # @!attribute connector_id + # Identifier for service connectors, like those available in ChatGPT. One of + # `server_url` or `connector_id` must be provided. Learn more about service + # connectors + # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). 
+ # + # Currently supported `connector_id` values are: + # + # - Dropbox: `connector_dropbox` + # - Gmail: `connector_gmail` + # - Google Calendar: `connector_googlecalendar` + # - Google Drive: `connector_googledrive` + # - Microsoft Teams: `connector_microsoftteams` + # - Outlook Calendar: `connector_outlookcalendar` + # - Outlook Email: `connector_outlookemail` + # - SharePoint: `connector_sharepoint` + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID, nil] + optional :connector_id, + enum: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID } + + # @!attribute headers + # Optional HTTP headers to send to the MCP server. Use for authentication or other + # purposes. + # + # @return [Hash{Symbol=>String}, nil] + optional :headers, OpenAI::Internal::Type::HashOf[String], nil?: true + + # @!attribute require_approval + # Specify which of the MCP server's tools require approval. + # + # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting, nil] + optional :require_approval, + union: -> { + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval + }, + nil?: true + + # @!attribute server_description + # Optional description of the MCP server, used to provide more context. + # + # @return [String, nil] + optional :server_description, String + + # @!attribute server_url + # The URL for the MCP server. One of `server_url` or `connector_id` must be + # provided. + # + # @return [String, nil] + optional :server_url, String + + # @!method initialize(server_label:, allowed_tools: nil, authorization: nil, connector_id: nil, headers: nil, require_approval: nil, server_description: nil, server_url: nil, type: :mcp) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool} for + # more details. + # + # Give the model access to additional tools via remote Model Context Protocol + # (MCP) servers. + # [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + # + # @param server_label [String] A label for this MCP server, used to identify it in tool calls. + # + # @param allowed_tools [Array, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter, nil] List of allowed tool names or a filter object. + # + # @param authorization [String] An OAuth access token that can be used with a remote MCP server, either + # + # @param connector_id [Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID] Identifier for service connectors, like those available in ChatGPT. One of + # + # @param headers [Hash{Symbol=>String}, nil] Optional HTTP headers to send to the MCP server. Use for authentication + # + # @param require_approval [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting, nil] Specify which of the MCP server's tools require approval. + # + # @param server_description [String] Optional description of the MCP server, used to provide more context. + # + # @param server_url [String] The URL for the MCP server. 
One of `server_url` or `connector_id` must be + # + # @param type [Symbol, :mcp] The type of the MCP tool. Always `mcp`. + + # List of allowed tool names or a filter object. + # + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool#allowed_tools + module AllowedTools + extend OpenAI::Internal::Type::Union + + # A string array of allowed tool names + variant -> { OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::StringArray } + + # A filter object to specify which tools are allowed. + variant -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter } + + class McpToolFilter < OpenAI::Internal::Type::BaseModel + # @!attribute read_only + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + # + # @return [Boolean, nil] + optional :read_only, OpenAI::Internal::Type::Boolean + + # @!attribute tool_names + # List of allowed tool names. + # + # @return [Array, nil] + optional :tool_names, OpenAI::Internal::Type::ArrayOf[String] + + # @!method initialize(read_only: nil, tool_names: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter} + # for more details. + # + # A filter object to specify which tools are allowed. + # + # @param read_only [Boolean] Indicates whether or not a tool modifies data or is read-only. If an + # + # @param tool_names [Array] List of allowed tool names. + end + + # @!method self.variants + # @return [Array(Array, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter)] + + # @type [OpenAI::Internal::Type::Converter] + StringArray = OpenAI::Internal::Type::ArrayOf[String] + end + + # Identifier for service connectors, like those available in ChatGPT. One of + # `server_url` or `connector_id` must be provided. Learn more about service + # connectors + # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + # + # Currently supported `connector_id` values are: + # + # - Dropbox: `connector_dropbox` + # - Gmail: `connector_gmail` + # - Google Calendar: `connector_googlecalendar` + # - Google Drive: `connector_googledrive` + # - Microsoft Teams: `connector_microsoftteams` + # - Outlook Calendar: `connector_outlookcalendar` + # - Outlook Email: `connector_outlookemail` + # - SharePoint: `connector_sharepoint` + # + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool#connector_id + module ConnectorID + extend OpenAI::Internal::Type::Enum + + CONNECTOR_DROPBOX = :connector_dropbox + CONNECTOR_GMAIL = :connector_gmail + CONNECTOR_GOOGLECALENDAR = :connector_googlecalendar + CONNECTOR_GOOGLEDRIVE = :connector_googledrive + CONNECTOR_MICROSOFTTEAMS = :connector_microsoftteams + CONNECTOR_OUTLOOKCALENDAR = :connector_outlookcalendar + CONNECTOR_OUTLOOKEMAIL = :connector_outlookemail + CONNECTOR_SHAREPOINT = :connector_sharepoint + + # @!method self.values + # @return [Array] + end + + # Specify which of the MCP server's tools require approval. + # + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool#require_approval + module RequireApproval + extend OpenAI::Internal::Type::Union + + # Specify which of the MCP server's tools require approval. 
Can be + # `always`, `never`, or a filter object associated with tools + # that require approval. + variant -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter } + + # Specify a single approval policy for all tools. One of `always` or + # `never`. When set to `always`, all tools will require approval. When + # set to `never`, all tools will not require approval. + variant enum: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting } + + class McpToolApprovalFilter < OpenAI::Internal::Type::BaseModel + # @!attribute always + # A filter object to specify which tools are allowed. + # + # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always, nil] + optional :always, + -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always } + + # @!attribute never + # A filter object to specify which tools are allowed. + # + # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never, nil] + optional :never, + -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never } + + # @!method initialize(always: nil, never: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter} + # for more details. + # + # Specify which of the MCP server's tools require approval. Can be `always`, + # `never`, or a filter object associated with tools that require approval. + # + # @param always [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always] A filter object to specify which tools are allowed. + # + # @param never [OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never] A filter object to specify which tools are allowed. + + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter#always + class Always < OpenAI::Internal::Type::BaseModel + # @!attribute read_only + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + # + # @return [Boolean, nil] + optional :read_only, OpenAI::Internal::Type::Boolean + + # @!attribute tool_names + # List of allowed tool names. + # + # @return [Array, nil] + optional :tool_names, OpenAI::Internal::Type::ArrayOf[String] + + # @!method initialize(read_only: nil, tool_names: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always} + # for more details. + # + # A filter object to specify which tools are allowed. + # + # @param read_only [Boolean] Indicates whether or not a tool modifies data or is read-only. If an + # + # @param tool_names [Array] List of allowed tool names. 
+ end + + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter#never + class Never < OpenAI::Internal::Type::BaseModel + # @!attribute read_only + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + # + # @return [Boolean, nil] + optional :read_only, OpenAI::Internal::Type::Boolean + + # @!attribute tool_names + # List of allowed tool names. + # + # @return [Array, nil] + optional :tool_names, OpenAI::Internal::Type::ArrayOf[String] + + # @!method initialize(read_only: nil, tool_names: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never} + # for more details. + # + # A filter object to specify which tools are allowed. + # + # @param read_only [Boolean] Indicates whether or not a tool modifies data or is read-only. If an + # + # @param tool_names [Array] List of allowed tool names. + end + end + + # Specify a single approval policy for all tools. One of `always` or `never`. When + # set to `always`, all tools will require approval. When set to `never`, all tools + # will not require approval. + module McpToolApprovalSetting + extend OpenAI::Internal::Type::Enum + + ALWAYS = :always + NEVER = :never + + # @!method self.values + # @return [Array] + end + + # @!method self.variants + # @return [Array(OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting)] + end end + + # @!method self.variants + # @return [Array(OpenAI::Models::Realtime::Models, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool)] end - # Configuration options for tracing. Set to null to disable tracing. Once tracing - # is enabled for a session, the configuration cannot be modified. + # Realtime API can write session traces to the + # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + # tracing is enabled for a session, the configuration cannot be modified. # # `auto` will create a trace for the session with default values for the workflow # name, group id, and metadata. @@ -486,21 +925,21 @@ module Tracing class TracingConfiguration < OpenAI::Internal::Type::BaseModel # @!attribute group_id # The group id to attach to this trace to enable filtering and grouping in the - # traces dashboard. + # Traces Dashboard. # # @return [String, nil] optional :group_id, String # @!attribute metadata - # The arbitrary metadata to attach to this trace to enable filtering in the traces - # dashboard. + # The arbitrary metadata to attach to this trace to enable filtering in the Traces + # Dashboard. # # @return [Object, nil] optional :metadata, OpenAI::Internal::Type::Unknown # @!attribute workflow_name # The name of the workflow to attach to this trace. This is used to name the trace - # in the traces dashboard. + # in the Traces Dashboard. 
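For the tracing options documented in this hunk, the three accepted shapes look roughly like this — request-payload sketches with field names from `TracingConfiguration` and purely illustrative values:

```ruby
tracing_auto = :auto # trace with default workflow name, group id, and metadata

tracing_custom = {
  workflow_name: "support-voice-bot", # names the trace in the Traces Dashboard
  group_id: "user-1234",              # enables filtering and grouping
  metadata: {plan: "pro"}             # arbitrary metadata for filtering
}

tracing_off = nil # JSON null disables tracing for the session
```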
# # @return [String, nil] optional :workflow_name, String @@ -523,53 +962,16 @@ class TracingConfiguration < OpenAI::Internal::Type::BaseModel # @return [Array(Symbol, :auto, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration)] end - # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse#turn_detection - class TurnDetection < OpenAI::Internal::Type::BaseModel - # @!attribute prefix_padding_ms - # Amount of audio to include before the VAD detected speech (in milliseconds). - # Defaults to 300ms. - # - # @return [Integer, nil] - optional :prefix_padding_ms, Integer - - # @!attribute silence_duration_ms - # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. - # With shorter values the model will respond more quickly, but may jump in on - # short pauses from the user. - # - # @return [Integer, nil] - optional :silence_duration_ms, Integer - - # @!attribute threshold - # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher - # threshold will require louder audio to activate the model, and thus might - # perform better in noisy environments. - # - # @return [Float, nil] - optional :threshold, Float + # The type of session to create. Always `realtime` for the Realtime API. + # + # @see OpenAI::Models::Realtime::RealtimeSessionCreateResponse#type + module Type + extend OpenAI::Internal::Type::Enum - # @!attribute type - # Type of turn detection, only `server_vad` is currently supported. - # - # @return [String, nil] - optional :type, String + REALTIME = :realtime - # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeSessionCreateResponse::TurnDetection} for - # more details. - # - # Configuration for turn detection. Can be set to `null` to turn off. Server VAD - # means that the model will detect the start and end of speech based on audio - # volume and respond at the end of user speech. - # - # @param prefix_padding_ms [Integer] Amount of audio to include before the VAD detected speech (in - # - # @param silence_duration_ms [Integer] Duration of silence to detect speech stop (in milliseconds). Defaults - # - # @param threshold [Float] Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A - # - # @param type [String] Type of turn detection, only `server_vad` is currently supported. + # @!method self.values + # @return [Array] end end end diff --git a/lib/openai/models/realtime/realtime_tools_config_union.rb b/lib/openai/models/realtime/realtime_tools_config_union.rb index ded14276..4919d145 100644 --- a/lib/openai/models/realtime/realtime_tools_config_union.rb +++ b/lib/openai/models/realtime/realtime_tools_config_union.rb @@ -11,63 +11,12 @@ module RealtimeToolsConfigUnion discriminator :type - variant :function, -> { OpenAI::Realtime::RealtimeToolsConfigUnion::Function } + variant :function, -> { OpenAI::Realtime::Models } # Give the model access to additional tools via remote Model Context Protocol # (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). variant :mcp, -> { OpenAI::Realtime::RealtimeToolsConfigUnion::Mcp } - class Function < OpenAI::Internal::Type::BaseModel - # @!attribute description - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). 
- # - # @return [String, nil] - optional :description, String - - # @!attribute name - # The name of the function. - # - # @return [String, nil] - optional :name, String - - # @!attribute parameters - # Parameters of the function in JSON Schema. - # - # @return [Object, nil] - optional :parameters, OpenAI::Internal::Type::Unknown - - # @!attribute type - # The type of the tool, i.e. `function`. - # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::Type, nil] - optional :type, enum: -> { OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type } - - # @!method initialize(description: nil, name: nil, parameters: nil, type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function} for more details. - # - # @param description [String] The description of the function, including guidance on when and how - # - # @param name [String] The name of the function. - # - # @param parameters [Object] Parameters of the function in JSON Schema. - # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::Type] The type of the tool, i.e. `function`. - - # The type of the tool, i.e. `function`. - # - # @see OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function#type - module Type - extend OpenAI::Internal::Type::Enum - - FUNCTION = :function - - # @!method self.values - # @return [Array] - end - end - class Mcp < OpenAI::Internal::Type::BaseModel # @!attribute server_label # A label for this MCP server, used to identify it in tool calls. @@ -372,7 +321,7 @@ module McpToolApprovalSetting end # @!method self.variants - # @return [Array(OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Mcp)] + # @return [Array(OpenAI::Models::Realtime::Models, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Mcp)] end end end diff --git a/lib/openai/models/realtime/realtime_tracing_config.rb b/lib/openai/models/realtime/realtime_tracing_config.rb index fe101a2b..2b5f87ff 100644 --- a/lib/openai/models/realtime/realtime_tracing_config.rb +++ b/lib/openai/models/realtime/realtime_tracing_config.rb @@ -3,8 +3,9 @@ module OpenAI module Models module Realtime - # Configuration options for tracing. Set to null to disable tracing. Once tracing - # is enabled for a session, the configuration cannot be modified. + # The Realtime API can write session traces to the + # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + # tracing is enabled for a session, the configuration cannot be modified. # # `auto` will create a trace for the session with default values for the workflow # name, group id, and metadata. @@ -20,21 +21,21 @@ module RealtimeTracingConfig class TracingConfiguration < OpenAI::Internal::Type::BaseModel # @!attribute group_id # The group id to attach to this trace to enable filtering and grouping in the - # traces dashboard. + # Traces Dashboard. # # @return [String, nil] optional :group_id, String # @!attribute metadata - # The arbitrary metadata to attach to this trace to enable filtering in the traces - # dashboard. + # The arbitrary metadata to attach to this trace to enable filtering in the Traces + # Dashboard. # # @return [Object, nil] optional :metadata, OpenAI::Internal::Type::Unknown # @!attribute workflow_name # The name of the workflow to attach to this trace. This is used to name the trace - # in the traces dashboard. + # in the Traces Dashboard.
# # @return [String, nil] optional :workflow_name, String diff --git a/lib/openai/models/realtime/realtime_transcription_session_audio.rb b/lib/openai/models/realtime/realtime_transcription_session_audio.rb new file mode 100644 index 00000000..fc0e75f5 --- /dev/null +++ b/lib/openai/models/realtime/realtime_transcription_session_audio.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeTranscriptionSessionAudio < OpenAI::Internal::Type::BaseModel + # @!attribute input + # + # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput, nil] + optional :input, -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput } + + # @!method initialize(input: nil) + # Configuration for input and output audio. + # + # @param input [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput] + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb b/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb new file mode 100644 index 00000000..a4019b2c --- /dev/null +++ b/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb @@ -0,0 +1,90 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeTranscriptionSessionAudioInput < OpenAI::Internal::Type::BaseModel + # @!attribute format_ + # The PCM audio format. Only a 24kHz sample rate is supported. + # + # @return [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA, nil] + optional :format_, union: -> { OpenAI::Realtime::RealtimeAudioFormats }, api_name: :format + + # @!attribute noise_reduction + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + # + # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction, nil] + optional :noise_reduction, -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction } + + # @!attribute transcription + # Configuration for input audio transcription, defaults to off and can be set to + # `null` to turn off once on. Input audio transcription is not native to the + # model, since the model consumes audio directly. Transcription runs + # asynchronously through + # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + # and should be treated as guidance of input audio content rather than precisely + # what the model heard. The client can optionally set the language and prompt for + # transcription, these offer additional guidance to the transcription service. + # + # @return [OpenAI::Models::Realtime::AudioTranscription, nil] + optional :transcription, -> { OpenAI::Realtime::AudioTranscription } + + # @!attribute turn_detection + # Configuration for turn detection, either Server VAD or Semantic VAD. This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech.
Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency. + # + # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection, nil] + optional :turn_detection, -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection } + + # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput} for more + # details. + # + # @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The PCM audio format. Only a 24kHz sample rate is supported. + # + # @param noise_reduction [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn + # + # @param transcription [OpenAI::Models::Realtime::AudioTranscription] Configuration for input audio transcription, defaults to off and can be set to ` + # + # @param turn_detection [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection] Configuration for turn detection, either Server VAD or Semantic VAD. This can be + + # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput#noise_reduction + class NoiseReduction < OpenAI::Internal::Type::BaseModel + # @!attribute type + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. + # + # @return [Symbol, OpenAI::Models::Realtime::NoiseReductionType, nil] + optional :type, enum: -> { OpenAI::Realtime::NoiseReductionType } + + # @!method initialize(type: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction} + # for more details. + # + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + # + # @param type [Symbol, OpenAI::Models::Realtime::NoiseReductionType] Type of noise reduction.
`near_field` is for close-talking microphones such as h + end + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb b/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb new file mode 100644 index 00000000..984b2774 --- /dev/null +++ b/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb @@ -0,0 +1,131 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeTranscriptionSessionAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel + # @!attribute create_response + # Whether or not to automatically generate a response when a VAD stop event + # occurs. + # + # @return [Boolean, nil] + optional :create_response, OpenAI::Internal::Type::Boolean + + # @!attribute eagerness + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness, nil] + optional :eagerness, + enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness } + + # @!attribute idle_timeout_ms + # Optional idle timeout after which turn detection will auto-timeout when no + # additional audio is received. + # + # @return [Integer, nil] + optional :idle_timeout_ms, Integer, nil?: true + + # @!attribute interrupt_response + # Whether or not to automatically interrupt any ongoing response with output to + # the default conversation (i.e. `conversation` of `auto`) when a VAD start event + # occurs. + # + # @return [Boolean, nil] + optional :interrupt_response, OpenAI::Internal::Type::Boolean + + # @!attribute prefix_padding_ms + # Used only for `server_vad` mode. Amount of audio to include before the VAD + # detected speech (in milliseconds). Defaults to 300ms. + # + # @return [Integer, nil] + optional :prefix_padding_ms, Integer + + # @!attribute silence_duration_ms + # Used only for `server_vad` mode. Duration of silence to detect speech stop (in + # milliseconds). Defaults to 500ms. With shorter values the model will respond + # more quickly, but may jump in on short pauses from the user. + # + # @return [Integer, nil] + optional :silence_duration_ms, Integer + + # @!attribute threshold + # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + # defaults to 0.5. A higher threshold will require louder audio to activate the + # model, and thus might perform better in noisy environments. + # + # @return [Float, nil] + optional :threshold, Float + + # @!attribute type + # Type of turn detection. + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type, nil] + optional :type, enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type } + + # @!method initialize(create_response: nil, eagerness: nil, idle_timeout_ms: nil, interrupt_response: nil, prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection} + # for more details. + # + # Configuration for turn detection, either Server VAD or Semantic VAD.
This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech. Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency. + # + # @param create_response [Boolean] Whether or not to automatically generate a response when a VAD stop event occurs + # + # @param eagerness [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness] Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # + # @param idle_timeout_ms [Integer, nil] Optional idle timeout after which turn detection will auto-timeout when + # + # @param interrupt_response [Boolean] Whether or not to automatically interrupt any ongoing response with output to th + # + # @param prefix_padding_ms [Integer] Used only for `server_vad` mode. Amount of audio to include before the VAD detec + # + # @param silence_duration_ms [Integer] Used only for `server_vad` mode. Duration of silence to detect speech stop (in m + # + # @param threshold [Float] Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + # + # @param type [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type] Type of turn detection. + + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. + # + # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection#eagerness + module Eagerness + extend OpenAI::Internal::Type::Enum + + LOW = :low + MEDIUM = :medium + HIGH = :high + AUTO = :auto + + # @!method self.values + # @return [Array] + end + + # Type of turn detection. + # + # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection#type + module Type + extend OpenAI::Internal::Type::Enum + + SERVER_VAD = :server_vad + SEMANTIC_VAD = :semantic_vad + + # @!method self.values + # @return [Array] + end + end + end + end +end diff --git a/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb b/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb new file mode 100644 index 00000000..b50cf6d9 --- /dev/null +++ b/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeTranscriptionSessionClientSecret < OpenAI::Internal::Type::BaseModel + # @!attribute expires_at + # Timestamp for when the token expires. Currently, all tokens expire after one + # minute. + # + # @return [Integer] + required :expires_at, Integer + + # @!attribute value + # Ephemeral key usable in client environments to authenticate connections to the + # Realtime API. Use this in client-side environments rather than a standard API + # token, which should only be used server-side. 
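As a usage illustration for reviewers, here is a minimal sketch of how the new transcription audio-input models compose. It assumes the keyword initializers documented above; the field values are illustrative only.

```ruby
require "openai"

# Compose the new audio-input shape: noise reduction plus semantic VAD
# turn detection, using the models introduced in this diff.
input = OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput.new(
  noise_reduction: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction.new(
    type: :near_field # close-talking microphone, e.g. headphones
  ),
  transcription: OpenAI::Realtime::AudioTranscription.new(language: "en"),
  turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection.new(
    type: :semantic_vad,
    eagerness: :low # wait longer before treating the user's turn as finished
  )
)
audio = OpenAI::Realtime::RealtimeTranscriptionSessionAudio.new(input: input)
```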
+ # + # @return [String] + required :value, String + + # @!method initialize(expires_at:, value:) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionClientSecret} for more + # details. + # + # Ephemeral key returned by the API. Only present when the session is created on + # the server via REST API. + # + # @param expires_at [Integer] Timestamp for when the token expires. Currently, all tokens expire + # + # @param value [String] Ephemeral key usable in client environments to authenticate connections + end + end + + RealtimeTranscriptionSessionClientSecret = Realtime::RealtimeTranscriptionSessionClientSecret + end +end diff --git a/lib/openai/models/realtime/realtime_transcription_session_create_request.rb b/lib/openai/models/realtime/realtime_transcription_session_create_request.rb index 0c3c073b..1db17ba5 100644 --- a/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +++ b/lib/openai/models/realtime/realtime_transcription_session_create_request.rb @@ -4,14 +4,6 @@ module OpenAI module Models module Realtime class RealtimeTranscriptionSessionCreateRequest < OpenAI::Internal::Type::BaseModel - # @!attribute model - # ID of the model to use. The options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source - # Whisper V2 model). - # - # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Model] - required :model, union: -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model } - # @!attribute type # The type of session to create. Always `transcription` for transcription # sessions. @@ -19,106 +11,35 @@ class RealtimeTranscriptionSessionCreateRequest < OpenAI::Internal::Type::BaseMo # @return [Symbol, :transcription] required :type, const: :transcription + # @!attribute audio + # Configuration for input and output audio. + # + # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudio, nil] + optional :audio, -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudio } + # @!attribute include - # The set of items to include in the transcription. Current available items are: + # Additional fields to include in server outputs. # - # - `item.input_audio_transcription.logprobs` + # `item.input_audio_transcription.logprobs`: Include logprobs for input audio + # transcription. # # @return [Array, nil] optional :include, -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include] } - # @!attribute input_audio_format - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For - # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel - # (mono), and little-endian byte order. - # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat, nil] - optional :input_audio_format, - enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat } - - # @!attribute input_audio_noise_reduction - # Configuration for input audio noise reduction. This can be set to `null` to turn - # off. Noise reduction filters audio added to the input audio buffer before it is - # sent to VAD and the model. Filtering the audio can improve VAD and turn - # detection accuracy (reducing false positives) and model performance by improving - # perception of the input audio. 
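To make the reshaped request concrete, a minimal sketch (not from the diff) of the new create-request surface, reusing the `audio` object built in the previous example. The `include` entry assumes the symbol form of the logprobs item named above.

```ruby
# The session type defaults to :transcription via the const declaration,
# so only `audio` and `include` need to be supplied.
request = OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest.new(
  audio: audio,
  include: [:"item.input_audio_transcription.logprobs"]
)
```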
- # - # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction, nil] - optional :input_audio_noise_reduction, - -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction } - - # @!attribute input_audio_transcription - # Configuration for input audio transcription. The client can optionally set the - # language and prompt for transcription, these offer additional guidance to the - # transcription service. - # - # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription, nil] - optional :input_audio_transcription, - -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription } - - # @!attribute turn_detection - # Configuration for turn detection. Can be set to `null` to turn off. Server VAD - # means that the model will detect the start and end of speech based on audio - # volume and respond at the end of user speech. - # - # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection, nil] - optional :turn_detection, - -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection } - - # @!method initialize(model:, include: nil, input_audio_format: nil, input_audio_noise_reduction: nil, input_audio_transcription: nil, turn_detection: nil, type: :transcription) + # @!method initialize(audio: nil, include: nil, type: :transcription) # Some parameter documentations has been truncated, see # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest} for more # details. # # Realtime transcription session object configuration. # - # @param model [String, Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Model] ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transc - # - # @param include [Array] The set of items to include in the transcription. Current available items are: - # - # @param input_audio_format [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @param input_audio_noise_reduction [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn + # @param audio [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudio] Configuration for input and output audio. # - # @param input_audio_transcription [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription] Configuration for input audio transcription. The client can optionally set the l - # - # @param turn_detection [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection] Configuration for turn detection. Can be set to `null` to turn off. Server VAD m + # @param include [Array] Additional fields to include in server outputs. # # @param type [Symbol, :transcription] The type of session to create. Always `transcription` for transcription sessions - # ID of the model to use. The options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source - # Whisper V2 model). 
- # - # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest#model - module Model - extend OpenAI::Internal::Type::Union - - variant String - - variant const: -> { OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::WHISPER_1 } - - variant const: -> { OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::GPT_4O_TRANSCRIBE } - - variant const: -> { OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::GPT_4O_MINI_TRANSCRIBE } - - # @!method self.variants - # @return [Array(String, Symbol)] - - define_sorbet_constant!(:Variants) do - T.type_alias { T.any(String, OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol) } - end - - # @!group - - WHISPER_1 = :"whisper-1" - GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" - GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" - - # @!endgroup - end - module Include extend OpenAI::Internal::Type::Enum @@ -127,185 +48,6 @@ module Include # @!method self.values # @return [Array] end - - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For - # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel - # (mono), and little-endian byte order. - # - # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest#input_audio_format - module InputAudioFormat - extend OpenAI::Internal::Type::Enum - - PCM16 = :pcm16 - G711_ULAW = :g711_ulaw - G711_ALAW = :g711_alaw - - # @!method self.values - # @return [Array] - end - - # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest#input_audio_noise_reduction - class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel - # @!attribute type - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. - # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type, nil] - optional :type, - enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type } - - # @!method initialize(type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction} - # for more details. - # - # Configuration for input audio noise reduction. This can be set to `null` to turn - # off. Noise reduction filters audio added to the input audio buffer before it is - # sent to VAD and the model. Filtering the audio can improve VAD and turn - # detection accuracy (reducing false positives) and model performance by improving - # perception of the input audio. - # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type] Type of noise reduction. `near_field` is for close-talking microphones such as h - - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. 
- # - # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction#type - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD = :near_field - FAR_FIELD = :far_field - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest#input_audio_transcription - class InputAudioTranscription < OpenAI::Internal::Type::BaseModel - # @!attribute language - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - # - # @return [String, nil] - optional :language, String - - # @!attribute model - # The model to use for transcription, current options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1`. - # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model, nil] - optional :model, - enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model } - - # @!attribute prompt - # An optional text to guide the model's style or continue a previous audio - # segment. For `whisper-1`, the - # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - # For `gpt-4o-transcribe` models, the prompt is a free text string, for example - # "expect words related to technology". - # - # @return [String, nil] - optional :prompt, String - - # @!method initialize(language: nil, model: nil, prompt: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription} - # for more details. - # - # Configuration for input audio transcription. The client can optionally set the - # language and prompt for transcription, these offer additional guidance to the - # transcription service. - # - # @param language [String] The language of the input audio. Supplying the input language in - # - # @param model [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model] The model to use for transcription, current options are `gpt-4o-transcribe`, `gp - # - # @param prompt [String] An optional text to guide the model's style or continue a previous audio - - # The model to use for transcription, current options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1`. - # - # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription#model - module Model - extend OpenAI::Internal::Type::Enum - - GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" - GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" - WHISPER_1 = :"whisper-1" - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest#turn_detection - class TurnDetection < OpenAI::Internal::Type::BaseModel - # @!attribute prefix_padding_ms - # Amount of audio to include before the VAD detected speech (in milliseconds). - # Defaults to 300ms. - # - # @return [Integer, nil] - optional :prefix_padding_ms, Integer - - # @!attribute silence_duration_ms - # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. - # With shorter values the model will respond more quickly, but may jump in on - # short pauses from the user. 
- # - # @return [Integer, nil] - optional :silence_duration_ms, Integer - - # @!attribute threshold - # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher - # threshold will require louder audio to activate the model, and thus might - # perform better in noisy environments. - # - # @return [Float, nil] - optional :threshold, Float - - # @!attribute type - # Type of turn detection. Only `server_vad` is currently supported for - # transcription sessions. - # - # @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type, nil] - optional :type, - enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type } - - # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection} - # for more details. - # - # Configuration for turn detection. Can be set to `null` to turn off. Server VAD - # means that the model will detect the start and end of speech based on audio - # volume and respond at the end of user speech. - # - # @param prefix_padding_ms [Integer] Amount of audio to include before the VAD detected speech (in - # - # @param silence_duration_ms [Integer] Duration of silence to detect speech stop (in milliseconds). Defaults - # - # @param threshold [Float] Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A - # - # @param type [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type] Type of turn detection. Only `server_vad` is currently supported for transcripti - - # Type of turn detection. Only `server_vad` is currently supported for - # transcription sessions. - # - # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection#type - module Type - extend OpenAI::Internal::Type::Enum - - SERVER_VAD = :server_vad - - # @!method self.values - # @return [Array] - end - end end end end diff --git a/lib/openai/models/realtime/realtime_transcription_session_create_response.rb b/lib/openai/models/realtime/realtime_transcription_session_create_response.rb new file mode 100644 index 00000000..59a1518e --- /dev/null +++ b/lib/openai/models/realtime/realtime_transcription_session_create_response.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel + # @!attribute client_secret + # Ephemeral key returned by the API. Only present when the session is created on + # the server via REST API. + # + # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionClientSecret] + required :client_secret, -> { OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret } + + # @!attribute input_audio_format + # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # + # @return [String, nil] + optional :input_audio_format, String + + # @!attribute input_audio_transcription + # Configuration of the transcription model. + # + # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription, nil] + optional :input_audio_transcription, + -> { OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription } + + # @!attribute modalities + # The set of modalities the model can respond with. To disable audio, set this to + # ["text"]. 
+ # + # @return [Array, nil] + optional :modalities, + -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality] } + + # @!attribute turn_detection + # Configuration for turn detection. Can be set to `null` to turn off. Server VAD + # means that the model will detect the start and end of speech based on audio + # volume and respond at the end of user speech. + # + # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionTurnDetection, nil] + optional :turn_detection, -> { OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection } + + # @!method initialize(client_secret:, input_audio_format: nil, input_audio_transcription: nil, modalities: nil, turn_detection: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse} for more + # details. + # + # A new Realtime transcription session configuration. + # + # When a session is created on the server via REST API, the session object also + # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is + # not present when a session is updated via the WebSocket API. + # + # @param client_secret [OpenAI::Models::Realtime::RealtimeTranscriptionSessionClientSecret] Ephemeral key returned by the API. Only present when the session is + # + # @param input_audio_format [String] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # + # @param input_audio_transcription [OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription] Configuration of the transcription model. + # + # @param modalities [Array] The set of modalities the model can respond with. To disable audio, + # + # @param turn_detection [OpenAI::Models::Realtime::RealtimeTranscriptionSessionTurnDetection] Configuration for turn detection. Can be set to `null` to turn off. Server + + module Modality + extend OpenAI::Internal::Type::Enum + + TEXT = :text + AUDIO = :audio + + # @!method self.values + # @return [Array] + end + end + end + + RealtimeTranscriptionSessionCreateResponse = Realtime::RealtimeTranscriptionSessionCreateResponse + end +end diff --git a/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb b/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb new file mode 100644 index 00000000..a8b4c51e --- /dev/null +++ b/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeTranscriptionSessionInputAudioTranscription < OpenAI::Internal::Type::BaseModel + # @!attribute language + # The language of the input audio. Supplying the input language in + # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + # format will improve accuracy and latency. + # + # @return [String, nil] + optional :language, String + + # @!attribute model + # The model to use for transcription. Current options are `whisper-1`, + # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. + # + # @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model, nil] + optional :model, enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model } + + # @!attribute prompt + # An optional text to guide the model's style or continue a previous audio + # segment. 
For `whisper-1`, the + # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + # For `gpt-4o-transcribe` models, the prompt is a free text string, for example + # "expect words related to technology". + # + # @return [String, nil] + optional :prompt, String + + # @!method initialize(language: nil, model: nil, prompt: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription} + # for more details. + # + # Configuration of the transcription model. + # + # @param language [String] The language of the input audio. Supplying the input language in + # + # @param model [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model] The model to use for transcription. Current options are `whisper-1`, `gpt-4o-tra + # + # @param prompt [String] An optional text to guide the model's style or continue a previous audio + + # The model to use for transcription. Current options are `whisper-1`, + # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. + # + # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription#model + module Model + extend OpenAI::Internal::Type::Enum + + WHISPER_1 = :"whisper-1" + GPT_4O_TRANSCRIBE_LATEST = :"gpt-4o-transcribe-latest" + GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" + GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" + + # @!method self.values + # @return [Array] + end + end + end + + RealtimeTranscriptionSessionInputAudioTranscription = + Realtime::RealtimeTranscriptionSessionInputAudioTranscription + end +end diff --git a/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb b/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb new file mode 100644 index 00000000..e7f1ba0c --- /dev/null +++ b/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeTranscriptionSessionTurnDetection < OpenAI::Internal::Type::BaseModel + # @!attribute prefix_padding_ms + # Amount of audio to include before the VAD detected speech (in milliseconds). + # Defaults to 300ms. + # + # @return [Integer, nil] + optional :prefix_padding_ms, Integer + + # @!attribute silence_duration_ms + # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + # With shorter values the model will respond more quickly, but may jump in on + # short pauses from the user. + # + # @return [Integer, nil] + optional :silence_duration_ms, Integer + + # @!attribute threshold + # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + # threshold will require louder audio to activate the model, and thus might + # perform better in noisy environments. + # + # @return [Float, nil] + optional :threshold, Float + + # @!attribute type + # Type of turn detection, only `server_vad` is currently supported. + # + # @return [String, nil] + optional :type, String + + # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionTurnDetection} for more + # details. + # + # Configuration for turn detection. Can be set to `null` to turn off. 
Server VAD + # means that the model will detect the start and end of speech based on audio + # volume and respond at the end of user speech. + # + # @param prefix_padding_ms [Integer] Amount of audio to include before the VAD detected speech (in + # + # @param silence_duration_ms [Integer] Duration of silence to detect speech stop (in milliseconds). Defaults + # + # @param threshold [Float] Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A + # + # @param type [String] Type of turn detection, only `server_vad` is currently supported. + end + end + + RealtimeTranscriptionSessionTurnDetection = Realtime::RealtimeTranscriptionSessionTurnDetection + end +end diff --git a/lib/openai/models/realtime/realtime_truncation.rb b/lib/openai/models/realtime/realtime_truncation.rb index 01fbef52..7220e112 100644 --- a/lib/openai/models/realtime/realtime_truncation.rb +++ b/lib/openai/models/realtime/realtime_truncation.rb @@ -4,18 +4,19 @@ module OpenAI module Models module Realtime # Controls how the realtime conversation is truncated prior to model inference. - # The default is `auto`. When set to `retention_ratio`, the server retains a - # fraction of the conversation tokens prior to the instructions. + # The default is `auto`. module RealtimeTruncation extend OpenAI::Internal::Type::Union - # The truncation strategy to use for the session. + # The truncation strategy to use for the session. `auto` is the default truncation strategy. `disabled` will disable truncation and emit errors when the conversation exceeds the input token limit. variant enum: -> { OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy } - # Retain a fraction of the conversation tokens. - variant -> { OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation } + # Retain a fraction of the conversation tokens when the conversation exceeds the input token limit. This allows you to amortize truncations across multiple turns, which can help improve cached token usage. + variant -> { OpenAI::Realtime::RealtimeTruncationRetentionRatio } - # The truncation strategy to use for the session. + # The truncation strategy to use for the session. `auto` is the default truncation + # strategy. `disabled` will disable truncation and emit errors when the + # conversation exceeds the input token limit. module RealtimeTruncationStrategy extend OpenAI::Internal::Type::Enum @@ -26,41 +27,8 @@ module RealtimeTruncationStrategy # @return [Array] end - class RetentionRatioTruncation < OpenAI::Internal::Type::BaseModel - # @!attribute retention_ratio - # Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0). - # - # @return [Float] - required :retention_ratio, Float - - # @!attribute type - # Use retention ratio truncation. - # - # @return [Symbol, :retention_ratio] - required :type, const: :retention_ratio - - # @!attribute post_instructions_token_limit - # Optional cap on tokens allowed after the instructions. - # - # @return [Integer, nil] - optional :post_instructions_token_limit, Integer, nil?: true - - # @!method initialize(retention_ratio:, post_instructions_token_limit: nil, type: :retention_ratio) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::RealtimeTruncation::RetentionRatioTruncation} for - # more details. - # - # Retain a fraction of the conversation tokens. - # - # @param retention_ratio [Float] Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0). 
- # - # @param post_instructions_token_limit [Integer, nil] Optional cap on tokens allowed after the instructions. - # - # @param type [Symbol, :retention_ratio] Use retention ratio truncation. - end - # @!method self.variants - # @return [Array(Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncation::RetentionRatioTruncation)] + # @return [Array(Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio)] end end end diff --git a/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb b/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb new file mode 100644 index 00000000..43e6778e --- /dev/null +++ b/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module OpenAI + module Models + module Realtime + class RealtimeTruncationRetentionRatio < OpenAI::Internal::Type::BaseModel + # @!attribute retention_ratio + # Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the + # conversation exceeds the input token limit. + # + # @return [Float] + required :retention_ratio, Float + + # @!attribute type + # Use retention ratio truncation. + # + # @return [Symbol, :retention_ratio] + required :type, const: :retention_ratio + + # @!method initialize(retention_ratio:, type: :retention_ratio) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio} for more details. + # + # Retain a fraction of the conversation tokens when the conversation exceeds the + # input token limit. This allows you to amortize truncations across multiple + # turns, which can help improve cached token usage. + # + # @param retention_ratio [Float] Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the + # + # @param type [Symbol, :retention_ratio] Use retention ratio truncation. + end + end + end +end diff --git a/lib/openai/models/realtime/response_cancel_event.rb b/lib/openai/models/realtime/response_cancel_event.rb index 9f37ad9f..9133324c 100644 --- a/lib/openai/models/realtime/response_cancel_event.rb +++ b/lib/openai/models/realtime/response_cancel_event.rb @@ -29,7 +29,9 @@ class ResponseCancelEvent < OpenAI::Internal::Type::BaseModel # # Send this event to cancel an in-progress response. The server will respond with # a `response.done` event with a status of `response.status=cancelled`. If there - # is no response to cancel, the server will respond with an error. + # is no response to cancel, the server will respond with an error. It's safe to + # call `response.cancel` even if no response is in progress, an error will be + # returned and the session will remain unaffected. # # @param event_id [String] Optional client-generated ID used to identify this event.
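For reviewers, a minimal sketch (not part of the diff) of the new retention-ratio truncation model; the 0.5 value is illustrative.

```ruby
# Keep half of the post-instruction conversation tokens whenever the
# conversation exceeds the input token limit. `type` defaults to
# :retention_ratio via the const declaration above.
truncation = OpenAI::Realtime::RealtimeTruncationRetentionRatio.new(retention_ratio: 0.5)
```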
# diff --git a/lib/openai/models/realtime/response_create_event.rb b/lib/openai/models/realtime/response_create_event.rb index 68ac2112..e61f6846 100644 --- a/lib/openai/models/realtime/response_create_event.rb +++ b/lib/openai/models/realtime/response_create_event.rb @@ -19,8 +19,8 @@ class ResponseCreateEvent < OpenAI::Internal::Type::BaseModel # @!attribute response # Create a new Realtime response with these parameters # - # @return [OpenAI::Models::Realtime::ResponseCreateEvent::Response, nil] - optional :response, -> { OpenAI::Realtime::ResponseCreateEvent::Response } + # @return [OpenAI::Models::Realtime::RealtimeResponseCreateParams, nil] + optional :response, -> { OpenAI::Realtime::RealtimeResponseCreateParams } # @!method initialize(event_id: nil, response: nil, type: :"response.create") # This event instructs the server to create a Response, which means triggering @@ -29,362 +29,32 @@ class ResponseCreateEvent < OpenAI::Internal::Type::BaseModel # # A Response will include at least one Item, and may have two, in which case the # second will be a function call. These Items will be appended to the conversation - # history. + # history by default. # # The server will respond with a `response.created` event, events for Items and # content created, and finally a `response.done` event to indicate the Response is # complete. # - # The `response.create` event includes inference configuration like - # `instructions`, and `temperature`. These fields will override the Session's - # configuration for this Response only. + # The `response.create` event includes inference configuration like `instructions` + # and `tools`. If these are set, they will override the Session's configuration + # for this Response only. + # + # Responses can be created out-of-band of the default Conversation, meaning that + # they can have arbitrary input, and it's possible to disable writing the output + # to the Conversation. Only one Response can write to the default Conversation at + # a time, but otherwise multiple Responses can be created in parallel. The + # `metadata` field is a good way to disambiguate multiple simultaneous Responses. + # + # Clients can set `conversation` to `none` to create a Response that does not + # write to the default Conversation. Arbitrary input can be provided with the + # `input` field, which is an array accepting raw Items and references to existing + # Items. # # @param event_id [String] Optional client-generated ID used to identify this event. # - # @param response [OpenAI::Models::Realtime::ResponseCreateEvent::Response] Create a new Realtime response with these parameters + # @param response [OpenAI::Models::Realtime::RealtimeResponseCreateParams] Create a new Realtime response with these parameters # # @param type [Symbol, :"response.create"] The event type, must be `response.create`. - - # @see OpenAI::Models::Realtime::ResponseCreateEvent#response - class Response < OpenAI::Internal::Type::BaseModel - # @!attribute conversation - # Controls which conversation the response is added to. Currently supports `auto` - # and `none`, with `auto` as the default value. The `auto` value means that the - # contents of the response will be added to the default conversation. Set this to - # `none` to create an out-of-band response which will not add items to default - # conversation. 
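A hedged sketch of the out-of-band pattern described above. It assumes `RealtimeResponseCreateParams` exposes the `conversation` and `metadata` fields that this documentation mentions; their definitions are not shown in this hunk.

```ruby
# Create a Response that does not write to the default Conversation.
event = OpenAI::Realtime::ResponseCreateEvent.new(
  response: OpenAI::Realtime::RealtimeResponseCreateParams.new(
    conversation: :none,                  # out-of-band: skip the default Conversation
    metadata: {purpose: "classification"} # disambiguate parallel Responses
  )
)
```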
- # - # @return [String, Symbol, OpenAI::Models::Realtime::ResponseCreateEvent::Response::Conversation, nil] - optional :conversation, union: -> { OpenAI::Realtime::ResponseCreateEvent::Response::Conversation } - - # @!attribute input - # Input items to include in the prompt for the model. Using this field creates a - # new context for this Response instead of using the default conversation. An - # empty array `[]` will clear the context for this Response. Note that this can - # include references to items from the default conversation. - # - # @return [Array, nil] - optional :input, -> { OpenAI::Internal::Type::ArrayOf[union: OpenAI::Realtime::ConversationItem] } - - # @!attribute instructions - # The default system instructions (i.e. system message) prepended to model calls. - # This field allows the client to guide the model on desired responses. The model - # can be instructed on response content and format, (e.g. "be extremely succinct", - # "act friendly", "here are examples of good responses") and on audio behavior - # (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The - # instructions are not guaranteed to be followed by the model, but they provide - # guidance to the model on the desired behavior. - # - # Note that the server sets default instructions which will be used if this field - # is not set and are visible in the `session.created` event at the start of the - # session. - # - # @return [String, nil] - optional :instructions, String - - # @!attribute max_output_tokens - # Maximum number of output tokens for a single assistant response, inclusive of - # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or - # `inf` for the maximum available tokens for a given model. Defaults to `inf`. - # - # @return [Integer, Symbol, :inf, nil] - optional :max_output_tokens, - union: -> { OpenAI::Realtime::ResponseCreateEvent::Response::MaxOutputTokens } - - # @!attribute metadata - # Set of 16 key-value pairs that can be attached to an object. This can be useful - # for storing additional information about the object in a structured format, and - # querying for objects via API or the dashboard. - # - # Keys are strings with a maximum length of 64 characters. Values are strings with - # a maximum length of 512 characters. - # - # @return [Hash{Symbol=>String}, nil] - optional :metadata, OpenAI::Internal::Type::HashOf[String], nil?: true - - # @!attribute modalities - # The set of modalities the model can respond with. To disable audio, set this to - # ["text"]. - # - # @return [Array, nil] - optional :modalities, - -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::ResponseCreateEvent::Response::Modality] } - - # @!attribute output_audio_format - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @return [Symbol, OpenAI::Models::Realtime::ResponseCreateEvent::Response::OutputAudioFormat, nil] - optional :output_audio_format, - enum: -> { OpenAI::Realtime::ResponseCreateEvent::Response::OutputAudioFormat } - - # @!attribute prompt - # Reference to a prompt template and its variables. - # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). - # - # @return [OpenAI::Models::Responses::ResponsePrompt, nil] - optional :prompt, -> { OpenAI::Responses::ResponsePrompt }, nil?: true - - # @!attribute temperature - # Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. 
- # - # @return [Float, nil] - optional :temperature, Float - - # @!attribute tool_choice - # How the model chooses tools. Provide one of the string modes or force a specific - # function/MCP tool. - # - # @return [Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp, nil] - optional :tool_choice, union: -> { OpenAI::Realtime::ResponseCreateEvent::Response::ToolChoice } - - # @!attribute tools - # Tools (functions) available to the model. - # - # @return [Array, nil] - optional :tools, - -> { OpenAI::Internal::Type::ArrayOf[OpenAI::Realtime::ResponseCreateEvent::Response::Tool] } - - # @!attribute voice - # The voice the model uses to respond. Voice cannot be changed during the session - # once the model has responded with audio at least once. Current voice options are - # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. - # - # @return [String, Symbol, OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice, nil] - optional :voice, union: -> { OpenAI::Realtime::ResponseCreateEvent::Response::Voice } - - # @!method initialize(conversation: nil, input: nil, instructions: nil, max_output_tokens: nil, metadata: nil, modalities: nil, output_audio_format: nil, prompt: nil, temperature: nil, tool_choice: nil, tools: nil, voice: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::ResponseCreateEvent::Response} for more details. - # - # Create a new Realtime response with these parameters - # - # @param conversation [String, Symbol, OpenAI::Models::Realtime::ResponseCreateEvent::Response::Conversation] Controls which conversation the response is added to. Currently supports - # - # @param input [Array] Input items to include in the prompt for the model. Using this field - # - # @param instructions [String] The default system instructions (i.e. system message) prepended to model - # - # @param max_output_tokens [Integer, Symbol, :inf] Maximum number of output tokens for a single assistant response, - # - # @param metadata [Hash{Symbol=>String}, nil] Set of 16 key-value pairs that can be attached to an object. This can be - # - # @param modalities [Array] The set of modalities the model can respond with. To disable audio, - # - # @param output_audio_format [Symbol, OpenAI::Models::Realtime::ResponseCreateEvent::Response::OutputAudioFormat] The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @param prompt [OpenAI::Models::Responses::ResponsePrompt, nil] Reference to a prompt template and its variables. - # - # @param temperature [Float] Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. - # - # @param tool_choice [Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp] How the model chooses tools. Provide one of the string modes or force a specific - # - # @param tools [Array] Tools (functions) available to the model. - # - # @param voice [String, Symbol, OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice] The voice the model uses to respond. Voice cannot be changed during the - - # Controls which conversation the response is added to. Currently supports `auto` - # and `none`, with `auto` as the default value. The `auto` value means that the - # contents of the response will be added to the default conversation. 
Set this to - # `none` to create an out-of-band response which will not add items to default - # conversation. - # - # @see OpenAI::Models::Realtime::ResponseCreateEvent::Response#conversation - module Conversation - extend OpenAI::Internal::Type::Union - - variant String - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Conversation::AUTO } - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Conversation::NONE } - - # @!method self.variants - # @return [Array(String, Symbol)] - - define_sorbet_constant!(:Variants) do - T.type_alias { T.any(String, OpenAI::Realtime::ResponseCreateEvent::Response::Conversation::TaggedSymbol) } - end - - # @!group - - AUTO = :auto - NONE = :none - - # @!endgroup - end - - # Maximum number of output tokens for a single assistant response, inclusive of - # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or - # `inf` for the maximum available tokens for a given model. Defaults to `inf`. - # - # @see OpenAI::Models::Realtime::ResponseCreateEvent::Response#max_output_tokens - module MaxOutputTokens - extend OpenAI::Internal::Type::Union - - variant Integer - - variant const: :inf - - # @!method self.variants - # @return [Array(Integer, Symbol, :inf)] - end - - module Modality - extend OpenAI::Internal::Type::Enum - - TEXT = :text - AUDIO = :audio - - # @!method self.values - # @return [Array] - end - - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @see OpenAI::Models::Realtime::ResponseCreateEvent::Response#output_audio_format - module OutputAudioFormat - extend OpenAI::Internal::Type::Enum - - PCM16 = :pcm16 - G711_ULAW = :g711_ulaw - G711_ALAW = :g711_alaw - - # @!method self.values - # @return [Array] - end - - # How the model chooses tools. Provide one of the string modes or force a specific - # function/MCP tool. - # - # @see OpenAI::Models::Realtime::ResponseCreateEvent::Response#tool_choice - module ToolChoice - extend OpenAI::Internal::Type::Union - - # Controls which (if any) tool is called by the model. - # - # `none` means the model will not call any tool and instead generates a message. - # - # `auto` means the model can pick between generating a message or calling one or - # more tools. - # - # `required` means the model must call one or more tools. - variant enum: -> { OpenAI::Responses::ToolChoiceOptions } - - # Use this option to force the model to call a specific function. - variant -> { OpenAI::Responses::ToolChoiceFunction } - - # Use this option to force the model to call a specific tool on a remote MCP server. - variant -> { OpenAI::Responses::ToolChoiceMcp } - - # @!method self.variants - # @return [Array(Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp)] - end - - class Tool < OpenAI::Internal::Type::BaseModel - # @!attribute description - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - # - # @return [String, nil] - optional :description, String - - # @!attribute name - # The name of the function. - # - # @return [String, nil] - optional :name, String - - # @!attribute parameters - # Parameters of the function in JSON Schema. - # - # @return [Object, nil] - optional :parameters, OpenAI::Internal::Type::Unknown - - # @!attribute type - # The type of the tool, i.e. `function`. 
- # - # @return [Symbol, OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool::Type, nil] - optional :type, enum: -> { OpenAI::Realtime::ResponseCreateEvent::Response::Tool::Type } - - # @!method initialize(description: nil, name: nil, parameters: nil, type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool} for more - # details. - # - # @param description [String] The description of the function, including guidance on when and how - # - # @param name [String] The name of the function. - # - # @param parameters [Object] Parameters of the function in JSON Schema. - # - # @param type [Symbol, OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool::Type] The type of the tool, i.e. `function`. - - # The type of the tool, i.e. `function`. - # - # @see OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool#type - module Type - extend OpenAI::Internal::Type::Enum - - FUNCTION = :function - - # @!method self.values - # @return [Array] - end - end - - # The voice the model uses to respond. Voice cannot be changed during the session - # once the model has responded with audio at least once. Current voice options are - # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. - # - # @see OpenAI::Models::Realtime::ResponseCreateEvent::Response#voice - module Voice - extend OpenAI::Internal::Type::Union - - variant String - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice::ALLOY } - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice::ASH } - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice::BALLAD } - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice::CORAL } - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice::ECHO } - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice::SAGE } - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice::SHIMMER } - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice::VERSE } - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice::MARIN } - - variant const: -> { OpenAI::Models::Realtime::ResponseCreateEvent::Response::Voice::CEDAR } - - # @!method self.variants - # @return [Array(String, Symbol)] - - define_sorbet_constant!(:Variants) do - T.type_alias { T.any(String, OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol) } - end - - # @!group - - ALLOY = :alloy - ASH = :ash - BALLAD = :ballad - CORAL = :coral - ECHO = :echo - SAGE = :sage - SHIMMER = :shimmer - VERSE = :verse - MARIN = :marin - CEDAR = :cedar - - # @!endgroup - end - end end end end diff --git a/lib/openai/models/realtime/response_done_event.rb b/lib/openai/models/realtime/response_done_event.rb index 4fd97cf2..0de4f32a 100644 --- a/lib/openai/models/realtime/response_done_event.rb +++ b/lib/openai/models/realtime/response_done_event.rb @@ -27,6 +27,13 @@ class ResponseDoneEvent < OpenAI::Internal::Type::BaseModel # state. The Response object included in the `response.done` event will include # all output Items in the Response but will omit the raw audio data. # + # Clients should check the `status` field of the Response to determine if it was + # successful (`completed`) or if there was another outcome: `cancelled`, `failed`, + # or `incomplete`. 
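The removed `ResponseCreateEvent::Response` model above still documents the wire shape of a `response.create` event. As an illustrative sketch (not part of this patch; plain hashes stand in for the typed models, and the `"response.create"` event name follows Realtime API convention), an out-of-band response request could be built like this:

```ruby
require "json"

# Sketch of a `response.create` payload as a plain hash. Field names and
# constraints are taken from the documentation above; the WebSocket transport
# itself is assumed and not shown.
event = {
  type: "response.create",
  response: {
    # "none" yields an out-of-band response that is not added to the
    # default conversation; "auto" (the default) appends to it.
    conversation: "none",
    instructions: "Be extremely succinct.",
    max_output_tokens: "inf", # or an Integer between 1 and 4096
    # At most 16 pairs; keys up to 64 chars, values up to 512 chars.
    metadata: { purpose: "docs-example" },
    modalities: ["text"], # omit "audio" to disable audio output
    output_audio_format: "pcm16",
    temperature: 0.8 # limited to the range [0.6, 1.2]
  }
}

payload = JSON.generate(event) # string to send over the Realtime WebSocket
```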
+ # + # A response will contain all output items that were generated during the + # response, excluding any audio content. + # # @param event_id [String] The unique ID of the server event. # # @param response [OpenAI::Models::Realtime::RealtimeResponse] The response resource. diff --git a/lib/openai/models/realtime/session_created_event.rb b/lib/openai/models/realtime/session_created_event.rb index b14daac0..ccefcc96 100644 --- a/lib/openai/models/realtime/session_created_event.rb +++ b/lib/openai/models/realtime/session_created_event.rb @@ -11,10 +11,10 @@ class SessionCreatedEvent < OpenAI::Internal::Type::BaseModel required :event_id, String # @!attribute session - # Realtime session object. + # The session configuration. # - # @return [OpenAI::Models::Realtime::RealtimeSession] - required :session, -> { OpenAI::Realtime::RealtimeSession } + # @return [OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] + required :session, union: -> { OpenAI::Realtime::SessionCreatedEvent::Session } # @!attribute type # The event type, must be `session.created`. @@ -29,9 +29,25 @@ class SessionCreatedEvent < OpenAI::Internal::Type::BaseModel # # @param event_id [String] The unique ID of the server event. # - # @param session [OpenAI::Models::Realtime::RealtimeSession] Realtime session object. + # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] The session configuration. # # @param type [Symbol, :"session.created"] The event type, must be `session.created`. + + # The session configuration. + # + # @see OpenAI::Models::Realtime::SessionCreatedEvent#session + module Session + extend OpenAI::Internal::Type::Union + + # Realtime session object configuration. + variant -> { OpenAI::Realtime::RealtimeSessionCreateRequest } + + # Realtime transcription session object configuration. + variant -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest } + + # @!method self.variants + # @return [Array(OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest)] + end end end end diff --git a/lib/openai/models/realtime/session_update_event.rb b/lib/openai/models/realtime/session_update_event.rb index db7a5f1f..33c4baea 100644 --- a/lib/openai/models/realtime/session_update_event.rb +++ b/lib/openai/models/realtime/session_update_event.rb @@ -5,10 +5,11 @@ module Models module Realtime class SessionUpdateEvent < OpenAI::Internal::Type::BaseModel # @!attribute session - # Realtime session object configuration. + # Update the Realtime session. Choose either a realtime session or a transcription + # session. # - # @return [OpenAI::Models::Realtime::RealtimeSessionCreateRequest] - required :session, -> { OpenAI::Realtime::RealtimeSessionCreateRequest } + # @return [OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] + required :session, union: -> { OpenAI::Realtime::SessionUpdateEvent::Session } # @!attribute type # The event type, must be `session.update`. @@ -17,27 +18,50 @@ class SessionUpdateEvent < OpenAI::Internal::Type::BaseModel required :type, const: :"session.update" # @!attribute event_id - # Optional client-generated ID used to identify this event. + # Optional client-generated ID used to identify this event. This is an arbitrary + # string that a client may assign. 
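A sketch of acting on that guidance, assuming the incoming event has already been parsed into a `ResponseDoneEvent` and that the SDK surfaces the enum `status` as a symbol (illustrative, not part of this patch):

```ruby
# Branch on the Response status values listed in the documentation above.
def handle_response_done(event)
  response = event.response
  case response.status
  when :completed
    puts "response finished normally"
  when :cancelled, :incomplete, :failed
    # Even on a non-completed outcome the Response carries all output items
    # generated so far (raw audio data is omitted).
    warn "response ended with status #{response.status}"
  end
end
```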
It will be passed back if there is an error + # with the event, but the corresponding `session.updated` event will not include + # it. # # @return [String, nil] optional :event_id, String # @!method initialize(session:, event_id: nil, type: :"session.update") - # Send this event to update the session’s default configuration. The client may - # send this event at any time to update any field, except for `voice`. However, - # note that once a session has been initialized with a particular `model`, it - # can’t be changed to another model using `session.update`. + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::SessionUpdateEvent} for more details. + # + # Send this event to update the session’s configuration. The client may send this + # event at any time to update any field except for `voice` and `model`. `voice` + # can be updated only if there have been no other audio outputs yet. # # When the server receives a `session.update`, it will respond with a # `session.updated` event showing the full, effective configuration. Only the - # fields that are present are updated. To clear a field like `instructions`, pass - # an empty string. + # fields that are present in the `session.update` are updated. To clear a field + # like `instructions`, pass an empty string. To clear a field like `tools`, pass + # an empty array. To clear a field like `turn_detection`, pass `null`. # - # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateRequest] Realtime session object configuration. + # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] Update the Realtime session. Choose either a realtime # - # @param event_id [String] Optional client-generated ID used to identify this event. + # @param event_id [String] Optional client-generated ID used to identify this event. This is an arbitrary s # # @param type [Symbol, :"session.update"] The event type, must be `session.update`. + + # Update the Realtime session. Choose either a realtime session or a transcription + # session. + # + # @see OpenAI::Models::Realtime::SessionUpdateEvent#session + module Session + extend OpenAI::Internal::Type::Union + + # Realtime session object configuration. + variant -> { OpenAI::Realtime::RealtimeSessionCreateRequest } + + # Realtime transcription session object configuration. + variant -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest } + + # @!method self.variants + # @return [Array(OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest)] + end end end end diff --git a/lib/openai/models/realtime/session_updated_event.rb b/lib/openai/models/realtime/session_updated_event.rb index 7b691eb4..6d4e0023 100644 --- a/lib/openai/models/realtime/session_updated_event.rb +++ b/lib/openai/models/realtime/session_updated_event.rb @@ -11,10 +11,10 @@ class SessionUpdatedEvent < OpenAI::Internal::Type::BaseModel required :event_id, String # @!attribute session - # Realtime session object. + # The session configuration. # - # @return [OpenAI::Models::Realtime::RealtimeSession] - required :session, -> { OpenAI::Realtime::RealtimeSession } + # @return [OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] + required :session, union: -> { OpenAI::Realtime::SessionUpdatedEvent::Session } # @!attribute type # The event type, must be `session.updated`. 
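To make the update-and-clear semantics concrete, here is a hedged sketch of a `session.update` payload as a plain hash (the field names and clearing rules are exactly those documented above; the transport is assumed):

```ruby
require "json"

# Omitted fields are left unchanged on the server; the three fields below
# demonstrate each documented clearing rule.
update = {
  type: "session.update",
  event_id: "evt_client_0001", # optional, arbitrary client-chosen ID
  session: {
    instructions: "",    # empty string clears instructions
    tools: [],           # empty array clears tools
    turn_detection: nil  # serialized as JSON null; disables turn detection
  }
}

JSON.generate(update) # => string to send over the Realtime WebSocket
```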
@@ -28,9 +28,25 @@ class SessionUpdatedEvent < OpenAI::Internal::Type::BaseModel # # @param event_id [String] The unique ID of the server event. # - # @param session [OpenAI::Models::Realtime::RealtimeSession] Realtime session object. + # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] The session configuration. # # @param type [Symbol, :"session.updated"] The event type, must be `session.updated`. + + # The session configuration. + # + # @see OpenAI::Models::Realtime::SessionUpdatedEvent#session + module Session + extend OpenAI::Internal::Type::Union + + # Realtime session object configuration. + variant -> { OpenAI::Realtime::RealtimeSessionCreateRequest } + + # Realtime transcription session object configuration. + variant -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest } + + # @!method self.variants + # @return [Array(OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest)] + end end end end diff --git a/lib/openai/models/realtime/transcription_session_created.rb b/lib/openai/models/realtime/transcription_session_created.rb index a7cd07f8..2d4e9c46 100644 --- a/lib/openai/models/realtime/transcription_session_created.rb +++ b/lib/openai/models/realtime/transcription_session_created.rb @@ -11,10 +11,14 @@ class TranscriptionSessionCreated < OpenAI::Internal::Type::BaseModel required :event_id, String # @!attribute session - # A Realtime transcription session configuration object. + # A new Realtime transcription session configuration. # - # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session] - required :session, -> { OpenAI::Realtime::TranscriptionSessionCreated::Session } + # When a session is created on the server via REST API, the session object also + # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is + # not present when a session is updated via the WebSocket API. + # + # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse] + required :session, -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse } # @!attribute type # The event type, must be `transcription_session.created`. @@ -30,248 +34,9 @@ class TranscriptionSessionCreated < OpenAI::Internal::Type::BaseModel # # @param event_id [String] The unique ID of the server event. # - # @param session [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session] A Realtime transcription session configuration object. + # @param session [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse] A new Realtime transcription session configuration. # # @param type [Symbol, :"transcription_session.created"] The event type, must be `transcription_session.created`. - - # @see OpenAI::Models::Realtime::TranscriptionSessionCreated#session - class Session < OpenAI::Internal::Type::BaseModel - # @!attribute id - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - # - # @return [String, nil] - optional :id, String - - # @!attribute audio - # Configuration for input audio for the session. - # - # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio, nil] - optional :audio, -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio } - - # @!attribute expires_at - # Expiration timestamp for the session, in seconds since epoch. 
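Since `session` is now a union on both `session.created` and `session.updated`, a receiving client may need to branch on the concrete variant. A minimal sketch using only the two types declared in the unions above:

```ruby
def describe_session(event)
  case event.session
  when OpenAI::Models::Realtime::RealtimeSessionCreateRequest
    "speech-to-speech session configuration"
  when OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest
    "transcription session configuration"
  else
    raise ArgumentError, "unexpected session variant: #{event.session.class}"
  end
end
```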
- # - # @return [Integer, nil] - optional :expires_at, Integer - - # @!attribute include - # Additional fields to include in server outputs. - # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. - # - # @return [Array, nil] - optional :include, - -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::TranscriptionSessionCreated::Session::Include] } - - # @!attribute object - # The object type. Always `realtime.transcription_session`. - # - # @return [String, nil] - optional :object, String - - # @!method initialize(id: nil, audio: nil, expires_at: nil, include: nil, object: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session} for more - # details. - # - # A Realtime transcription session configuration object. - # - # @param id [String] Unique identifier for the session that looks like `sess_1234567890abcdef`. - # - # @param audio [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio] Configuration for input audio for the session. - # - # @param expires_at [Integer] Expiration timestamp for the session, in seconds since epoch. - # - # @param include [Array] Additional fields to include in server outputs. - # - # @param object [String] The object type. Always `realtime.transcription_session`. - - # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session#audio - class Audio < OpenAI::Internal::Type::BaseModel - # @!attribute input - # - # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input, nil] - optional :input, -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input } - - # @!method initialize(input: nil) - # Configuration for input audio for the session. - # - # @param input [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input] - - # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio#input - class Input < OpenAI::Internal::Type::BaseModel - # @!attribute format_ - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @return [String, nil] - optional :format_, String, api_name: :format - - # @!attribute noise_reduction - # Configuration for input audio noise reduction. - # - # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction, nil] - optional :noise_reduction, - -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction } - - # @!attribute transcription - # Configuration of the transcription model. - # - # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription, nil] - optional :transcription, - -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription } - - # @!attribute turn_detection - # Configuration for turn detection. - # - # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection, nil] - optional :turn_detection, - -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection } - - # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input} - # for more details. - # - # @param format_ [String] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. 
- # - # @param noise_reduction [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction] Configuration for input audio noise reduction. - # - # @param transcription [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription] Configuration of the transcription model. - # - # @param turn_detection [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection] Configuration for turn detection. - - # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input#noise_reduction - class NoiseReduction < OpenAI::Internal::Type::BaseModel - # @!attribute type - # - # @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type, nil] - optional :type, - enum: -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type } - - # @!method initialize(type: nil) - # Configuration for input audio noise reduction. - # - # @param type [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type] - - # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction#type - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD = :near_field - FAR_FIELD = :far_field - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input#transcription - class Transcription < OpenAI::Internal::Type::BaseModel - # @!attribute language - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - # - # @return [String, nil] - optional :language, String - - # @!attribute model - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - # - # @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model, nil] - optional :model, - enum: -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model } - - # @!attribute prompt - # An optional text to guide the model's style or continue a previous audio - # segment. The - # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) - # should match the audio language. - # - # @return [String, nil] - optional :prompt, String - - # @!method initialize(language: nil, model: nil, prompt: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription} - # for more details. - # - # Configuration of the transcription model. - # - # @param language [String] The language of the input audio. Supplying the input language in - # - # @param model [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model] The model to use for transcription. Can be `gpt-4o-transcribe`, `gpt-4o-mini-tra - # - # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment - - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. 
- # - # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription#model - module Model - extend OpenAI::Internal::Type::Enum - - GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" - GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" - WHISPER_1 = :"whisper-1" - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input#turn_detection - class TurnDetection < OpenAI::Internal::Type::BaseModel - # @!attribute prefix_padding_ms - # - # @return [Integer, nil] - optional :prefix_padding_ms, Integer - - # @!attribute silence_duration_ms - # - # @return [Integer, nil] - optional :silence_duration_ms, Integer - - # @!attribute threshold - # - # @return [Float, nil] - optional :threshold, Float - - # @!attribute type - # Type of turn detection, only `server_vad` is currently supported. - # - # @return [String, nil] - optional :type, String - - # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection} - # for more details. - # - # Configuration for turn detection. - # - # @param prefix_padding_ms [Integer] - # - # @param silence_duration_ms [Integer] - # - # @param threshold [Float] - # - # @param type [String] Type of turn detection, only `server_vad` is currently supported. - end - end - end - - module Include - extend OpenAI::Internal::Type::Enum - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs" - - # @!method self.values - # @return [Array] - end - end end end end diff --git a/lib/openai/models/realtime/transcription_session_update.rb b/lib/openai/models/realtime/transcription_session_update.rb index f0be5851..eb7ec409 100644 --- a/lib/openai/models/realtime/transcription_session_update.rb +++ b/lib/openai/models/realtime/transcription_session_update.rb @@ -7,8 +7,8 @@ class TranscriptionSessionUpdate < OpenAI::Internal::Type::BaseModel # @!attribute session # Realtime transcription session object configuration. # - # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] - required :session, -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest } + # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session] + required :session, -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session } # @!attribute type # The event type, must be `transcription_session.update`. @@ -25,11 +25,187 @@ class TranscriptionSessionUpdate < OpenAI::Internal::Type::BaseModel # @!method initialize(session:, event_id: nil, type: :"transcription_session.update") # Send this event to update a transcription session. # - # @param session [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] Realtime transcription session object configuration. + # @param session [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session] Realtime transcription session object configuration. # # @param event_id [String] Optional client-generated ID used to identify this event. # # @param type [Symbol, :"transcription_session.update"] The event type, must be `transcription_session.update`. + + # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate#session + class Session < OpenAI::Internal::Type::BaseModel + # @!attribute include + # The set of items to include in the transcription. 
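A sketch of consuming the reworked `transcription_session.created` event. The `client_secret` accessor is an assumption based on the `RealtimeTranscriptionSessionClientSecret` model added in this release; the TTL and REST-versus-WebSocket behavior follow the docs above:

```ruby
def on_transcription_session_created(event)
  # event.session is a RealtimeTranscriptionSessionCreateResponse.
  session = event.session
  if session.client_secret # assumed accessor, see note above
    puts "REST-created session; ephemeral key expires after ~10 minutes (default TTL)"
  else
    puts "no ephemeral key attached (session configured over the WebSocket API)"
  end
end
```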
Current available items are: + # `item.input_audio_transcription.logprobs` + # + # @return [Array, nil] + optional :include, + -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include] } + + # @!attribute input_audio_format + # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + # (mono), and little-endian byte order. + # + # @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat, nil] + optional :input_audio_format, + enum: -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat } + + # @!attribute input_audio_noise_reduction + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + # + # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction, nil] + optional :input_audio_noise_reduction, + -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction } + + # @!attribute input_audio_transcription + # Configuration for input audio transcription. The client can optionally set the + # language and prompt for transcription, these offer additional guidance to the + # transcription service. + # + # @return [OpenAI::Models::Realtime::AudioTranscription, nil] + optional :input_audio_transcription, -> { OpenAI::Realtime::AudioTranscription } + + # @!attribute turn_detection + # Configuration for turn detection. Can be set to `null` to turn off. Server VAD + # means that the model will detect the start and end of speech based on audio + # volume and respond at the end of user speech. + # + # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection, nil] + optional :turn_detection, -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection } + + # @!method initialize(include: nil, input_audio_format: nil, input_audio_noise_reduction: nil, input_audio_transcription: nil, turn_detection: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session} for more + # details. + # + # Realtime transcription session object configuration. + # + # @param include [Array] The set of items to include in the transcription. Current available items are: + # + # @param input_audio_format [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # + # @param input_audio_noise_reduction [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn + # + # @param input_audio_transcription [OpenAI::Models::Realtime::AudioTranscription] Configuration for input audio transcription. The client can optionally set the l + # + # @param turn_detection [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection] Configuration for turn detection. Can be set to `null` to turn off. 
Server VAD m + + module Include + extend OpenAI::Internal::Type::Enum + + ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs" + + # @!method self.values + # @return [Array] + end + + # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + # (mono), and little-endian byte order. + # + # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session#input_audio_format + module InputAudioFormat + extend OpenAI::Internal::Type::Enum + + PCM16 = :pcm16 + G711_ULAW = :g711_ulaw + G711_ALAW = :g711_alaw + + # @!method self.values + # @return [Array] + end + + # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session#input_audio_noise_reduction + class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel + # @!attribute type + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. + # + # @return [Symbol, OpenAI::Models::Realtime::NoiseReductionType, nil] + optional :type, enum: -> { OpenAI::Realtime::NoiseReductionType } + + # @!method initialize(type: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction} + # for more details. + # + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + # + # @param type [Symbol, OpenAI::Models::Realtime::NoiseReductionType] Type of noise reduction. `near_field` is for close-talking microphones such as h + end + + # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session#turn_detection + class TurnDetection < OpenAI::Internal::Type::BaseModel + # @!attribute prefix_padding_ms + # Amount of audio to include before the VAD detected speech (in milliseconds). + # Defaults to 300ms. + # + # @return [Integer, nil] + optional :prefix_padding_ms, Integer + + # @!attribute silence_duration_ms + # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + # With shorter values the model will respond more quickly, but may jump in on + # short pauses from the user. + # + # @return [Integer, nil] + optional :silence_duration_ms, Integer + + # @!attribute threshold + # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + # threshold will require louder audio to activate the model, and thus might + # perform better in noisy environments. + # + # @return [Float, nil] + optional :threshold, Float + + # @!attribute type + # Type of turn detection. Only `server_vad` is currently supported for + # transcription sessions. + # + # @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type, nil] + optional :type, enum: -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type } + + # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) + # Some parameter documentations has been truncated, see + # {OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection} + # for more details. 
+ # + # Configuration for turn detection. Can be set to `null` to turn off. Server VAD + # means that the model will detect the start and end of speech based on audio + # volume and respond at the end of user speech. + # + # @param prefix_padding_ms [Integer] Amount of audio to include before the VAD detected speech (in + # + # @param silence_duration_ms [Integer] Duration of silence to detect speech stop (in milliseconds). Defaults + # + # @param threshold [Float] Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A + # + # @param type [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type] Type of turn detection. Only `server_vad` is currently supported for transcripti + + # Type of turn detection. Only `server_vad` is currently supported for + # transcription sessions. + # + # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection#type + module Type + extend OpenAI::Internal::Type::Enum + + SERVER_VAD = :server_vad + + # @!method self.values + # @return [Array] + end + end + end end end end diff --git a/lib/openai/models/realtime/transcription_session_updated_event.rb b/lib/openai/models/realtime/transcription_session_updated_event.rb index 6595f28e..c0b74534 100644 --- a/lib/openai/models/realtime/transcription_session_updated_event.rb +++ b/lib/openai/models/realtime/transcription_session_updated_event.rb @@ -11,10 +11,14 @@ class TranscriptionSessionUpdatedEvent < OpenAI::Internal::Type::BaseModel required :event_id, String # @!attribute session - # A Realtime transcription session configuration object. + # A new Realtime transcription session configuration. # - # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session] - required :session, -> { OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session } + # When a session is created on the server via REST API, the session object also + # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is + # not present when a session is updated via the WebSocket API. + # + # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse] + required :session, -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse } # @!attribute type # The event type, must be `transcription_session.updated`. @@ -31,248 +35,9 @@ class TranscriptionSessionUpdatedEvent < OpenAI::Internal::Type::BaseModel # # @param event_id [String] The unique ID of the server event. # - # @param session [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session] A Realtime transcription session configuration object. + # @param session [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse] A new Realtime transcription session configuration. # # @param type [Symbol, :"transcription_session.updated"] The event type, must be `transcription_session.updated`. - - # @see OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent#session - class Session < OpenAI::Internal::Type::BaseModel - # @!attribute id - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - # - # @return [String, nil] - optional :id, String - - # @!attribute audio - # Configuration for input audio for the session. - # - # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio, nil] - optional :audio, -> { OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio } - - # @!attribute expires_at - # Expiration timestamp for the session, in seconds since epoch. 
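Putting the new nested `Session` model together, a hedged sketch of building a `transcription_session.update` event (values are illustrative; plain hashes are coerced into the nested models, as the `OrHash` aliases in this patch's rbi files indicate):

```ruby
update = OpenAI::Models::Realtime::TranscriptionSessionUpdate.new(
  session: {
    include: [:"item.input_audio_transcription.logprobs"],
    input_audio_format: :pcm16, # 16-bit PCM, 24kHz, mono, little-endian
    input_audio_noise_reduction: { type: :near_field },
    input_audio_transcription: { model: :"gpt-4o-transcribe", language: "en" },
    turn_detection: {
      type: :server_vad,
      threshold: 0.5,           # 0.0..1.0; higher requires louder audio
      prefix_padding_ms: 300,   # audio retained before detected speech
      silence_duration_ms: 500  # silence that ends a turn
    }
  }
)
```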
- # - # @return [Integer, nil] - optional :expires_at, Integer - - # @!attribute include - # Additional fields to include in server outputs. - # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. - # - # @return [Array, nil] - optional :include, - -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Include] } - - # @!attribute object - # The object type. Always `realtime.transcription_session`. - # - # @return [String, nil] - optional :object, String - - # @!method initialize(id: nil, audio: nil, expires_at: nil, include: nil, object: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session} for more - # details. - # - # A Realtime transcription session configuration object. - # - # @param id [String] Unique identifier for the session that looks like `sess_1234567890abcdef`. - # - # @param audio [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio] Configuration for input audio for the session. - # - # @param expires_at [Integer] Expiration timestamp for the session, in seconds since epoch. - # - # @param include [Array] Additional fields to include in server outputs. - # - # @param object [String] The object type. Always `realtime.transcription_session`. - - # @see OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session#audio - class Audio < OpenAI::Internal::Type::BaseModel - # @!attribute input - # - # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input, nil] - optional :input, -> { OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input } - - # @!method initialize(input: nil) - # Configuration for input audio for the session. - # - # @param input [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input] - - # @see OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio#input - class Input < OpenAI::Internal::Type::BaseModel - # @!attribute format_ - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @return [String, nil] - optional :format_, String, api_name: :format - - # @!attribute noise_reduction - # Configuration for input audio noise reduction. - # - # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction, nil] - optional :noise_reduction, - -> { OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction } - - # @!attribute transcription - # Configuration of the transcription model. - # - # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription, nil] - optional :transcription, - -> { OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription } - - # @!attribute turn_detection - # Configuration for turn detection. - # - # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection, nil] - optional :turn_detection, - -> { OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection } - - # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input} - # for more details. 
- # - # @param format_ [String] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # - # @param noise_reduction [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction] Configuration for input audio noise reduction. - # - # @param transcription [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription] Configuration of the transcription model. - # - # @param turn_detection [OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection] Configuration for turn detection. - - # @see OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input#noise_reduction - class NoiseReduction < OpenAI::Internal::Type::BaseModel - # @!attribute type - # - # @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type, nil] - optional :type, - enum: -> { OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type } - - # @!method initialize(type: nil) - # Configuration for input audio noise reduction. - # - # @param type [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type] - - # @see OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction#type - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD = :near_field - FAR_FIELD = :far_field - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input#transcription - class Transcription < OpenAI::Internal::Type::BaseModel - # @!attribute language - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - # - # @return [String, nil] - optional :language, String - - # @!attribute model - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - # - # @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model, nil] - optional :model, - enum: -> { OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model } - - # @!attribute prompt - # An optional text to guide the model's style or continue a previous audio - # segment. The - # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) - # should match the audio language. - # - # @return [String, nil] - optional :prompt, String - - # @!method initialize(language: nil, model: nil, prompt: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription} - # for more details. - # - # Configuration of the transcription model. - # - # @param language [String] The language of the input audio. Supplying the input language in - # - # @param model [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model] The model to use for transcription. Can be `gpt-4o-transcribe`, `gpt-4o-mini-tra - # - # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment - - # The model to use for transcription. 
Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - # - # @see OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription#model - module Model - extend OpenAI::Internal::Type::Enum - - GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe" - GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe" - WHISPER_1 = :"whisper-1" - - # @!method self.values - # @return [Array] - end - end - - # @see OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input#turn_detection - class TurnDetection < OpenAI::Internal::Type::BaseModel - # @!attribute prefix_padding_ms - # - # @return [Integer, nil] - optional :prefix_padding_ms, Integer - - # @!attribute silence_duration_ms - # - # @return [Integer, nil] - optional :silence_duration_ms, Integer - - # @!attribute threshold - # - # @return [Float, nil] - optional :threshold, Float - - # @!attribute type - # Type of turn detection, only `server_vad` is currently supported. - # - # @return [String, nil] - optional :type, String - - # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil) - # Some parameter documentations has been truncated, see - # {OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection} - # for more details. - # - # Configuration for turn detection. - # - # @param prefix_padding_ms [Integer] - # - # @param silence_duration_ms [Integer] - # - # @param threshold [Float] - # - # @param type [String] Type of turn detection, only `server_vad` is currently supported. - end - end - end - - module Include - extend OpenAI::Internal::Type::Enum - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs" - - # @!method self.values - # @return [Array] - end - end end end end diff --git a/lib/openai/resources/realtime/client_secrets.rb b/lib/openai/resources/realtime/client_secrets.rb index a647f99c..b80e995e 100644 --- a/lib/openai/resources/realtime/client_secrets.rb +++ b/lib/openai/resources/realtime/client_secrets.rb @@ -7,12 +7,11 @@ class ClientSecrets # Some parameter documentations has been truncated, see # {OpenAI::Models::Realtime::ClientSecretCreateParams} for more details. # - # Create a Realtime session and client secret for either realtime or - # transcription. + # Create a Realtime client secret with an associated session configuration. # # @overload create(expires_after: nil, session: nil, request_options: {}) # - # @param expires_after [OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter] Configuration for the ephemeral token expiration. + # @param expires_after [OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter] Configuration for the client secret expiration. Expiration refers to the time af # # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] Session configuration to use for the client secret. 
Choose either a realtime # diff --git a/lib/openai/version.rb b/lib/openai/version.rb index 03f0fff4..f6f2a771 100644 --- a/lib/openai/version.rb +++ b/lib/openai/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module OpenAI - VERSION = "0.22.1" + VERSION = "0.23.0" end diff --git a/rbi/openai/models/realtime/audio_transcription.rbi b/rbi/openai/models/realtime/audio_transcription.rbi new file mode 100644 index 00000000..d31bc86e --- /dev/null +++ b/rbi/openai/models/realtime/audio_transcription.rbi @@ -0,0 +1,132 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class AudioTranscription < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::AudioTranscription, + OpenAI::Internal::AnyHash + ) + end + + # The language of the input audio. Supplying the input language in + # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + # format will improve accuracy and latency. + sig { returns(T.nilable(String)) } + attr_reader :language + + sig { params(language: String).void } + attr_writer :language + + # The model to use for transcription. Current options are `whisper-1`, + # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. + sig do + returns( + T.nilable(OpenAI::Realtime::AudioTranscription::Model::OrSymbol) + ) + end + attr_reader :model + + sig do + params( + model: OpenAI::Realtime::AudioTranscription::Model::OrSymbol + ).void + end + attr_writer :model + + # An optional text to guide the model's style or continue a previous audio + # segment. For `whisper-1`, the + # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + # For `gpt-4o-transcribe` models, the prompt is a free text string, for example + # "expect words related to technology". + sig { returns(T.nilable(String)) } + attr_reader :prompt + + sig { params(prompt: String).void } + attr_writer :prompt + + sig do + params( + language: String, + model: OpenAI::Realtime::AudioTranscription::Model::OrSymbol, + prompt: String + ).returns(T.attached_class) + end + def self.new( + # The language of the input audio. Supplying the input language in + # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + # format will improve accuracy and latency. + language: nil, + # The model to use for transcription. Current options are `whisper-1`, + # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. + model: nil, + # An optional text to guide the model's style or continue a previous audio + # segment. For `whisper-1`, the + # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + # For `gpt-4o-transcribe` models, the prompt is a free text string, for example + # "expect words related to technology". + prompt: nil + ) + end + + sig do + override.returns( + { + language: String, + model: OpenAI::Realtime::AudioTranscription::Model::OrSymbol, + prompt: String + } + ) + end + def to_hash + end + + # The model to use for transcription. Current options are `whisper-1`, + # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. 
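A sketch of calling the updated endpoint. The client construction is the gem's usual entry point; the `type: :realtime` discriminator inside `session` is an assumption not shown in this hunk:

```ruby
require "openai"

client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

secret = client.realtime.client_secrets.create(
  expires_after: { anchor: :created_at, seconds: 600 }, # 10..7200s; defaults to 600
  session: { type: :realtime } # assumed discriminator; a transcription session also works
)

# The same secret can create multiple sessions until it expires.
ephemeral_key = secret.value
```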
+ module Model + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all(Symbol, OpenAI::Realtime::AudioTranscription::Model) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + WHISPER_1 = + T.let( + :"whisper-1", + OpenAI::Realtime::AudioTranscription::Model::TaggedSymbol + ) + GPT_4O_TRANSCRIBE_LATEST = + T.let( + :"gpt-4o-transcribe-latest", + OpenAI::Realtime::AudioTranscription::Model::TaggedSymbol + ) + GPT_4O_MINI_TRANSCRIBE = + T.let( + :"gpt-4o-mini-transcribe", + OpenAI::Realtime::AudioTranscription::Model::TaggedSymbol + ) + GPT_4O_TRANSCRIBE = + T.let( + :"gpt-4o-transcribe", + OpenAI::Realtime::AudioTranscription::Model::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::AudioTranscription::Model::TaggedSymbol + ] + ) + end + def self.values + end + end + end + end + end +end diff --git a/rbi/openai/models/realtime/client_secret_create_params.rbi b/rbi/openai/models/realtime/client_secret_create_params.rbi index fc6da0e4..40d49311 100644 --- a/rbi/openai/models/realtime/client_secret_create_params.rbi +++ b/rbi/openai/models/realtime/client_secret_create_params.rbi @@ -15,7 +15,10 @@ module OpenAI ) end - # Configuration for the ephemeral token expiration. + # Configuration for the client secret expiration. Expiration refers to the time + # after which a client secret will no longer be valid for creating sessions. The + # session itself may continue after that time once started. A secret can be used + # to create multiple sessions until it expires. sig do returns( T.nilable(OpenAI::Realtime::ClientSecretCreateParams::ExpiresAfter) ) end @@ -69,7 +72,10 @@ module OpenAI ).returns(T.attached_class) end def self.new( - # Configuration for the ephemeral token expiration. + # Configuration for the client secret expiration. Expiration refers to the time + # after which a client secret will no longer be valid for creating sessions. The + # session itself may continue after that time once started. A secret can be used + # to create multiple sessions until it expires. expires_after: nil, # Session configuration to use for the client secret. Choose either a realtime # session or a transcription session. @@ -104,8 +110,9 @@ module OpenAI ) end - # The anchor point for the ephemeral token expiration. Only `created_at` is - # currently supported. + # The anchor point for the client secret expiration, meaning that `seconds` will + # be added to the `created_at` time of the client secret to produce an expiration + # timestamp. Only `created_at` is currently supported. sig do returns( T.nilable( OpenAI::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor::OrSymbol ) ) end attr_reader :anchor sig do params( anchor: OpenAI::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor::OrSymbol ).void end attr_writer :anchor # The number of seconds from the anchor point to the expiration. Select a value - between `10` and `7200`. + between `10` and `7200` (2 hours). This defaults to 600 seconds (10 minutes) if + not specified. sig { returns(T.nilable(Integer)) } attr_reader :seconds sig { params(seconds: Integer).void } attr_writer :seconds - # Configuration for the ephemeral token expiration. + # Configuration for the client secret expiration. Expiration refers to the time + # after which a client secret will no longer be valid for creating sessions. The + # session itself may continue after that time once started. A secret can be used + # to create multiple sessions until it expires. sig do params( anchor: OpenAI::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor::OrSymbol, seconds: Integer ).returns(T.attached_class) end def self.new( - # The anchor point for the ephemeral token expiration. Only `created_at` is - # currently supported.
+ # The anchor point for the client secret expiration, meaning that `seconds` will + # be added to the `created_at` time of the client secret to produce an expiration + # timestamp. Only `created_at` is currently supported. anchor: nil, # The number of seconds from the anchor point to the expiration. Select a value - between `10` and `7200`. + between `10` and `7200` (2 hours). This defaults to 600 seconds (10 minutes) if + not specified. seconds: nil ) end @@ -161,8 +174,9 @@ def to_hash end - # The anchor point for the ephemeral token expiration. Only `created_at` is - # currently supported. + # The anchor point for the client secret expiration, meaning that `seconds` will + # be added to the `created_at` time of the client secret to produce an expiration + # timestamp. Only `created_at` is currently supported. module Anchor extend OpenAI::Internal::Type::Enum diff --git a/rbi/openai/models/realtime/client_secret_create_response.rbi b/rbi/openai/models/realtime/client_secret_create_response.rbi index 4efaa115..9866f9d6 100644 --- a/rbi/openai/models/realtime/client_secret_create_response.rbi +++ b/rbi/openai/models/realtime/client_secret_create_response.rbi @@ -35,7 +35,7 @@ module OpenAI session: T.any( OpenAI::Realtime::RealtimeSessionCreateResponse::OrHash, - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::OrHash + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::OrHash ), value: String ).returns(T.attached_class) @@ -71,595 +71,10 @@ module OpenAI T.type_alias do T.any( OpenAI::Realtime::RealtimeSessionCreateResponse, - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse ) end - class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse, - OpenAI::Internal::AnyHash - ) - end - - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - sig { returns(T.nilable(String)) } - attr_reader :id - - sig { params(id: String).void } - attr_writer :id - - # Configuration for input audio for the session. - sig do - returns( - T.nilable( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio - ) - ) - end - attr_reader :audio - - sig do - params( - audio: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::OrHash - ).void - end - attr_writer :audio - - # Expiration timestamp for the session, in seconds since epoch. - sig { returns(T.nilable(Integer)) } - attr_reader :expires_at - - sig { params(expires_at: Integer).void } - attr_writer :expires_at - - # Additional fields to include in server outputs. - # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. - sig do - returns( - T.nilable( - T::Array[ - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol - ] - ) - ) - end - attr_reader :include - - sig do - params( - include: - T::Array[ - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::OrSymbol - ] - ).void - end - attr_writer :include - - # The object type.
Always `realtime.transcription_session`. - sig { returns(T.nilable(String)) } - attr_reader :object - - sig { params(object: String).void } - attr_writer :object - - # A Realtime transcription session configuration object. - sig do - params( - id: String, - audio: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::OrHash, - expires_at: Integer, - include: - T::Array[ - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::OrSymbol - ], - object: String - ).returns(T.attached_class) - end - def self.new( - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - id: nil, - # Configuration for input audio for the session. - audio: nil, - # Expiration timestamp for the session, in seconds since epoch. - expires_at: nil, - # Additional fields to include in server outputs. - # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. - include: nil, - # The object type. Always `realtime.transcription_session`. - object: nil - ) - end - - sig do - override.returns( - { - id: String, - audio: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio, - expires_at: Integer, - include: - T::Array[ - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol - ], - object: String - } - ) - end - def to_hash - end - - class Audio < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio, - OpenAI::Internal::AnyHash - ) - end - - sig do - returns( - T.nilable( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input - ) - ) - end - attr_reader :input - - sig do - params( - input: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::OrHash - ).void - end - attr_writer :input - - # Configuration for input audio for the session. - sig do - params( - input: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::OrHash - ).returns(T.attached_class) - end - def self.new(input: nil) - end - - sig do - override.returns( - { - input: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input - } - ) - end - def to_hash - end - - class Input < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input, - OpenAI::Internal::AnyHash - ) - end - - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - sig { returns(T.nilable(String)) } - attr_reader :format_ - - sig { params(format_: String).void } - attr_writer :format_ - - # Configuration for input audio noise reduction. 
- sig do - returns( - T.nilable( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction - ) - ) - end - attr_reader :noise_reduction - - sig do - params( - noise_reduction: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::OrHash - ).void - end - attr_writer :noise_reduction - - # Configuration of the transcription model. - sig do - returns( - T.nilable( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription - ) - ) - end - attr_reader :transcription - - sig do - params( - transcription: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::OrHash - ).void - end - attr_writer :transcription - - # Configuration for turn detection. - sig do - returns( - T.nilable( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection - ) - ) - end - attr_reader :turn_detection - - sig do - params( - turn_detection: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection::OrHash - ).void - end - attr_writer :turn_detection - - sig do - params( - format_: String, - noise_reduction: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::OrHash, - transcription: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::OrHash, - turn_detection: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection::OrHash - ).returns(T.attached_class) - end - def self.new( - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - format_: nil, - # Configuration for input audio noise reduction. - noise_reduction: nil, - # Configuration of the transcription model. - transcription: nil, - # Configuration for turn detection. 
- turn_detection: nil - ) - end - - sig do - override.returns( - { - format_: String, - noise_reduction: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction, - transcription: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription, - turn_detection: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection - } - ) - end - def to_hash - end - - class NoiseReduction < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction, - OpenAI::Internal::AnyHash - ) - end - - sig do - returns( - T.nilable( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol - ) - ) - end - attr_reader :type - - sig do - params( - type: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::OrSymbol - ).void - end - attr_writer :type - - # Configuration for input audio noise reduction. - sig do - params( - type: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new(type: nil) - end - - sig do - override.returns( - { - type: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol - } - ) - end - def to_hash - end - - module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - NEAR_FIELD = - T.let( - :near_field, - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol - ) - FAR_FIELD = - T.let( - :far_field, - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class Transcription < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription, - OpenAI::Internal::AnyHash - ) - end - - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - sig { returns(T.nilable(String)) } - attr_reader :language - - sig { params(language: String).void } - attr_writer :language - - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. 
- sig do - returns( - T.nilable( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol - ) - ) - end - attr_reader :model - - sig do - params( - model: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::OrSymbol - ).void - end - attr_writer :model - - # An optional text to guide the model's style or continue a previous audio - # segment. The - # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) - # should match the audio language. - sig { returns(T.nilable(String)) } - attr_reader :prompt - - sig { params(prompt: String).void } - attr_writer :prompt - - # Configuration of the transcription model. - sig do - params( - language: String, - model: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::OrSymbol, - prompt: String - ).returns(T.attached_class) - end - def self.new( - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - language: nil, - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - model: nil, - # An optional text to guide the model's style or continue a previous audio - # segment. The - # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) - # should match the audio language. - prompt: nil - ) - end - - sig do - override.returns( - { - language: String, - model: - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol, - prompt: String - } - ) - end - def to_hash - end - - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. 
- module Model - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - GPT_4O_TRANSCRIBE = - T.let( - :"gpt-4o-transcribe", - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol - ) - GPT_4O_MINI_TRANSCRIBE = - T.let( - :"gpt-4o-mini-transcribe", - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol - ) - WHISPER_1 = - T.let( - :"whisper-1", - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class TurnDetection < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection, - OpenAI::Internal::AnyHash - ) - end - - sig { returns(T.nilable(Integer)) } - attr_reader :prefix_padding_ms - - sig { params(prefix_padding_ms: Integer).void } - attr_writer :prefix_padding_ms - - sig { returns(T.nilable(Integer)) } - attr_reader :silence_duration_ms - - sig { params(silence_duration_ms: Integer).void } - attr_writer :silence_duration_ms - - sig { returns(T.nilable(Float)) } - attr_reader :threshold - - sig { params(threshold: Float).void } - attr_writer :threshold - - # Type of turn detection, only `server_vad` is currently supported. - sig { returns(T.nilable(String)) } - attr_reader :type - - sig { params(type: String).void } - attr_writer :type - - # Configuration for turn detection. - sig do - params( - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - ).returns(T.attached_class) - end - def self.new( - prefix_padding_ms: nil, - silence_duration_ms: nil, - threshold: nil, - # Type of turn detection, only `server_vad` is currently supported. 
-              type: nil
-            )
-            end
-
-            sig do
-              override.returns(
-                {
-                  prefix_padding_ms: Integer,
-                  silence_duration_ms: Integer,
-                  threshold: Float,
-                  type: String
-                }
-              )
-            end
-            def to_hash
-            end
-          end
-          end
-          end
-
-          module Include
-            extend OpenAI::Internal::Type::Enum
-
-            TaggedSymbol =
-              T.type_alias do
-                T.all(
-                  Symbol,
-                  OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include
-                )
-              end
-            OrSymbol = T.type_alias { T.any(Symbol, String) }
-
-            ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS =
-              T.let(
-                :"item.input_audio_transcription.logprobs",
-                OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
-              )
-
-            sig do
-              override.returns(
-                T::Array[
-                  OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
-                ]
-              )
-            end
-            def self.values
-            end
-          end
-        end
-
        sig do
          override.returns(
            T::Array[
diff --git a/rbi/openai/models/realtime/conversation_item_added.rbi b/rbi/openai/models/realtime/conversation_item_added.rbi
index a585e650..547d0d6c 100644
--- a/rbi/openai/models/realtime/conversation_item_added.rbi
+++ b/rbi/openai/models/realtime/conversation_item_added.rbi
@@ -43,7 +43,20 @@ module OpenAI
      sig { returns(T.nilable(String)) }
      attr_accessor :previous_item_id

-      # Returned when a conversation item is added.
+      # Sent by the server when an Item is added to the default Conversation. This can
+      # happen in several cases:
+      #
+      # - When the client sends a `conversation.item.create` event.
+      # - When the input audio buffer is committed. In this case the item will be a user
+      #   message containing the audio from the buffer.
+      # - When the model is generating a Response. In this case the
+      #   `conversation.item.added` event will be sent when the model starts generating
+      #   a specific Item, and thus it will not yet have any content (and `status` will
+      #   be `in_progress`).
+      #
+      # The event will include the full content of the Item (except when the model is
+      # generating a Response) except for audio data, which can be retrieved separately
+      # with a `conversation.item.retrieve` event if necessary.
      sig do
        params(
          event_id: String,
diff --git a/rbi/openai/models/realtime/conversation_item_done.rbi b/rbi/openai/models/realtime/conversation_item_done.rbi
index 77af97d9..139b59f3 100644
--- a/rbi/openai/models/realtime/conversation_item_done.rbi
+++ b/rbi/openai/models/realtime/conversation_item_done.rbi
@@ -44,6 +44,9 @@ module OpenAI
      attr_accessor :previous_item_id

      # Returned when a conversation item is finalized.
+      #
+      # The event will include the full content of the Item except for audio data, which
+      # can be retrieved separately with a `conversation.item.retrieve` event if needed.
      sig do
        params(
          event_id: String,
diff --git a/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi b/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi
index cf6b0efb..3bff7cb1 100644
--- a/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi
+++ b/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi
@@ -20,7 +20,7 @@ module OpenAI
      sig { returns(String) }
      attr_accessor :event_id

-      # The ID of the user message item containing the audio.
+      # The ID of the item containing the audio that is being transcribed.
      sig { returns(String) }
      attr_accessor :item_id

@@ -32,7 +32,8 @@ module OpenAI
      sig { returns(Symbol) }
      attr_accessor :type

-      # Usage statistics for the transcription.
+      # Usage statistics for the transcription; this is billed according to the ASR
+      # model's pricing rather than the realtime model's pricing.
      sig do
        returns(
          T.any(
@@ -51,9 +52,9 @@ module OpenAI

      # This event is the output of audio transcription for user audio written to the
      # user audio buffer. Transcription begins when the input audio buffer is committed
-      # by the client or server (in `server_vad` mode). Transcription runs
-      # asynchronously with Response creation, so this event may come before or after
-      # the Response events.
+      # by the client or server (when VAD is enabled). Transcription runs asynchronously
+      # with Response creation, so this event may come before or after the Response
+      # events.
      #
      # Realtime API models accept audio natively, and thus input transcription is a
      # separate process run on a separate ASR (Automatic Speech Recognition) model. The
@@ -80,11 +81,12 @@ module OpenAI
        content_index:,
        # The unique ID of the server event.
        event_id:,
-        # The ID of the user message item containing the audio.
+        # The ID of the item containing the audio that is being transcribed.
        item_id:,
        # The transcribed text.
        transcript:,
-        # Usage statistics for the transcription.
+        # Usage statistics for the transcription; this is billed according to the ASR
+        # model's pricing rather than the realtime model's pricing.
        usage:,
        # The log probabilities of the transcription.
        logprobs: nil,
@@ -113,7 +115,8 @@ module OpenAI
      def to_hash
      end

-      # Usage statistics for the transcription.
+      # Usage statistics for the transcription; this is billed according to the ASR
+      # model's pricing rather than the realtime model's pricing.
      module Usage
        extend OpenAI::Internal::Type::Union
diff --git a/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi b/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi
index f6adbc4f..31ee8e4c 100644
--- a/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi
+++ b/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi
@@ -16,7 +16,7 @@ module OpenAI
      sig { returns(String) }
      attr_accessor :event_id

-      # The ID of the item.
+      # The ID of the item containing the audio that is being transcribed.
      sig { returns(String) }
      attr_accessor :item_id

@@ -38,14 +38,19 @@ module OpenAI
      sig { params(delta: String).void }
      attr_writer :delta

-      # The log probabilities of the transcription.
+      # The log probabilities of the transcription. These can be enabled by
+      # configuring the session with
+      # `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the
+      # array corresponds to a log probability of which token would be selected for
+      # this chunk of transcription. This can help identify whether there were
+      # multiple valid options for a given chunk of transcription.
      sig do
        returns(T.nilable(T::Array[OpenAI::Realtime::LogProbProperties]))
      end
      attr_accessor :logprobs

      # Returned when the text value of an input audio transcription content part is
-      # updated.
+      # updated with incremental transcription results.
      sig do
        params(
          event_id: String,
@@ -60,13 +65,18 @@ module OpenAI
      def self.new(
        # The unique ID of the server event.
        event_id:,
-        # The ID of the item.
+        # The ID of the item containing the audio that is being transcribed.
        item_id:,
        # The index of the content part in the item's content array.
        content_index: nil,
        # The text delta.
        delta: nil,
-        # The log probabilities of the transcription.
+        # The log probabilities of the transcription. These can be enabled by
+        # configuring the session with
+        # `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the
+        # array corresponds to a log probability of which token would be selected for
+        # this chunk of transcription. This can help identify whether there were
+        # multiple valid options for a given chunk of transcription.
        logprobs: nil,
        # The event type, must be `conversation.item.input_audio_transcription.delta`.
        type: :"conversation.item.input_audio_transcription.delta"
diff --git a/rbi/openai/models/realtime/conversation_item_truncate_event.rbi b/rbi/openai/models/realtime/conversation_item_truncate_event.rbi
index 4cdced80..3498dfc3 100644
--- a/rbi/openai/models/realtime/conversation_item_truncate_event.rbi
+++ b/rbi/openai/models/realtime/conversation_item_truncate_event.rbi
@@ -18,7 +18,7 @@ module OpenAI
      sig { returns(Integer) }
      attr_accessor :audio_end_ms

-      # The index of the content part to truncate. Set this to 0.
+      # The index of the content part to truncate. Set this to `0`.
      sig { returns(Integer) }
      attr_accessor :content_index

@@ -63,7 +63,7 @@ module OpenAI
        # audio_end_ms is greater than the actual audio duration, the server will respond
        # with an error.
        audio_end_ms:,
-        # The index of the content part to truncate. Set this to 0.
+        # The index of the content part to truncate. Set this to `0`.
        content_index:,
        # The ID of the assistant message item to truncate. Only assistant message items
        # can be truncated.
diff --git a/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi b/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi
index 55f42911..070ae12d 100644
--- a/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi
+++ b/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi
@@ -29,14 +29,19 @@ module OpenAI
      attr_writer :event_id

      # Send this event to append audio bytes to the input audio buffer. The audio
-      # buffer is temporary storage you can write to and later commit. In Server VAD
-      # mode, the audio buffer is used to detect speech and the server will decide when
-      # to commit. When Server VAD is disabled, you must commit the audio buffer
-      # manually.
+      # buffer is temporary storage you can write to and later commit. A "commit" will
+      # create a new user message item in the conversation history from the buffer
+      # content and clear the buffer. Input audio transcription (if enabled) will be
+      # generated when the buffer is committed.
+      #
+      # If VAD is enabled, the audio buffer is used to detect speech and the server will
+      # decide when to commit. When Server VAD is disabled, you must commit the audio
+      # buffer manually. Input audio noise reduction operates on writes to the audio
+      # buffer.
      #
      # The client may choose how much audio to place in each event up to a maximum of
      # 15 MiB, for example streaming smaller chunks from the client may allow the VAD
-      # to be more responsive. Unlike made other client events, the server will not send
+      # to be more responsive. Unlike most other client events, the server will not send
      # a confirmation response to this event.
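      #
      # As an illustrative sketch only (neither `ws` nor `base64_pcm_chunk` is part
      # of this SDK; both are assumed to exist in the caller's code), a client
      # appending one base64-encoded PCM chunk over a raw WebSocket might send:
      #
      #   ws.send(
      #     JSON.generate(
      #       type: "input_audio_buffer.append",
      #       audio: base64_pcm_chunk # hypothetical base64-encoded audio string
      #     )
      #   )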
sig do params(audio: String, event_id: String, type: Symbol).returns( diff --git a/rbi/openai/models/realtime/models.rbi b/rbi/openai/models/realtime/models.rbi new file mode 100644 index 00000000..fecd47c3 --- /dev/null +++ b/rbi/openai/models/realtime/models.rbi @@ -0,0 +1,97 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class Models < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any(OpenAI::Realtime::Models, OpenAI::Internal::AnyHash) + end + + # The description of the function, including guidance on when and how to call it, + # and guidance about what to tell the user when calling (if anything). + sig { returns(T.nilable(String)) } + attr_reader :description + + sig { params(description: String).void } + attr_writer :description + + # The name of the function. + sig { returns(T.nilable(String)) } + attr_reader :name + + sig { params(name: String).void } + attr_writer :name + + # Parameters of the function in JSON Schema. + sig { returns(T.nilable(T.anything)) } + attr_reader :parameters + + sig { params(parameters: T.anything).void } + attr_writer :parameters + + # The type of the tool, i.e. `function`. + sig { returns(T.nilable(OpenAI::Realtime::Models::Type::OrSymbol)) } + attr_reader :type + + sig { params(type: OpenAI::Realtime::Models::Type::OrSymbol).void } + attr_writer :type + + sig do + params( + description: String, + name: String, + parameters: T.anything, + type: OpenAI::Realtime::Models::Type::OrSymbol + ).returns(T.attached_class) + end + def self.new( + # The description of the function, including guidance on when and how to call it, + # and guidance about what to tell the user when calling (if anything). + description: nil, + # The name of the function. + name: nil, + # Parameters of the function in JSON Schema. + parameters: nil, + # The type of the tool, i.e. `function`. + type: nil + ) + end + + sig do + override.returns( + { + description: String, + name: String, + parameters: T.anything, + type: OpenAI::Realtime::Models::Type::OrSymbol + } + ) + end + def to_hash + end + + # The type of the tool, i.e. `function`. + module Type + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias { T.all(Symbol, OpenAI::Realtime::Models::Type) } + OrSymbol = T.type_alias { T.any(Symbol, String) } + + FUNCTION = + T.let(:function, OpenAI::Realtime::Models::Type::TaggedSymbol) + + sig do + override.returns( + T::Array[OpenAI::Realtime::Models::Type::TaggedSymbol] + ) + end + def self.values + end + end + end + end + end +end diff --git a/rbi/openai/models/realtime/noise_reduction_type.rbi b/rbi/openai/models/realtime/noise_reduction_type.rbi new file mode 100644 index 00000000..a22ba985 --- /dev/null +++ b/rbi/openai/models/realtime/noise_reduction_type.rbi @@ -0,0 +1,31 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. 
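+      #
+      # Illustrative use within a session's input audio config (the surrounding
+      # payload shape belongs to the session config types, not this enum):
+      #
+      #   noise_reduction: { type: :near_field } # e.g. for a headset microphone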
+ module NoiseReductionType + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias { T.all(Symbol, OpenAI::Realtime::NoiseReductionType) } + OrSymbol = T.type_alias { T.any(Symbol, String) } + + NEAR_FIELD = + T.let(:near_field, OpenAI::Realtime::NoiseReductionType::TaggedSymbol) + FAR_FIELD = + T.let(:far_field, OpenAI::Realtime::NoiseReductionType::TaggedSymbol) + + sig do + override.returns( + T::Array[OpenAI::Realtime::NoiseReductionType::TaggedSymbol] + ) + end + def self.values + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_audio_config.rbi b/rbi/openai/models/realtime/realtime_audio_config.rbi index e07f16bc..9dc05144 100644 --- a/rbi/openai/models/realtime/realtime_audio_config.rbi +++ b/rbi/openai/models/realtime/realtime_audio_config.rbi @@ -12,24 +12,20 @@ module OpenAI ) end - sig { returns(T.nilable(OpenAI::Realtime::RealtimeAudioConfig::Input)) } + sig { returns(T.nilable(OpenAI::Realtime::RealtimeAudioConfigInput)) } attr_reader :input sig do - params( - input: OpenAI::Realtime::RealtimeAudioConfig::Input::OrHash - ).void + params(input: OpenAI::Realtime::RealtimeAudioConfigInput::OrHash).void end attr_writer :input - sig do - returns(T.nilable(OpenAI::Realtime::RealtimeAudioConfig::Output)) - end + sig { returns(T.nilable(OpenAI::Realtime::RealtimeAudioConfigOutput)) } attr_reader :output sig do params( - output: OpenAI::Realtime::RealtimeAudioConfig::Output::OrHash + output: OpenAI::Realtime::RealtimeAudioConfigOutput::OrHash ).void end attr_writer :output @@ -37,8 +33,8 @@ module OpenAI # Configuration for input and output audio. sig do params( - input: OpenAI::Realtime::RealtimeAudioConfig::Input::OrHash, - output: OpenAI::Realtime::RealtimeAudioConfig::Output::OrHash + input: OpenAI::Realtime::RealtimeAudioConfigInput::OrHash, + output: OpenAI::Realtime::RealtimeAudioConfigOutput::OrHash ).returns(T.attached_class) end def self.new(input: nil, output: nil) @@ -47,957 +43,13 @@ module OpenAI sig do override.returns( { - input: OpenAI::Realtime::RealtimeAudioConfig::Input, - output: OpenAI::Realtime::RealtimeAudioConfig::Output + input: OpenAI::Realtime::RealtimeAudioConfigInput, + output: OpenAI::Realtime::RealtimeAudioConfigOutput } ) end def to_hash end - - class Input < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeAudioConfig::Input, - OpenAI::Internal::AnyHash - ) - end - - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For - # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel - # (mono), and little-endian byte order. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeAudioConfig::Input::Format::OrSymbol - ) - ) - end - attr_reader :format_ - - sig do - params( - format_: - OpenAI::Realtime::RealtimeAudioConfig::Input::Format::OrSymbol - ).void - end - attr_writer :format_ - - # Configuration for input audio noise reduction. This can be set to `null` to turn - # off. Noise reduction filters audio added to the input audio buffer before it is - # sent to VAD and the model. Filtering the audio can improve VAD and turn - # detection accuracy (reducing false positives) and model performance by improving - # perception of the input audio. 
- sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction - ) - ) - end - attr_reader :noise_reduction - - sig do - params( - noise_reduction: - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::OrHash - ).void - end - attr_writer :noise_reduction - - # Configuration for input audio transcription, defaults to off and can be set to - # `null` to turn off once on. Input audio transcription is not native to the - # model, since the model consumes audio directly. Transcription runs - # asynchronously through - # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) - # and should be treated as guidance of input audio content rather than precisely - # what the model heard. The client can optionally set the language and prompt for - # transcription, these offer additional guidance to the transcription service. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription - ) - ) - end - attr_reader :transcription - - sig do - params( - transcription: - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::OrHash - ).void - end - attr_writer :transcription - - # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be - # set to `null` to turn off, in which case the client must manually trigger model - # response. Server VAD means that the model will detect the start and end of - # speech based on audio volume and respond at the end of user speech. Semantic VAD - # is more advanced and uses a turn detection model (in conjunction with VAD) to - # semantically estimate whether the user has finished speaking, then dynamically - # sets a timeout based on this probability. For example, if user audio trails off - # with "uhhm", the model will score a low probability of turn end and wait longer - # for the user to continue speaking. This can be useful for more natural - # conversations, but may have a higher latency. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection - ) - ) - end - attr_reader :turn_detection - - sig do - params( - turn_detection: - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::OrHash - ).void - end - attr_writer :turn_detection - - sig do - params( - format_: - OpenAI::Realtime::RealtimeAudioConfig::Input::Format::OrSymbol, - noise_reduction: - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::OrHash, - transcription: - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::OrHash, - turn_detection: - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::OrHash - ).returns(T.attached_class) - end - def self.new( - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For - # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel - # (mono), and little-endian byte order. - format_: nil, - # Configuration for input audio noise reduction. This can be set to `null` to turn - # off. Noise reduction filters audio added to the input audio buffer before it is - # sent to VAD and the model. Filtering the audio can improve VAD and turn - # detection accuracy (reducing false positives) and model performance by improving - # perception of the input audio. - noise_reduction: nil, - # Configuration for input audio transcription, defaults to off and can be set to - # `null` to turn off once on. Input audio transcription is not native to the - # model, since the model consumes audio directly. 
Transcription runs - # asynchronously through - # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) - # and should be treated as guidance of input audio content rather than precisely - # what the model heard. The client can optionally set the language and prompt for - # transcription, these offer additional guidance to the transcription service. - transcription: nil, - # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be - # set to `null` to turn off, in which case the client must manually trigger model - # response. Server VAD means that the model will detect the start and end of - # speech based on audio volume and respond at the end of user speech. Semantic VAD - # is more advanced and uses a turn detection model (in conjunction with VAD) to - # semantically estimate whether the user has finished speaking, then dynamically - # sets a timeout based on this probability. For example, if user audio trails off - # with "uhhm", the model will score a low probability of turn end and wait longer - # for the user to continue speaking. This can be useful for more natural - # conversations, but may have a higher latency. - turn_detection: nil - ) - end - - sig do - override.returns( - { - format_: - OpenAI::Realtime::RealtimeAudioConfig::Input::Format::OrSymbol, - noise_reduction: - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction, - transcription: - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription, - turn_detection: - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection - } - ) - end - def to_hash - end - - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For - # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel - # (mono), and little-endian byte order. - module Format - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeAudioConfig::Input::Format - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - PCM16 = - T.let( - :pcm16, - OpenAI::Realtime::RealtimeAudioConfig::Input::Format::TaggedSymbol - ) - G711_ULAW = - T.let( - :g711_ulaw, - OpenAI::Realtime::RealtimeAudioConfig::Input::Format::TaggedSymbol - ) - G711_ALAW = - T.let( - :g711_alaw, - OpenAI::Realtime::RealtimeAudioConfig::Input::Format::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeAudioConfig::Input::Format::TaggedSymbol - ] - ) - end - def self.values - end - end - - class NoiseReduction < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction, - OpenAI::Internal::AnyHash - ) - end - - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::OrSymbol - ) - ) - end - attr_reader :type - - sig do - params( - type: - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::OrSymbol - ).void - end - attr_writer :type - - # Configuration for input audio noise reduction. This can be set to `null` to turn - # off. Noise reduction filters audio added to the input audio buffer before it is - # sent to VAD and the model. 
Filtering the audio can improve VAD and turn - # detection accuracy (reducing false positives) and model performance by improving - # perception of the input audio. - sig do - params( - type: - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new( - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. - type: nil - ) - end - - sig do - override.returns( - { - type: - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::OrSymbol - } - ) - end - def to_hash - end - - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. - module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - NEAR_FIELD = - T.let( - :near_field, - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::TaggedSymbol - ) - FAR_FIELD = - T.let( - :far_field, - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class Transcription < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription, - OpenAI::Internal::AnyHash - ) - end - - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - sig { returns(T.nilable(String)) } - attr_reader :language - - sig { params(language: String).void } - attr_writer :language - - # The model to use for transcription. Current options are `whisper-1`, - # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and - # `gpt-4o-transcribe-diarize`. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::OrSymbol - ) - ) - end - attr_reader :model - - sig do - params( - model: - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::OrSymbol - ).void - end - attr_writer :model - - # An optional text to guide the model's style or continue a previous audio - # segment. For `whisper-1`, the - # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - # For `gpt-4o-transcribe` models, the prompt is a free text string, for example - # "expect words related to technology". - sig { returns(T.nilable(String)) } - attr_reader :prompt - - sig { params(prompt: String).void } - attr_writer :prompt - - # Configuration for input audio transcription, defaults to off and can be set to - # `null` to turn off once on. Input audio transcription is not native to the - # model, since the model consumes audio directly. Transcription runs - # asynchronously through - # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) - # and should be treated as guidance of input audio content rather than precisely - # what the model heard. 
The client can optionally set the language and prompt for - # transcription, these offer additional guidance to the transcription service. - sig do - params( - language: String, - model: - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::OrSymbol, - prompt: String - ).returns(T.attached_class) - end - def self.new( - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - language: nil, - # The model to use for transcription. Current options are `whisper-1`, - # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and - # `gpt-4o-transcribe-diarize`. - model: nil, - # An optional text to guide the model's style or continue a previous audio - # segment. For `whisper-1`, the - # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - # For `gpt-4o-transcribe` models, the prompt is a free text string, for example - # "expect words related to technology". - prompt: nil - ) - end - - sig do - override.returns( - { - language: String, - model: - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::OrSymbol, - prompt: String - } - ) - end - def to_hash - end - - # The model to use for transcription. Current options are `whisper-1`, - # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and - # `gpt-4o-transcribe-diarize`. - module Model - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - WHISPER_1 = - T.let( - :"whisper-1", - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol - ) - GPT_4O_TRANSCRIBE_LATEST = - T.let( - :"gpt-4o-transcribe-latest", - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol - ) - GPT_4O_MINI_TRANSCRIBE = - T.let( - :"gpt-4o-mini-transcribe", - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol - ) - GPT_4O_TRANSCRIBE = - T.let( - :"gpt-4o-transcribe", - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol - ) - GPT_4O_TRANSCRIBE_DIARIZE = - T.let( - :"gpt-4o-transcribe-diarize", - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class TurnDetection < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection, - OpenAI::Internal::AnyHash - ) - end - - # Whether or not to automatically generate a response when a VAD stop event - # occurs. - sig { returns(T.nilable(T::Boolean)) } - attr_reader :create_response - - sig { params(create_response: T::Boolean).void } - attr_writer :create_response - - # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` - # will wait longer for the user to continue speaking, `high` will respond more - # quickly. `auto` is the default and is equivalent to `medium`. 
- sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::OrSymbol - ) - ) - end - attr_reader :eagerness - - sig do - params( - eagerness: - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::OrSymbol - ).void - end - attr_writer :eagerness - - # Optional idle timeout after which turn detection will auto-timeout when no - # additional audio is received. - sig { returns(T.nilable(Integer)) } - attr_accessor :idle_timeout_ms - - # Whether or not to automatically interrupt any ongoing response with output to - # the default conversation (i.e. `conversation` of `auto`) when a VAD start event - # occurs. - sig { returns(T.nilable(T::Boolean)) } - attr_reader :interrupt_response - - sig { params(interrupt_response: T::Boolean).void } - attr_writer :interrupt_response - - # Used only for `server_vad` mode. Amount of audio to include before the VAD - # detected speech (in milliseconds). Defaults to 300ms. - sig { returns(T.nilable(Integer)) } - attr_reader :prefix_padding_ms - - sig { params(prefix_padding_ms: Integer).void } - attr_writer :prefix_padding_ms - - # Used only for `server_vad` mode. Duration of silence to detect speech stop (in - # milliseconds). Defaults to 500ms. With shorter values the model will respond - # more quickly, but may jump in on short pauses from the user. - sig { returns(T.nilable(Integer)) } - attr_reader :silence_duration_ms - - sig { params(silence_duration_ms: Integer).void } - attr_writer :silence_duration_ms - - # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this - # defaults to 0.5. A higher threshold will require louder audio to activate the - # model, and thus might perform better in noisy environments. - sig { returns(T.nilable(Float)) } - attr_reader :threshold - - sig { params(threshold: Float).void } - attr_writer :threshold - - # Type of turn detection. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::OrSymbol - ) - ) - end - attr_reader :type - - sig do - params( - type: - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::OrSymbol - ).void - end - attr_writer :type - - # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be - # set to `null` to turn off, in which case the client must manually trigger model - # response. Server VAD means that the model will detect the start and end of - # speech based on audio volume and respond at the end of user speech. Semantic VAD - # is more advanced and uses a turn detection model (in conjunction with VAD) to - # semantically estimate whether the user has finished speaking, then dynamically - # sets a timeout based on this probability. For example, if user audio trails off - # with "uhhm", the model will score a low probability of turn end and wait longer - # for the user to continue speaking. This can be useful for more natural - # conversations, but may have a higher latency. - sig do - params( - create_response: T::Boolean, - eagerness: - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::OrSymbol, - idle_timeout_ms: T.nilable(Integer), - interrupt_response: T::Boolean, - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new( - # Whether or not to automatically generate a response when a VAD stop event - # occurs. 
- create_response: nil, - # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` - # will wait longer for the user to continue speaking, `high` will respond more - # quickly. `auto` is the default and is equivalent to `medium`. - eagerness: nil, - # Optional idle timeout after which turn detection will auto-timeout when no - # additional audio is received. - idle_timeout_ms: nil, - # Whether or not to automatically interrupt any ongoing response with output to - # the default conversation (i.e. `conversation` of `auto`) when a VAD start event - # occurs. - interrupt_response: nil, - # Used only for `server_vad` mode. Amount of audio to include before the VAD - # detected speech (in milliseconds). Defaults to 300ms. - prefix_padding_ms: nil, - # Used only for `server_vad` mode. Duration of silence to detect speech stop (in - # milliseconds). Defaults to 500ms. With shorter values the model will respond - # more quickly, but may jump in on short pauses from the user. - silence_duration_ms: nil, - # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this - # defaults to 0.5. A higher threshold will require louder audio to activate the - # model, and thus might perform better in noisy environments. - threshold: nil, - # Type of turn detection. - type: nil - ) - end - - sig do - override.returns( - { - create_response: T::Boolean, - eagerness: - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::OrSymbol, - idle_timeout_ms: T.nilable(Integer), - interrupt_response: T::Boolean, - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::OrSymbol - } - ) - end - def to_hash - end - - # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` - # will wait longer for the user to continue speaking, `high` will respond more - # quickly. `auto` is the default and is equivalent to `medium`. - module Eagerness - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - LOW = - T.let( - :low, - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::TaggedSymbol - ) - MEDIUM = - T.let( - :medium, - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::TaggedSymbol - ) - HIGH = - T.let( - :high, - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::TaggedSymbol - ) - AUTO = - T.let( - :auto, - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::TaggedSymbol - ] - ) - end - def self.values - end - end - - # Type of turn detection. 
- module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - SERVER_VAD = - T.let( - :server_vad, - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::TaggedSymbol - ) - SEMANTIC_VAD = - T.let( - :semantic_vad, - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end - end - - class Output < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeAudioConfig::Output, - OpenAI::Internal::AnyHash - ) - end - - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # For `pcm16`, output audio is sampled at a rate of 24kHz. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeAudioConfig::Output::Format::OrSymbol - ) - ) - end - attr_reader :format_ - - sig do - params( - format_: - OpenAI::Realtime::RealtimeAudioConfig::Output::Format::OrSymbol - ).void - end - attr_writer :format_ - - # The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the - # minimum speed. 1.5 is the maximum speed. This value can only be changed in - # between model turns, not while a response is in progress. - sig { returns(T.nilable(Float)) } - attr_reader :speed - - sig { params(speed: Float).void } - attr_writer :speed - - # The voice the model uses to respond. Voice cannot be changed during the session - # once the model has responded with audio at least once. Current voice options are - # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, - # and `cedar`. - sig do - returns( - T.nilable( - T.any( - String, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::OrSymbol - ) - ) - ) - end - attr_reader :voice - - sig do - params( - voice: - T.any( - String, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::OrSymbol - ) - ).void - end - attr_writer :voice - - sig do - params( - format_: - OpenAI::Realtime::RealtimeAudioConfig::Output::Format::OrSymbol, - speed: Float, - voice: - T.any( - String, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::OrSymbol - ) - ).returns(T.attached_class) - end - def self.new( - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # For `pcm16`, output audio is sampled at a rate of 24kHz. - format_: nil, - # The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the - # minimum speed. 1.5 is the maximum speed. This value can only be changed in - # between model turns, not while a response is in progress. - speed: nil, - # The voice the model uses to respond. Voice cannot be changed during the session - # once the model has responded with audio at least once. Current voice options are - # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, - # and `cedar`. - voice: nil - ) - end - - sig do - override.returns( - { - format_: - OpenAI::Realtime::RealtimeAudioConfig::Output::Format::OrSymbol, - speed: Float, - voice: - T.any( - String, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::OrSymbol - ) - } - ) - end - def to_hash - end - - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - # For `pcm16`, output audio is sampled at a rate of 24kHz. 
- module Format - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeAudioConfig::Output::Format - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - PCM16 = - T.let( - :pcm16, - OpenAI::Realtime::RealtimeAudioConfig::Output::Format::TaggedSymbol - ) - G711_ULAW = - T.let( - :g711_ulaw, - OpenAI::Realtime::RealtimeAudioConfig::Output::Format::TaggedSymbol - ) - G711_ALAW = - T.let( - :g711_alaw, - OpenAI::Realtime::RealtimeAudioConfig::Output::Format::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeAudioConfig::Output::Format::TaggedSymbol - ] - ) - end - def self.values - end - end - - # The voice the model uses to respond. Voice cannot be changed during the session - # once the model has responded with audio at least once. Current voice options are - # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, - # and `cedar`. - module Voice - extend OpenAI::Internal::Type::Union - - Variants = - T.type_alias do - T.any( - String, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - end - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::Variants - ] - ) - end - def self.variants - end - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - ALLOY = - T.let( - :alloy, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - ASH = - T.let( - :ash, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - BALLAD = - T.let( - :ballad, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - CORAL = - T.let( - :coral, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - ECHO = - T.let( - :echo, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - SAGE = - T.let( - :sage, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - SHIMMER = - T.let( - :shimmer, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - VERSE = - T.let( - :verse, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - MARIN = - T.let( - :marin, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - CEDAR = - T.let( - :cedar, - OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol - ) - end - end end end end diff --git a/rbi/openai/models/realtime/realtime_audio_config_input.rbi b/rbi/openai/models/realtime/realtime_audio_config_input.rbi new file mode 100644 index 00000000..e33b3fce --- /dev/null +++ b/rbi/openai/models/realtime/realtime_audio_config_input.rbi @@ -0,0 +1,221 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class RealtimeAudioConfigInput < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeAudioConfigInput, + OpenAI::Internal::AnyHash + ) + end + + # The format of the input audio. 
+        sig do
+          returns(
+            T.nilable(
+              T.any(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
+              )
+            )
+          )
+        end
+        attr_reader :format_
+
+        sig do
+          params(
+            format_:
+              T.any(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
+              )
+          ).void
+        end
+        attr_writer :format_
+
+        # Configuration for input audio noise reduction. This can be set to `null` to turn
+        # off. Noise reduction filters audio added to the input audio buffer before it is
+        # sent to VAD and the model. Filtering the audio can improve VAD and turn
+        # detection accuracy (reducing false positives) and model performance by improving
+        # perception of the input audio.
+        sig do
+          returns(
+            T.nilable(
+              OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction
+            )
+          )
+        end
+        attr_reader :noise_reduction
+
+        sig do
+          params(
+            noise_reduction:
+              OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction::OrHash
+          ).void
+        end
+        attr_writer :noise_reduction
+
+        # Configuration for input audio transcription; defaults to off and can be set to
+        # `null` to turn off once on. Input audio transcription is not native to the
+        # model, since the model consumes audio directly. Transcription runs
+        # asynchronously through
+        # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+        # and should be treated as guidance of input audio content rather than precisely
+        # what the model heard. The client can optionally set the language and prompt for
+        # transcription; these offer additional guidance to the transcription service.
+        sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) }
+        attr_reader :transcription
+
+        sig do
+          params(
+            transcription: OpenAI::Realtime::AudioTranscription::OrHash
+          ).void
+        end
+        attr_writer :transcription
+
+        # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+        # set to `null` to turn off, in which case the client must manually trigger model
+        # response. Server VAD means that the model will detect the start and end of
+        # speech based on audio volume and respond at the end of user speech. Semantic VAD
+        # is more advanced and uses a turn detection model (in conjunction with VAD) to
+        # semantically estimate whether the user has finished speaking, then dynamically
+        # sets a timeout based on this probability. For example, if user audio trails off
+        # with "uhhm", the model will score a low probability of turn end and wait longer
+        # for the user to continue speaking. This can be useful for more natural
+        # conversations, but may have a higher latency.
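The sigs above add up to the new nested input-audio shape: a format object, optional noise reduction, optional out-of-band transcription, and turn detection. A minimal sketch of what a client might pass (the `OrHash` aliases mean plain hashes are accepted; the transcription model name and the surrounding session-update plumbing are assumptions, not part of this diff):

```ruby
# Hypothetical input-audio section of a session payload; keys mirror the
# attributes declared above, values are illustrative.
input_audio = {
  format: { type: :"audio/pcm", rate: 24_000 },          # RealtimeAudioFormats::AudioPCM
  noise_reduction: { type: :near_field },                # close-talking mic / headphones
  transcription: { language: "en", model: "whisper-1" }, # model name is an assumption
  # Server VAD is volume-based; swap in e.g. { type: :semantic_vad, eagerness: :low }
  # to trade latency for fewer interruptions.
  turn_detection: { type: :server_vad, silence_duration_ms: 500, threshold: 0.5 }
}
```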
+        sig do
+          returns(T.nilable(OpenAI::Realtime::RealtimeAudioInputTurnDetection))
+        end
+        attr_reader :turn_detection
+
+        sig do
+          params(
+            turn_detection:
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::OrHash
+          ).void
+        end
+        attr_writer :turn_detection
+
+        sig do
+          params(
+            format_:
+              T.any(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
+              ),
+            noise_reduction:
+              OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction::OrHash,
+            transcription: OpenAI::Realtime::AudioTranscription::OrHash,
+            turn_detection:
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::OrHash
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # The format of the input audio.
+          format_: nil,
+          # Configuration for input audio noise reduction. This can be set to `null` to turn
+          # off. Noise reduction filters audio added to the input audio buffer before it is
+          # sent to VAD and the model. Filtering the audio can improve VAD and turn
+          # detection accuracy (reducing false positives) and model performance by improving
+          # perception of the input audio.
+          noise_reduction: nil,
+          # Configuration for input audio transcription; defaults to off and can be set to
+          # `null` to turn off once on. Input audio transcription is not native to the
+          # model, since the model consumes audio directly. Transcription runs
+          # asynchronously through
+          # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+          # and should be treated as guidance of input audio content rather than precisely
+          # what the model heard. The client can optionally set the language and prompt for
+          # transcription; these offer additional guidance to the transcription service.
+          transcription: nil,
+          # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+          # set to `null` to turn off, in which case the client must manually trigger model
+          # response. Server VAD means that the model will detect the start and end of
+          # speech based on audio volume and respond at the end of user speech. Semantic VAD
+          # is more advanced and uses a turn detection model (in conjunction with VAD) to
+          # semantically estimate whether the user has finished speaking, then dynamically
+          # sets a timeout based on this probability. For example, if user audio trails off
+          # with "uhhm", the model will score a low probability of turn end and wait longer
+          # for the user to continue speaking. This can be useful for more natural
+          # conversations, but may have a higher latency.
+          turn_detection: nil
+        )
+        end
+
+        sig do
+          override.returns(
+            {
+              format_:
+                T.any(
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
+                ),
+              noise_reduction:
+                OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction,
+              transcription: OpenAI::Realtime::AudioTranscription,
+              turn_detection: OpenAI::Realtime::RealtimeAudioInputTurnDetection
+            }
+          )
+        end
+        def to_hash
+        end
+
+        class NoiseReduction < OpenAI::Internal::Type::BaseModel
+          OrHash =
+            T.type_alias do
+              T.any(
+                OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction,
+                OpenAI::Internal::AnyHash
+              )
+            end
+
+          # Type of noise reduction. `near_field` is for close-talking microphones such as
+          # headphones, `far_field` is for far-field microphones such as laptop or
+          # conference room microphones.
+          sig do
+            returns(T.nilable(OpenAI::Realtime::NoiseReductionType::OrSymbol))
+          end
+          attr_reader :type
+
+          sig do
+            params(type: OpenAI::Realtime::NoiseReductionType::OrSymbol).void
+          end
+          attr_writer :type
+
+          # Configuration for input audio noise reduction. This can be set to `null` to turn
+          # off. Noise reduction filters audio added to the input audio buffer before it is
+          # sent to VAD and the model. Filtering the audio can improve VAD and turn
+          # detection accuracy (reducing false positives) and model performance by improving
+          # perception of the input audio.
+          sig do
+            params(
+              type: OpenAI::Realtime::NoiseReductionType::OrSymbol
+            ).returns(T.attached_class)
+          end
+          def self.new(
+            # Type of noise reduction. `near_field` is for close-talking microphones such as
+            # headphones, `far_field` is for far-field microphones such as laptop or
+            # conference room microphones.
+            type: nil
+          )
+          end
+
+          sig do
+            override.returns(
+              { type: OpenAI::Realtime::NoiseReductionType::OrSymbol }
+            )
+          end
+          def to_hash
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/rbi/openai/models/realtime/realtime_audio_config_output.rbi b/rbi/openai/models/realtime/realtime_audio_config_output.rbi
new file mode 100644
index 00000000..6a26cf85
--- /dev/null
+++ b/rbi/openai/models/realtime/realtime_audio_config_output.rbi
@@ -0,0 +1,222 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    module Realtime
+      class RealtimeAudioConfigOutput < OpenAI::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              OpenAI::Realtime::RealtimeAudioConfigOutput,
+              OpenAI::Internal::AnyHash
+            )
+          end
+
+        # The format of the output audio.
+        sig do
+          returns(
+            T.nilable(
+              T.any(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
+              )
+            )
+          )
+        end
+        attr_reader :format_
+
+        sig do
+          params(
+            format_:
+              T.any(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
+              )
+          ).void
+        end
+        attr_writer :format_
+
+        # The speed of the model's spoken response as a multiple of the original speed.
+        # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
+        # This value can only be changed in between model turns, not while a response is
+        # in progress.
+        #
+        # This parameter is a post-processing adjustment to the audio after it is
+        # generated; it's also possible to prompt the model to speak faster or slower.
+        sig { returns(T.nilable(Float)) }
+        attr_reader :speed
+
+        sig { params(speed: Float).void }
+        attr_writer :speed
+
+        # The voice the model uses to respond. Voice cannot be changed during the session
+        # once the model has responded with audio at least once. Current voice options are
+        # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
+        # and `cedar`. We recommend `marin` and `cedar` for best quality.
+ sig do + returns( + T.nilable( + T.any( + String, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::OrSymbol + ) + ) + ) + end + attr_reader :voice + + sig do + params( + voice: + T.any( + String, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::OrSymbol + ) + ).void + end + attr_writer :voice + + sig do + params( + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ), + speed: Float, + voice: + T.any( + String, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::OrSymbol + ) + ).returns(T.attached_class) + end + def self.new( + # The format of the output audio. + format_: nil, + # The speed of the model's spoken response as a multiple of the original speed. + # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. + # This value can only be changed in between model turns, not while a response is + # in progress. + # + # This parameter is a post-processing adjustment to the audio after it is + # generated, it's also possible to prompt the model to speak faster or slower. + speed: nil, + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. + voice: nil + ) + end + + sig do + override.returns( + { + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA + ), + speed: Float, + voice: + T.any( + String, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::OrSymbol + ) + } + ) + end + def to_hash + end + + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. 
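The output side mirrors the input shape. A sketch under the same assumptions (plain hashes accepted via `OrHash`, session plumbing not shown):

```ruby
# Hypothetical output-audio section of a session payload. `speed` is a
# post-processing multiplier in 0.25..1.5; `voice` takes an enum symbol
# or a plain String.
output_audio = {
  format: { type: :"audio/pcmu" }, # G.711 mu-law; only audio/pcm carries a rate
  speed: 1.0,
  voice: :marin                    # marin/cedar are the recommended voices
}
```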
+ module Voice + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + String, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::Variants + ] + ) + end + def self.variants + end + + TaggedSymbol = + T.type_alias do + T.all(Symbol, OpenAI::Realtime::RealtimeAudioConfigOutput::Voice) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + ALLOY = + T.let( + :alloy, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + ASH = + T.let( + :ash, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + BALLAD = + T.let( + :ballad, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + CORAL = + T.let( + :coral, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + ECHO = + T.let( + :echo, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + SAGE = + T.let( + :sage, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + SHIMMER = + T.let( + :shimmer, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + VERSE = + T.let( + :verse, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + MARIN = + T.let( + :marin, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + CEDAR = + T.let( + :cedar, + OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol + ) + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_audio_formats.rbi b/rbi/openai/models/realtime/realtime_audio_formats.rbi new file mode 100644 index 00000000..92f4ea3c --- /dev/null +++ b/rbi/openai/models/realtime/realtime_audio_formats.rbi @@ -0,0 +1,329 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + # The PCM audio format. Only a 24kHz sample rate is supported. + module RealtimeAudioFormats + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA + ) + end + + class AudioPCM < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM, + OpenAI::Internal::AnyHash + ) + end + + # The sample rate of the audio. Always `24000`. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger + ) + ) + end + attr_reader :rate + + sig do + params( + rate: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger + ).void + end + attr_writer :rate + + # The audio format. Always `audio/pcm`. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol + ) + ) + end + attr_reader :type + + sig do + params( + type: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol + ).void + end + attr_writer :type + + # The PCM audio format. Only a 24kHz sample rate is supported. + sig do + params( + rate: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger, + type: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol + ).returns(T.attached_class) + end + def self.new( + # The sample rate of the audio. Always `24000`. + rate: nil, + # The audio format. Always `audio/pcm`. 
+ type: nil + ) + end + + sig do + override.returns( + { + rate: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger, + type: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol + } + ) + end + def to_hash + end + + # The sample rate of the audio. Always `24000`. + module Rate + extend OpenAI::Internal::Type::Enum + + TaggedInteger = + T.type_alias do + T.all( + Integer, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate + ) + end + OrInteger = T.type_alias { Integer } + + RATE_24000 = + T.let( + 24_000, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::TaggedInteger + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::TaggedInteger + ] + ) + end + def self.values + end + end + + # The audio format. Always `audio/pcm`. + module Type + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + AUDIO_PCM = + T.let( + :"audio/pcm", + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::TaggedSymbol + ] + ) + end + def self.values + end + end + end + + class AudioPCMU < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU, + OpenAI::Internal::AnyHash + ) + end + + # The audio format. Always `audio/pcmu`. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol + ) + ) + end + attr_reader :type + + sig do + params( + type: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol + ).void + end + attr_writer :type + + # The G.711 μ-law format. + sig do + params( + type: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol + ).returns(T.attached_class) + end + def self.new( + # The audio format. Always `audio/pcmu`. + type: nil + ) + end + + sig do + override.returns( + { + type: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol + } + ) + end + def to_hash + end + + # The audio format. Always `audio/pcmu`. + module Type + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + AUDIO_PCMU = + T.let( + :"audio/pcmu", + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::TaggedSymbol + ] + ) + end + def self.values + end + end + end + + class AudioPCMA < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA, + OpenAI::Internal::AnyHash + ) + end + + # The audio format. Always `audio/pcma`. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol + ) + ) + end + attr_reader :type + + sig do + params( + type: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol + ).void + end + attr_writer :type + + # The G.711 A-law format. + sig do + params( + type: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol + ).returns(T.attached_class) + end + def self.new( + # The audio format. Always `audio/pcma`. 
+ type: nil + ) + end + + sig do + override.returns( + { + type: + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol + } + ) + end + def to_hash + end + + # The audio format. Always `audio/pcma`. + module Type + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + AUDIO_PCMA = + T.let( + :"audio/pcma", + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::TaggedSymbol + ] + ) + end + def self.values + end + end + end + + sig do + override.returns( + T::Array[OpenAI::Realtime::RealtimeAudioFormats::Variants] + ) + end + def self.variants + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi b/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi new file mode 100644 index 00000000..5febcfa0 --- /dev/null +++ b/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi @@ -0,0 +1,262 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class RealtimeAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeAudioInputTurnDetection, + OpenAI::Internal::AnyHash + ) + end + + # Whether or not to automatically generate a response when a VAD stop event + # occurs. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :create_response + + sig { params(create_response: T::Boolean).void } + attr_writer :create_response + + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, + # and `high` have max timeouts of 8s, 4s, and 2s respectively. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol + ) + ) + end + attr_reader :eagerness + + sig do + params( + eagerness: + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol + ).void + end + attr_writer :eagerness + + # Optional idle timeout after which turn detection will auto-timeout when no + # additional audio is received. + sig { returns(T.nilable(Integer)) } + attr_accessor :idle_timeout_ms + + # Whether or not to automatically interrupt any ongoing response with output to + # the default conversation (i.e. `conversation` of `auto`) when a VAD start event + # occurs. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :interrupt_response + + sig { params(interrupt_response: T::Boolean).void } + attr_writer :interrupt_response + + # Used only for `server_vad` mode. Amount of audio to include before the VAD + # detected speech (in milliseconds). Defaults to 300ms. + sig { returns(T.nilable(Integer)) } + attr_reader :prefix_padding_ms + + sig { params(prefix_padding_ms: Integer).void } + attr_writer :prefix_padding_ms + + # Used only for `server_vad` mode. Duration of silence to detect speech stop (in + # milliseconds). Defaults to 500ms. With shorter values the model will respond + # more quickly, but may jump in on short pauses from the user. 
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :silence_duration_ms
+
+        sig { params(silence_duration_ms: Integer).void }
+        attr_writer :silence_duration_ms
+
+        # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0); this
+        # defaults to 0.5. A higher threshold will require louder audio to activate the
+        # model, and thus might perform better in noisy environments.
+        sig { returns(T.nilable(Float)) }
+        attr_reader :threshold
+
+        sig { params(threshold: Float).void }
+        attr_writer :threshold
+
+        # Type of turn detection.
+        sig do
+          returns(
+            T.nilable(
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
+            )
+          )
+        end
+        attr_reader :type
+
+        sig do
+          params(
+            type:
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
+          ).void
+        end
+        attr_writer :type
+
+        # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+        # set to `null` to turn off, in which case the client must manually trigger model
+        # response. Server VAD means that the model will detect the start and end of
+        # speech based on audio volume and respond at the end of user speech. Semantic VAD
+        # is more advanced and uses a turn detection model (in conjunction with VAD) to
+        # semantically estimate whether the user has finished speaking, then dynamically
+        # sets a timeout based on this probability. For example, if user audio trails off
+        # with "uhhm", the model will score a low probability of turn end and wait longer
+        # for the user to continue speaking. This can be useful for more natural
+        # conversations, but may have a higher latency.
+        sig do
+          params(
+            create_response: T::Boolean,
+            eagerness:
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol,
+            idle_timeout_ms: T.nilable(Integer),
+            interrupt_response: T::Boolean,
+            prefix_padding_ms: Integer,
+            silence_duration_ms: Integer,
+            threshold: Float,
+            type:
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # Whether or not to automatically generate a response when a VAD stop event
+          # occurs.
+          create_response: nil,
+          # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+          # will wait longer for the user to continue speaking, `high` will respond more
+          # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
+          # and `high` have max timeouts of 8s, 4s, and 2s respectively.
+          eagerness: nil,
+          # Optional idle timeout after which turn detection will auto-timeout when no
+          # additional audio is received.
+          idle_timeout_ms: nil,
+          # Whether or not to automatically interrupt any ongoing response with output to
+          # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+          # occurs.
+          interrupt_response: nil,
+          # Used only for `server_vad` mode. Amount of audio to include before the VAD
+          # detected speech (in milliseconds). Defaults to 300ms.
+          prefix_padding_ms: nil,
+          # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
+          # milliseconds). Defaults to 500ms. With shorter values the model will respond
+          # more quickly, but may jump in on short pauses from the user.
+          silence_duration_ms: nil,
+          # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0); this
+          # defaults to 0.5. A higher threshold will require louder audio to activate the
+          # model, and thus might perform better in noisy environments.
+          threshold: nil,
+          # Type of turn detection.
+ type: nil + ) + end + + sig do + override.returns( + { + create_response: T::Boolean, + eagerness: + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol, + idle_timeout_ms: T.nilable(Integer), + interrupt_response: T::Boolean, + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol + } + ) + end + def to_hash + end + + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, + # and `high` have max timeouts of 8s, 4s, and 2s respectively. + module Eagerness + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + LOW = + T.let( + :low, + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol + ) + MEDIUM = + T.let( + :medium, + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol + ) + HIGH = + T.let( + :high, + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol + ) + AUTO = + T.let( + :auto, + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol + ] + ) + end + def self.values + end + end + + # Type of turn detection. + module Type + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + SERVER_VAD = + T.let( + :server_vad, + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::TaggedSymbol + ) + SEMANTIC_VAD = + T.let( + :semantic_vad, + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::TaggedSymbol + ] + ) + end + def self.values + end + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_client_secret_config.rbi b/rbi/openai/models/realtime/realtime_client_secret_config.rbi deleted file mode 100644 index a6557710..00000000 --- a/rbi/openai/models/realtime/realtime_client_secret_config.rbi +++ /dev/null @@ -1,147 +0,0 @@ -# typed: strong - -module OpenAI - module Models - module Realtime - class RealtimeClientSecretConfig < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeClientSecretConfig, - OpenAI::Internal::AnyHash - ) - end - - # Configuration for the ephemeral token expiration. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter - ) - ) - end - attr_reader :expires_after - - sig do - params( - expires_after: - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter::OrHash - ).void - end - attr_writer :expires_after - - # Configuration options for the generated client secret. - sig do - params( - expires_after: - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter::OrHash - ).returns(T.attached_class) - end - def self.new( - # Configuration for the ephemeral token expiration. 
- expires_after: nil - ) - end - - sig do - override.returns( - { - expires_after: - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter - } - ) - end - def to_hash - end - - class ExpiresAfter < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter, - OpenAI::Internal::AnyHash - ) - end - - # The anchor point for the ephemeral token expiration. Only `created_at` is - # currently supported. - sig do - returns( - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter::Anchor::OrSymbol - ) - end - attr_accessor :anchor - - # The number of seconds from the anchor point to the expiration. Select a value - # between `10` and `7200`. - sig { returns(T.nilable(Integer)) } - attr_reader :seconds - - sig { params(seconds: Integer).void } - attr_writer :seconds - - # Configuration for the ephemeral token expiration. - sig do - params( - anchor: - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter::Anchor::OrSymbol, - seconds: Integer - ).returns(T.attached_class) - end - def self.new( - # The anchor point for the ephemeral token expiration. Only `created_at` is - # currently supported. - anchor:, - # The number of seconds from the anchor point to the expiration. Select a value - # between `10` and `7200`. - seconds: nil - ) - end - - sig do - override.returns( - { - anchor: - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter::Anchor::OrSymbol, - seconds: Integer - } - ) - end - def to_hash - end - - # The anchor point for the ephemeral token expiration. Only `created_at` is - # currently supported. - module Anchor - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter::Anchor - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - CREATED_AT = - T.let( - :created_at, - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter::Anchor::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter::Anchor::TaggedSymbol - ] - ) - end - def self.values - end - end - end - end - end - end -end diff --git a/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi b/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi index 5bbe9bb4..fed61639 100644 --- a/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +++ b/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi @@ -30,14 +30,16 @@ module OpenAI sig { returns(Symbol) } attr_accessor :type - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. sig { returns(T.nilable(String)) } attr_reader :id sig { params(id: String).void } attr_writer :id - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. sig do returns( T.nilable( @@ -92,9 +94,11 @@ module OpenAI def self.new( # The content of the message. content:, - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. id: nil, - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. object: nil, # The status of the item. 
Has no effect on the conversation. status: nil, @@ -134,6 +138,15 @@ module OpenAI ) end + # Base64-encoded audio bytes, these will be parsed as the format specified in the + # session output audio type configuration. This defaults to PCM 16-bit 24kHz mono + # if not specified. + sig { returns(T.nilable(String)) } + attr_reader :audio + + sig { params(audio: String).void } + attr_writer :audio + # The text content. sig { returns(T.nilable(String)) } attr_reader :text @@ -141,7 +154,16 @@ module OpenAI sig { params(text: String).void } attr_writer :text - # The content type. Always `text` for assistant messages. + # The transcript of the audio content, this will always be present if the output + # type is `audio`. + sig { returns(T.nilable(String)) } + attr_reader :transcript + + sig { params(transcript: String).void } + attr_writer :transcript + + # The content type, `output_text` or `output_audio` depending on the session + # `output_modalities` configuration. sig do returns( T.nilable( @@ -161,15 +183,25 @@ module OpenAI sig do params( + audio: String, text: String, + transcript: String, type: OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type::OrSymbol ).returns(T.attached_class) end def self.new( + # Base64-encoded audio bytes, these will be parsed as the format specified in the + # session output audio type configuration. This defaults to PCM 16-bit 24kHz mono + # if not specified. + audio: nil, # The text content. text: nil, - # The content type. Always `text` for assistant messages. + # The transcript of the audio content, this will always be present if the output + # type is `audio`. + transcript: nil, + # The content type, `output_text` or `output_audio` depending on the session + # `output_modalities` configuration. type: nil ) end @@ -177,7 +209,9 @@ module OpenAI sig do override.returns( { + audio: String, text: String, + transcript: String, type: OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type::OrSymbol } @@ -186,7 +220,8 @@ module OpenAI def to_hash end - # The content type. Always `text` for assistant messages. + # The content type, `output_text` or `output_audio` depending on the session + # `output_modalities` configuration. module Type extend OpenAI::Internal::Type::Enum @@ -199,9 +234,14 @@ module OpenAI end OrSymbol = T.type_alias { T.any(Symbol, String) } - TEXT = + OUTPUT_TEXT = + T.let( + :output_text, + OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type::TaggedSymbol + ) + OUTPUT_AUDIO = T.let( - :text, + :output_audio, OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type::TaggedSymbol ) @@ -217,7 +257,8 @@ module OpenAI end end - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. module Object extend OpenAI::Internal::Type::Enum diff --git a/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi b/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi index def31c95..64f8d384 100644 --- a/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +++ b/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi @@ -12,7 +12,9 @@ module OpenAI ) end - # The arguments of the function call. + # The arguments of the function call. This is a JSON-encoded string representing + # the arguments passed to the function, for example + # `{"arg1": "value1", "arg2": 42}`. 
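Since `arguments` arrives as a JSON-encoded string, a client typically parses it, runs the tool, and answers with a `function_call_output` item. A sketch; the handler name and dispatch are hypothetical, and only the item shapes come from these models:

```ruby
require "json"

# Hypothetical client-side handler for a function_call item.
def handle_function_call(item)
  args = JSON.parse(item.arguments) # e.g. {"arg1" => "value1", "arg2" => 42}
  {
    type: :function_call_output,
    call_id: item.call_id,           # ties the output to the originating call
    output: { echoed: args }.to_json # free text; may also be empty
  }
end
```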
sig { returns(String) } attr_accessor :arguments @@ -24,7 +26,8 @@ module OpenAI sig { returns(Symbol) } attr_accessor :type - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. sig { returns(T.nilable(String)) } attr_reader :id @@ -38,7 +41,8 @@ module OpenAI sig { params(call_id: String).void } attr_writer :call_id - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. sig do returns( T.nilable( @@ -89,15 +93,19 @@ module OpenAI ).returns(T.attached_class) end def self.new( - # The arguments of the function call. + # The arguments of the function call. This is a JSON-encoded string representing + # the arguments passed to the function, for example + # `{"arg1": "value1", "arg2": 42}`. arguments:, # The name of the function being called. name:, - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. id: nil, # The ID of the function call. call_id: nil, - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. object: nil, # The status of the item. Has no effect on the conversation. status: nil, @@ -124,7 +132,8 @@ module OpenAI def to_hash end - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. module Object extend OpenAI::Internal::Type::Enum diff --git a/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi b/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi index ff1cd4a1..7d7a9ce5 100644 --- a/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +++ b/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi @@ -16,7 +16,8 @@ module OpenAI sig { returns(String) } attr_accessor :call_id - # The output of the function call. + # The output of the function call, this is free text and can contain any + # information or simply be empty. sig { returns(String) } attr_accessor :output @@ -24,14 +25,16 @@ module OpenAI sig { returns(Symbol) } attr_accessor :type - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. sig { returns(T.nilable(String)) } attr_reader :id sig { params(id: String).void } attr_writer :id - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. sig do returns( T.nilable( @@ -83,11 +86,14 @@ module OpenAI def self.new( # The ID of the function call this output is for. call_id:, - # The output of the function call. + # The output of the function call, this is free text and can contain any + # information or simply be empty. output:, - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. id: nil, - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. object: nil, # The status of the item. Has no effect on the conversation. 
status: nil, @@ -113,7 +119,8 @@ module OpenAI def to_hash end - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. module Object extend OpenAI::Internal::Type::Enum diff --git a/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi b/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi index c8c87b9c..cd4e6fa1 100644 --- a/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +++ b/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi @@ -30,14 +30,16 @@ module OpenAI sig { returns(Symbol) } attr_accessor :type - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. sig { returns(T.nilable(String)) } attr_reader :id sig { params(id: String).void } attr_writer :id - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. sig do returns( T.nilable( @@ -73,7 +75,12 @@ module OpenAI end attr_writer :status - # A system message item in a Realtime conversation. + # A system message in a Realtime conversation can be used to provide additional + # context or instructions to the model. This is similar but distinct from the + # instruction prompt provided at the start of a conversation, as system messages + # can be added at any point in the conversation. For major changes to the + # conversation's behavior, use instructions, but for smaller updates (e.g. "the + # user is now asking about a different topic"), use system messages. sig do params( content: @@ -92,9 +99,11 @@ module OpenAI def self.new( # The content of the message. content:, - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. id: nil, - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. object: nil, # The status of the item. Has no effect on the conversation. status: nil, @@ -217,7 +226,8 @@ module OpenAI end end - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. module Object extend OpenAI::Internal::Type::Enum diff --git a/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi b/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi index b08688d7..f1ca045f 100644 --- a/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +++ b/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi @@ -30,14 +30,16 @@ module OpenAI sig { returns(Symbol) } attr_accessor :type - # The unique ID of the item. + # The unique ID of the item. This may be provided by the client or generated by + # the server. sig { returns(T.nilable(String)) } attr_reader :id sig { params(id: String).void } attr_writer :id - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. sig do returns( T.nilable( @@ -92,9 +94,11 @@ module OpenAI def self.new( # The content of the message. content:, - # The unique ID of the item. 
+ # The unique ID of the item. This may be provided by the client or generated by + # the server. id: nil, - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. object: nil, # The status of the item. Has no effect on the conversation. status: nil, @@ -134,13 +138,43 @@ module OpenAI ) end - # Base64-encoded audio bytes (for `input_audio`). + # Base64-encoded audio bytes (for `input_audio`), these will be parsed as the + # format specified in the session input audio type configuration. This defaults to + # PCM 16-bit 24kHz mono if not specified. sig { returns(T.nilable(String)) } attr_reader :audio sig { params(audio: String).void } attr_writer :audio + # The detail level of the image (for `input_image`). `auto` will default to + # `high`. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::OrSymbol + ) + ) + end + attr_reader :detail + + sig do + params( + detail: + OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::OrSymbol + ).void + end + attr_writer :detail + + # Base64-encoded image bytes (for `input_image`) as a data URI. For example + # `data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...`. Supported formats are PNG + # and JPEG. + sig { returns(T.nilable(String)) } + attr_reader :image_url + + sig { params(image_url: String).void } + attr_writer :image_url + # The text content (for `input_text`). sig { returns(T.nilable(String)) } attr_reader :text @@ -148,14 +182,15 @@ module OpenAI sig { params(text: String).void } attr_writer :text - # Transcript of the audio (for `input_audio`). + # Transcript of the audio (for `input_audio`). This is not sent to the model, but + # will be attached to the message item for reference. sig { returns(T.nilable(String)) } attr_reader :transcript sig { params(transcript: String).void } attr_writer :transcript - # The content type (`input_text` or `input_audio`). + # The content type (`input_text`, `input_audio`, or `input_image`). sig do returns( T.nilable( @@ -176,6 +211,9 @@ module OpenAI sig do params( audio: String, + detail: + OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::OrSymbol, + image_url: String, text: String, transcript: String, type: @@ -183,13 +221,23 @@ module OpenAI ).returns(T.attached_class) end def self.new( - # Base64-encoded audio bytes (for `input_audio`). + # Base64-encoded audio bytes (for `input_audio`), these will be parsed as the + # format specified in the session input audio type configuration. This defaults to + # PCM 16-bit 24kHz mono if not specified. audio: nil, + # The detail level of the image (for `input_image`). `auto` will default to + # `high`. + detail: nil, + # Base64-encoded image bytes (for `input_image`) as a data URI. For example + # `data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...`. Supported formats are PNG + # and JPEG. + image_url: nil, # The text content (for `input_text`). text: nil, - # Transcript of the audio (for `input_audio`). + # Transcript of the audio (for `input_audio`). This is not sent to the model, but + # will be attached to the message item for reference. transcript: nil, - # The content type (`input_text` or `input_audio`). + # The content type (`input_text`, `input_audio`, or `input_image`). 
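With `input_image` joining `input_text` and `input_audio`, a user message can now mix text and an image. A sketch (the base64 payload is a truncated placeholder, not real data, and the event wrapper around the item is assumed):

```ruby
# Hypothetical user message item body; PNG and JPEG data URIs are the
# supported image formats, and detail: :auto resolves to :high.
user_item = {
  type: :message,
  role: :user,
  content: [
    { type: :input_text, text: "What's in this picture?" },
    {
      type: :input_image,
      image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...", # placeholder
      detail: :auto
    }
  ]
}
```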
type: nil ) end @@ -198,6 +246,9 @@ module OpenAI override.returns( { audio: String, + detail: + OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::OrSymbol, + image_url: String, text: String, transcript: String, type: @@ -208,7 +259,48 @@ module OpenAI def to_hash end - # The content type (`input_text` or `input_audio`). + # The detail level of the image (for `input_image`). `auto` will default to + # `high`. + module Detail + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + AUTO = + T.let( + :auto, + OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::TaggedSymbol + ) + LOW = + T.let( + :low, + OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::TaggedSymbol + ) + HIGH = + T.let( + :high, + OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::TaggedSymbol + ] + ) + end + def self.values + end + end + + # The content type (`input_text`, `input_audio`, or `input_image`). module Type extend OpenAI::Internal::Type::Enum @@ -231,6 +323,11 @@ module OpenAI :input_audio, OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Type::TaggedSymbol ) + INPUT_IMAGE = + T.let( + :input_image, + OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Type::TaggedSymbol + ) sig do override.returns( @@ -244,7 +341,8 @@ module OpenAI end end - # Identifier for the API object being returned - always `realtime.item`. + # Identifier for the API object being returned - always `realtime.item`. Optional + # when creating a new item. module Object extend OpenAI::Internal::Type::Enum diff --git a/rbi/openai/models/realtime/realtime_response.rbi b/rbi/openai/models/realtime/realtime_response.rbi index bfb1402c..f0308593 100644 --- a/rbi/openai/models/realtime/realtime_response.rbi +++ b/rbi/openai/models/realtime/realtime_response.rbi @@ -9,20 +9,28 @@ module OpenAI T.any(OpenAI::Realtime::RealtimeResponse, OpenAI::Internal::AnyHash) end - # The unique ID of the response. + # The unique ID of the response, will look like `resp_1234`. sig { returns(T.nilable(String)) } attr_reader :id sig { params(id: String).void } attr_writer :id + # Configuration for audio output. + sig { returns(T.nilable(OpenAI::Realtime::RealtimeResponse::Audio)) } + attr_reader :audio + + sig do + params(audio: OpenAI::Realtime::RealtimeResponse::Audio::OrHash).void + end + attr_writer :audio + # Which conversation the response is added to, determined by the `conversation` # field in the `response.create` event. If `auto`, the response will be added to # the default conversation and the value of `conversation_id` will be an id like # `conv_1234`. If `none`, the response will not be added to any conversation and # the value of `conversation_id` will be `null`. If responses are being triggered - # by server VAD, the response will be added to the default conversation, thus the - # `conversation_id` will be an id like `conv_1234`. + # automatically by VAD the response will be added to the default conversation sig { returns(T.nilable(String)) } attr_reader :conversation_id @@ -46,26 +54,6 @@ module OpenAI sig { returns(T.nilable(T::Hash[Symbol, String])) } attr_accessor :metadata - # The set of modalities the model used to respond. 
If there are multiple - # modalities, the model will pick one, for example if `modalities` is - # `["text", "audio"]`, the model could be responding in either text or audio. - sig do - returns( - T.nilable( - T::Array[OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol] - ) - ) - end - attr_reader :modalities - - sig do - params( - modalities: - T::Array[OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol] - ).void - end - attr_writer :modalities - # The object type, must be `realtime.response`. sig do returns( @@ -123,23 +111,30 @@ module OpenAI end attr_writer :output - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # The set of modalities the model used to respond, currently the only possible + # values are `[\"audio\"]`, `[\"text\"]`. Audio output always include a text + # transcript. Setting the output to mode `text` will disable audio output from the + # model. sig do returns( T.nilable( - OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::OrSymbol + T::Array[ + OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol + ] ) ) end - attr_reader :output_audio_format + attr_reader :output_modalities sig do params( - output_audio_format: - OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::OrSymbol + output_modalities: + T::Array[ + OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol + ] ).void end - attr_writer :output_audio_format + attr_writer :output_modalities # The final status of the response (`completed`, `cancelled`, `failed`, or # `incomplete`, `in_progress`). @@ -168,13 +163,6 @@ module OpenAI end attr_writer :status_details - # Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. - sig { returns(T.nilable(Float)) } - attr_reader :temperature - - sig { params(temperature: Float).void } - attr_writer :temperature - # Usage statistics for the Response, this will correspond to billing. A Realtime # API session will maintain a conversation context and append new Items to the # Conversation, thus output from previous turns (text and audio tokens) will @@ -187,34 +175,14 @@ module OpenAI end attr_writer :usage - # The voice the model used to respond. Current voice options are `alloy`, `ash`, - # `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. - sig do - returns( - T.nilable( - T.any(String, OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol) - ) - ) - end - attr_reader :voice - - sig do - params( - voice: - T.any(String, OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol) - ).void - end - attr_writer :voice - # The response resource. 
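In place of the removed flat fields, the response resource now reports `output_modalities` (currently `["audio"]` or `["text"]`, and audio output always carries a transcript) and nests voice and format under `audio.output`. A sketch of reading a parsed response; attribute names follow the sigs here, while the event handling that delivers the resource is assumed:

```ruby
# Hypothetical helper inspecting a parsed realtime.response resource.
def describe_turn(response)
  if response.output_modalities == [:text]
    "text-only turn (audio output disabled)"
  else
    "audio turn, voice=#{response.audio&.output&.voice}"
  end
end
```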
sig do params( id: String, + audio: OpenAI::Realtime::RealtimeResponse::Audio::OrHash, conversation_id: String, max_output_tokens: T.any(Integer, Symbol), metadata: T.nilable(T::Hash[Symbol, String]), - modalities: - T::Array[OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol], object: OpenAI::Realtime::RealtimeResponse::Object::OrSymbol, output: T::Array[ @@ -230,26 +198,26 @@ module OpenAI OpenAI::Realtime::RealtimeMcpApprovalRequest::OrHash ) ], - output_audio_format: - OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::OrSymbol, + output_modalities: + T::Array[ + OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol + ], status: OpenAI::Realtime::RealtimeResponse::Status::OrSymbol, status_details: OpenAI::Realtime::RealtimeResponseStatus::OrHash, - temperature: Float, - usage: OpenAI::Realtime::RealtimeResponseUsage::OrHash, - voice: - T.any(String, OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol) + usage: OpenAI::Realtime::RealtimeResponseUsage::OrHash ).returns(T.attached_class) end def self.new( - # The unique ID of the response. + # The unique ID of the response, will look like `resp_1234`. id: nil, + # Configuration for audio output. + audio: nil, # Which conversation the response is added to, determined by the `conversation` # field in the `response.create` event. If `auto`, the response will be added to # the default conversation and the value of `conversation_id` will be an id like # `conv_1234`. If `none`, the response will not be added to any conversation and # the value of `conversation_id` will be `null`. If responses are being triggered - # by server VAD, the response will be added to the default conversation, thus the - # `conversation_id` will be an id like `conv_1234`. + # automatically by VAD the response will be added to the default conversation conversation_id: nil, # Maximum number of output tokens for a single assistant response, inclusive of # tool calls, that was used in this response. @@ -261,31 +229,25 @@ module OpenAI # Keys are strings with a maximum length of 64 characters. Values are strings with # a maximum length of 512 characters. metadata: nil, - # The set of modalities the model used to respond. If there are multiple - # modalities, the model will pick one, for example if `modalities` is - # `["text", "audio"]`, the model could be responding in either text or audio. - modalities: nil, # The object type, must be `realtime.response`. object: nil, # The list of output items generated by the response. output: nil, - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - output_audio_format: nil, + # The set of modalities the model used to respond, currently the only possible + # values are `[\"audio\"]`, `[\"text\"]`. Audio output always include a text + # transcript. Setting the output to mode `text` will disable audio output from the + # model. + output_modalities: nil, # The final status of the response (`completed`, `cancelled`, `failed`, or # `incomplete`, `in_progress`). status: nil, # Additional details about the status. status_details: nil, - # Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. - temperature: nil, # Usage statistics for the Response, this will correspond to billing. A Realtime # API session will maintain a conversation context and append new Items to the # Conversation, thus output from previous turns (text and audio tokens) will # become the input for later turns. - usage: nil, - # The voice the model used to respond. 
Current voice options are `alloy`, `ash`, - # `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. - voice: nil + usage: nil ) end @@ -293,13 +255,10 @@ module OpenAI override.returns( { id: String, + audio: OpenAI::Realtime::RealtimeResponse::Audio, conversation_id: String, max_output_tokens: T.any(Integer, Symbol), metadata: T.nilable(T::Hash[Symbol, String]), - modalities: - T::Array[ - OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol - ], object: OpenAI::Realtime::RealtimeResponse::Object::OrSymbol, output: T::Array[ @@ -315,69 +274,270 @@ module OpenAI OpenAI::Realtime::RealtimeMcpApprovalRequest ) ], - output_audio_format: - OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::OrSymbol, + output_modalities: + T::Array[ + OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol + ], status: OpenAI::Realtime::RealtimeResponse::Status::OrSymbol, status_details: OpenAI::Realtime::RealtimeResponseStatus, - temperature: Float, - usage: OpenAI::Realtime::RealtimeResponseUsage, - voice: - T.any( - String, - OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol - ) + usage: OpenAI::Realtime::RealtimeResponseUsage } ) end def to_hash end - # Maximum number of output tokens for a single assistant response, inclusive of - # tool calls, that was used in this response. - module MaxOutputTokens - extend OpenAI::Internal::Type::Union + class Audio < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponse::Audio, + OpenAI::Internal::AnyHash + ) + end - Variants = T.type_alias { T.any(Integer, Symbol) } + sig do + returns( + T.nilable(OpenAI::Realtime::RealtimeResponse::Audio::Output) + ) + end + attr_reader :output + + sig do + params( + output: OpenAI::Realtime::RealtimeResponse::Audio::Output::OrHash + ).void + end + attr_writer :output + + # Configuration for audio output. + sig do + params( + output: OpenAI::Realtime::RealtimeResponse::Audio::Output::OrHash + ).returns(T.attached_class) + end + def self.new(output: nil) + end sig do override.returns( - T::Array[ - OpenAI::Realtime::RealtimeResponse::MaxOutputTokens::Variants - ] + { output: OpenAI::Realtime::RealtimeResponse::Audio::Output } ) end - def self.variants + def to_hash end - end - module Modality - extend OpenAI::Internal::Type::Enum + class Output < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponse::Audio::Output, + OpenAI::Internal::AnyHash + ) + end - TaggedSymbol = - T.type_alias do - T.all(Symbol, OpenAI::Realtime::RealtimeResponse::Modality) + # The format of the output audio. + sig do + returns( + T.nilable( + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA + ) + ) + ) end - OrSymbol = T.type_alias { T.any(Symbol, String) } + attr_reader :format_ - TEXT = - T.let( - :text, - OpenAI::Realtime::RealtimeResponse::Modality::TaggedSymbol - ) - AUDIO = - T.let( - :audio, - OpenAI::Realtime::RealtimeResponse::Modality::TaggedSymbol + sig do + params( + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ) + ).void + end + attr_writer :format_ + + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. 
Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. + sig do + returns( + T.nilable( + T.any( + String, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol + ) + ) + ) + end + attr_reader :voice + + sig do + params( + voice: + T.any( + String, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol + ) + ).void + end + attr_writer :voice + + sig do + params( + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ), + voice: + T.any( + String, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol + ) + ).returns(T.attached_class) + end + def self.new( + # The format of the output audio. + format_: nil, + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. + voice: nil ) + end + + sig do + override.returns( + { + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA + ), + voice: + T.any( + String, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol + ) + } + ) + end + def to_hash + end + + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. 
+ module Voice + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + String, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::Variants + ] + ) + end + def self.variants + end + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + ALLOY = + T.let( + :alloy, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + ASH = + T.let( + :ash, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + BALLAD = + T.let( + :ballad, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + CORAL = + T.let( + :coral, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + ECHO = + T.let( + :echo, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + SAGE = + T.let( + :sage, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + SHIMMER = + T.let( + :shimmer, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + VERSE = + T.let( + :verse, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + MARIN = + T.let( + :marin, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + CEDAR = + T.let( + :cedar, + OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol + ) + end + end + end + + # Maximum number of output tokens for a single assistant response, inclusive of + # tool calls, that was used in this response. + module MaxOutputTokens + extend OpenAI::Internal::Type::Union + + Variants = T.type_alias { T.any(Integer, Symbol) } sig do override.returns( T::Array[ - OpenAI::Realtime::RealtimeResponse::Modality::TaggedSymbol + OpenAI::Realtime::RealtimeResponse::MaxOutputTokens::Variants ] ) end - def self.values + def self.variants end end @@ -406,39 +566,30 @@ module OpenAI end end - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - module OutputAudioFormat + module OutputModality extend OpenAI::Internal::Type::Enum TaggedSymbol = T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeResponse::OutputAudioFormat - ) + T.all(Symbol, OpenAI::Realtime::RealtimeResponse::OutputModality) end OrSymbol = T.type_alias { T.any(Symbol, String) } - PCM16 = - T.let( - :pcm16, - OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::TaggedSymbol - ) - G711_ULAW = + TEXT = T.let( - :g711_ulaw, - OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::TaggedSymbol + :text, + OpenAI::Realtime::RealtimeResponse::OutputModality::TaggedSymbol ) - G711_ALAW = + AUDIO = T.let( - :g711_alaw, - OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::TaggedSymbol + :audio, + OpenAI::Realtime::RealtimeResponse::OutputModality::TaggedSymbol ) sig do override.returns( T::Array[ - OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::TaggedSymbol + OpenAI::Realtime::RealtimeResponse::OutputModality::TaggedSymbol ] ) end @@ -491,82 +642,6 @@ module OpenAI def self.values end end - - # The voice the model used to respond. Current voice options are `alloy`, `ash`, - # `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. 
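Taken together, the hunks above replace the old top-level `modalities`, `output_audio_format`, `temperature`, and `voice` fields with an `output_modalities` array plus a nested `audio.output` object. A minimal consumption sketch, assuming `response` is an already-parsed `OpenAI::Realtime::RealtimeResponse` (for example taken from a `response.done` server event); the helper name is hypothetical:

```ruby
require "openai"

# Hypothetical helper reading the restructured response shape.
def summarize_response(response)
  # `output_modalities` replaces the removed top-level `modalities`.
  modalities = response.output_modalities || []

  if modalities.include?(:audio)
    # Voice and format moved under `audio.output`, replacing the removed
    # top-level `voice` and `output_audio_format` fields.
    output = response.audio&.output
    puts "voice:  #{output&.voice.inspect}"
    puts "format: #{output&.format_.inspect}"
  end

  puts "status:       #{response.status.inspect}"
  puts "input tokens: #{response.usage&.input_tokens.inspect}"
end
```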
- module Voice - extend OpenAI::Internal::Type::Union - - Variants = - T.type_alias do - T.any( - String, - OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol - ) - end - - sig do - override.returns( - T::Array[OpenAI::Realtime::RealtimeResponse::Voice::Variants] - ) - end - def self.variants - end - - TaggedSymbol = - T.type_alias do - T.all(Symbol, OpenAI::Realtime::RealtimeResponse::Voice) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - ALLOY = - T.let( - :alloy, - OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol - ) - ASH = - T.let(:ash, OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol) - BALLAD = - T.let( - :ballad, - OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol - ) - CORAL = - T.let( - :coral, - OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol - ) - ECHO = - T.let( - :echo, - OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol - ) - SAGE = - T.let( - :sage, - OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol - ) - SHIMMER = - T.let( - :shimmer, - OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol - ) - VERSE = - T.let( - :verse, - OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol - ) - MARIN = - T.let( - :marin, - OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol - ) - CEDAR = - T.let( - :cedar, - OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol - ) - end end end end diff --git a/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi b/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi new file mode 100644 index 00000000..63c43918 --- /dev/null +++ b/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi @@ -0,0 +1,250 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class RealtimeResponseCreateAudioOutput < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponseCreateAudioOutput, + OpenAI::Internal::AnyHash + ) + end + + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output + ) + ) + end + attr_reader :output + + sig do + params( + output: + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::OrHash + ).void + end + attr_writer :output + + # Configuration for audio input and output. + sig do + params( + output: + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::OrHash + ).returns(T.attached_class) + end + def self.new(output: nil) + end + + sig do + override.returns( + { + output: + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output + } + ) + end + def to_hash + end + + class Output < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output, + OpenAI::Internal::AnyHash + ) + end + + # The format of the output audio. + sig do + returns( + T.nilable( + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA + ) + ) + ) + end + attr_reader :format_ + + sig do + params( + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ) + ).void + end + attr_writer :format_ + + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. 
Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. + sig do + returns( + T.nilable( + T.any( + String, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::OrSymbol + ) + ) + ) + end + attr_reader :voice + + sig do + params( + voice: + T.any( + String, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::OrSymbol + ) + ).void + end + attr_writer :voice + + sig do + params( + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ), + voice: + T.any( + String, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::OrSymbol + ) + ).returns(T.attached_class) + end + def self.new( + # The format of the output audio. + format_: nil, + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. + voice: nil + ) + end + + sig do + override.returns( + { + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA + ), + voice: + T.any( + String, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::OrSymbol + ) + } + ) + end + def to_hash + end + + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. 
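On the write side, `RealtimeResponseCreateAudioOutput` mirrors this nested shape. A small construction sketch: the `{ type: :"audio/pcm", rate: 24_000 }` hash is an assumed rendering of the `RealtimeAudioFormats::AudioPCM` variant, and `format_` is the Ruby-side attribute name (the trailing underscore avoids shadowing Ruby's built-in `format`):

```ruby
require "openai"

# A minimal sketch of the nested audio-output configuration. The PCM hash
# shape is an assumption based on the RealtimeAudioFormats union; the typed
# OpenAI::Realtime::RealtimeAudioFormats::AudioPCM model is also accepted.
audio = OpenAI::Realtime::RealtimeResponseCreateAudioOutput.new(
  output: {
    format_: { type: :"audio/pcm", rate: 24_000 },
    voice: :marin # `marin` and `cedar` are the recommended voices
  }
)

pp audio.to_hash
```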
+ module Voice + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + String, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::Variants + ] + ) + end + def self.variants + end + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + ALLOY = + T.let( + :alloy, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + ASH = + T.let( + :ash, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + BALLAD = + T.let( + :ballad, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + CORAL = + T.let( + :coral, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + ECHO = + T.let( + :echo, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + SAGE = + T.let( + :sage, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + SHIMMER = + T.let( + :shimmer, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + VERSE = + T.let( + :verse, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + MARIN = + T.let( + :marin, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + CEDAR = + T.let( + :cedar, + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol + ) + end + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi b/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi new file mode 100644 index 00000000..1c8bf7dd --- /dev/null +++ b/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi @@ -0,0 +1,616 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class RealtimeResponseCreateMcpTool < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponseCreateMcpTool, + OpenAI::Internal::AnyHash + ) + end + + # A label for this MCP server, used to identify it in tool calls. + sig { returns(String) } + attr_accessor :server_label + + # The type of the MCP tool. Always `mcp`. + sig { returns(Symbol) } + attr_accessor :type + + # List of allowed tool names or a filter object. + sig do + returns( + T.nilable( + T.any( + T::Array[String], + OpenAI::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter + ) + ) + ) + end + attr_accessor :allowed_tools + + # An OAuth access token that can be used with a remote MCP server, either with a + # custom MCP server URL or a service connector. Your application must handle the + # OAuth authorization flow and provide the token here. + sig { returns(T.nilable(String)) } + attr_reader :authorization + + sig { params(authorization: String).void } + attr_writer :authorization + + # Identifier for service connectors, like those available in ChatGPT. One of + # `server_url` or `connector_id` must be provided. Learn more about service + # connectors + # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). 
+ # + # Currently supported `connector_id` values are: + # + # - Dropbox: `connector_dropbox` + # - Gmail: `connector_gmail` + # - Google Calendar: `connector_googlecalendar` + # - Google Drive: `connector_googledrive` + # - Microsoft Teams: `connector_microsoftteams` + # - Outlook Calendar: `connector_outlookcalendar` + # - Outlook Email: `connector_outlookemail` + # - SharePoint: `connector_sharepoint` + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::OrSymbol + ) + ) + end + attr_reader :connector_id + + sig do + params( + connector_id: + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::OrSymbol + ).void + end + attr_writer :connector_id + + # Optional HTTP headers to send to the MCP server. Use for authentication or other + # purposes. + sig { returns(T.nilable(T::Hash[Symbol, String])) } + attr_accessor :headers + + # Specify which of the MCP server's tools require approval. + sig do + returns( + T.nilable( + T.any( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting::OrSymbol + ) + ) + ) + end + attr_accessor :require_approval + + # Optional description of the MCP server, used to provide more context. + sig { returns(T.nilable(String)) } + attr_reader :server_description + + sig { params(server_description: String).void } + attr_writer :server_description + + # The URL for the MCP server. One of `server_url` or `connector_id` must be + # provided. + sig { returns(T.nilable(String)) } + attr_reader :server_url + + sig { params(server_url: String).void } + attr_writer :server_url + + # Give the model access to additional tools via remote Model Context Protocol + # (MCP) servers. + # [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + sig do + params( + server_label: String, + allowed_tools: + T.nilable( + T.any( + T::Array[String], + OpenAI::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter::OrHash + ) + ), + authorization: String, + connector_id: + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::OrSymbol, + headers: T.nilable(T::Hash[Symbol, String]), + require_approval: + T.nilable( + T.any( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::OrHash, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting::OrSymbol + ) + ), + server_description: String, + server_url: String, + type: Symbol + ).returns(T.attached_class) + end + def self.new( + # A label for this MCP server, used to identify it in tool calls. + server_label:, + # List of allowed tool names or a filter object. + allowed_tools: nil, + # An OAuth access token that can be used with a remote MCP server, either with a + # custom MCP server URL or a service connector. Your application must handle the + # OAuth authorization flow and provide the token here. + authorization: nil, + # Identifier for service connectors, like those available in ChatGPT. One of + # `server_url` or `connector_id` must be provided. Learn more about service + # connectors + # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). 
+ # + # Currently supported `connector_id` values are: + # + # - Dropbox: `connector_dropbox` + # - Gmail: `connector_gmail` + # - Google Calendar: `connector_googlecalendar` + # - Google Drive: `connector_googledrive` + # - Microsoft Teams: `connector_microsoftteams` + # - Outlook Calendar: `connector_outlookcalendar` + # - Outlook Email: `connector_outlookemail` + # - SharePoint: `connector_sharepoint` + connector_id: nil, + # Optional HTTP headers to send to the MCP server. Use for authentication or other + # purposes. + headers: nil, + # Specify which of the MCP server's tools require approval. + require_approval: nil, + # Optional description of the MCP server, used to provide more context. + server_description: nil, + # The URL for the MCP server. One of `server_url` or `connector_id` must be + # provided. + server_url: nil, + # The type of the MCP tool. Always `mcp`. + type: :mcp + ) + end + + sig do + override.returns( + { + server_label: String, + type: Symbol, + allowed_tools: + T.nilable( + T.any( + T::Array[String], + OpenAI::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter + ) + ), + authorization: String, + connector_id: + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::OrSymbol, + headers: T.nilable(T::Hash[Symbol, String]), + require_approval: + T.nilable( + T.any( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting::OrSymbol + ) + ), + server_description: String, + server_url: String + } + ) + end + def to_hash + end + + # List of allowed tool names or a filter object. + module AllowedTools + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + T::Array[String], + OpenAI::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter + ) + end + + class McpToolFilter < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter, + OpenAI::Internal::AnyHash + ) + end + + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :read_only + + sig { params(read_only: T::Boolean).void } + attr_writer :read_only + + # List of allowed tool names. + sig { returns(T.nilable(T::Array[String])) } + attr_reader :tool_names + + sig { params(tool_names: T::Array[String]).void } + attr_writer :tool_names + + # A filter object to specify which tools are allowed. + sig do + params( + read_only: T::Boolean, + tool_names: T::Array[String] + ).returns(T.attached_class) + end + def self.new( + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + read_only: nil, + # List of allowed tool names. 
+ tool_names: nil + ) + end + + sig do + override.returns( + { read_only: T::Boolean, tool_names: T::Array[String] } + ) + end + def to_hash + end + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::Variants + ] + ) + end + def self.variants + end + + StringArray = + T.let( + OpenAI::Internal::Type::ArrayOf[String], + OpenAI::Internal::Type::Converter + ) + end + + # Identifier for service connectors, like those available in ChatGPT. One of + # `server_url` or `connector_id` must be provided. Learn more about service + # connectors + # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + # + # Currently supported `connector_id` values are: + # + # - Dropbox: `connector_dropbox` + # - Gmail: `connector_gmail` + # - Google Calendar: `connector_googlecalendar` + # - Google Drive: `connector_googledrive` + # - Microsoft Teams: `connector_microsoftteams` + # - Outlook Calendar: `connector_outlookcalendar` + # - Outlook Email: `connector_outlookemail` + # - SharePoint: `connector_sharepoint` + module ConnectorID + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + CONNECTOR_DROPBOX = + T.let( + :connector_dropbox, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_GMAIL = + T.let( + :connector_gmail, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_GOOGLECALENDAR = + T.let( + :connector_googlecalendar, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_GOOGLEDRIVE = + T.let( + :connector_googledrive, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_MICROSOFTTEAMS = + T.let( + :connector_microsoftteams, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_OUTLOOKCALENDAR = + T.let( + :connector_outlookcalendar, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_OUTLOOKEMAIL = + T.let( + :connector_outlookemail, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_SHAREPOINT = + T.let( + :connector_sharepoint, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateMcpTool::ConnectorID::TaggedSymbol + ] + ) + end + def self.values + end + end + + # Specify which of the MCP server's tools require approval. + module RequireApproval + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol + ) + end + + class McpToolApprovalFilter < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter, + OpenAI::Internal::AnyHash + ) + end + + # A filter object to specify which tools are allowed. 
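To make the MCP tool shape concrete, here is a hedged construction sketch; the label, URL, and tool name are hypothetical placeholders, and `connector_id` could be supplied instead of `server_url`:

```ruby
require "openai"

mcp_tool = OpenAI::Realtime::RealtimeResponseCreateMcpTool.new(
  server_label: "docs-search",                 # hypothetical label
  server_url: "https://example.com/mcp",       # hypothetical server URL
  authorization: ENV.fetch("MCP_OAUTH_TOKEN"), # OAuth token your app obtained
  allowed_tools: { tool_names: ["search"], read_only: true },
  require_approval: :never                     # or :always, or a filter hash
)

pp mcp_tool.to_hash # `type` defaults to :mcp
```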
+ sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always + ) + ) + end + attr_reader :always + + sig do + params( + always: + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always::OrHash + ).void + end + attr_writer :always + + # A filter object to specify which tools are allowed. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never + ) + ) + end + attr_reader :never + + sig do + params( + never: + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never::OrHash + ).void + end + attr_writer :never + + # Specify which of the MCP server's tools require approval. Can be `always`, + # `never`, or a filter object associated with tools that require approval. + sig do + params( + always: + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always::OrHash, + never: + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never::OrHash + ).returns(T.attached_class) + end + def self.new( + # A filter object to specify which tools are allowed. + always: nil, + # A filter object to specify which tools are allowed. + never: nil + ) + end + + sig do + override.returns( + { + always: + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always, + never: + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never + } + ) + end + def to_hash + end + + class Always < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always, + OpenAI::Internal::AnyHash + ) + end + + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :read_only + + sig { params(read_only: T::Boolean).void } + attr_writer :read_only + + # List of allowed tool names. + sig { returns(T.nilable(T::Array[String])) } + attr_reader :tool_names + + sig { params(tool_names: T::Array[String]).void } + attr_writer :tool_names + + # A filter object to specify which tools are allowed. + sig do + params( + read_only: T::Boolean, + tool_names: T::Array[String] + ).returns(T.attached_class) + end + def self.new( + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + read_only: nil, + # List of allowed tool names. + tool_names: nil + ) + end + + sig do + override.returns( + { read_only: T::Boolean, tool_names: T::Array[String] } + ) + end + def to_hash + end + end + + class Never < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never, + OpenAI::Internal::AnyHash + ) + end + + # Indicates whether or not a tool modifies data or is read-only. 
If an MCP server + is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :read_only + + sig { params(read_only: T::Boolean).void } + attr_writer :read_only + + # List of allowed tool names. + sig { returns(T.nilable(T::Array[String])) } + attr_reader :tool_names + + sig { params(tool_names: T::Array[String]).void } + attr_writer :tool_names + + # A filter object to specify which tools are allowed. + sig do + params( + read_only: T::Boolean, + tool_names: T::Array[String] + ).returns(T.attached_class) + end + def self.new( + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + read_only: nil, + # List of allowed tool names. + tool_names: nil + ) + end + + sig do + override.returns( + { read_only: T::Boolean, tool_names: T::Array[String] } + ) + end + def to_hash + end + end + end + + # Specify a single approval policy for all tools. One of `always` or `never`. When + # set to `always`, all tools will require approval. When set to `never`, all tools + # will not require approval. + module McpToolApprovalSetting + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + ALWAYS = + T.let( + :always, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol + ) + NEVER = + T.let( + :never, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol + ] + ) + end + def self.values + end + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::Variants + ] + ) + end + def self.variants + end + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_response_create_params.rbi b/rbi/openai/models/realtime/realtime_response_create_params.rbi new file mode 100644 index 00000000..e5e98c2f --- /dev/null +++ b/rbi/openai/models/realtime/realtime_response_create_params.rbi @@ -0,0 +1,529 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class RealtimeResponseCreateParams < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponseCreateParams, + OpenAI::Internal::AnyHash + ) + end + + # Configuration for audio input and output. + sig do + returns( + T.nilable(OpenAI::Realtime::RealtimeResponseCreateAudioOutput) + ) + end + attr_reader :audio + + sig do + params( + audio: OpenAI::Realtime::RealtimeResponseCreateAudioOutput::OrHash + ).void + end + attr_writer :audio + + # Controls which conversation the response is added to. Currently supports `auto` + # and `none`, with `auto` as the default value. The `auto` value means that the + # contents of the response will be added to the default conversation. Set this to + # `none` to create an out-of-band response which will not add items to the + # default conversation.
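Since the `conversation` field is what enables out-of-band responses, a usage sketch may help here. It assumes `ws` is an already-connected WebSocket to the Realtime API (transport is outside this SDK), and it also shows the `max_output_tokens` union taking either an Integer or `:inf`:

```ruby
require "json"
require "openai"

# Out-of-band response: `conversation: :none` keeps generated items out of
# the default conversation, and `input: []` starts from a clean context.
params = OpenAI::Realtime::RealtimeResponseCreateParams.new(
  conversation: :none,
  output_modalities: [:text],
  instructions: "Classify the user's sentiment as positive or negative.",
  metadata: { purpose: "sentiment" },
  max_output_tokens: 4_096, # or :inf for the model maximum
  input: []                 # item references from the session could go here
)

# `ws` is an assumed, pre-established WebSocket connection.
ws.send(JSON.generate({ type: "response.create", response: params.to_hash }))
```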
+ sig do + returns( + T.nilable( + T.any( + String, + OpenAI::Realtime::RealtimeResponseCreateParams::Conversation::OrSymbol + ) + ) + ) + end + attr_reader :conversation + + sig do + params( + conversation: + T.any( + String, + OpenAI::Realtime::RealtimeResponseCreateParams::Conversation::OrSymbol + ) + ).void + end + attr_writer :conversation + + # Input items to include in the prompt for the model. Using this field creates a + # new context for this Response instead of using the default conversation. An + # empty array `[]` will clear the context for this Response. Note that this can + # include references to items that previously appeared in the session using their + # id. + sig do + returns( + T.nilable( + T::Array[ + T.any( + OpenAI::Realtime::RealtimeConversationItemSystemMessage, + OpenAI::Realtime::RealtimeConversationItemUserMessage, + OpenAI::Realtime::RealtimeConversationItemAssistantMessage, + OpenAI::Realtime::RealtimeConversationItemFunctionCall, + OpenAI::Realtime::RealtimeConversationItemFunctionCallOutput, + OpenAI::Realtime::RealtimeMcpApprovalResponse, + OpenAI::Realtime::RealtimeMcpListTools, + OpenAI::Realtime::RealtimeMcpToolCall, + OpenAI::Realtime::RealtimeMcpApprovalRequest + ) + ] + ) + ) + end + attr_reader :input + + sig do + params( + input: + T::Array[ + T.any( + OpenAI::Realtime::RealtimeConversationItemSystemMessage::OrHash, + OpenAI::Realtime::RealtimeConversationItemUserMessage::OrHash, + OpenAI::Realtime::RealtimeConversationItemAssistantMessage::OrHash, + OpenAI::Realtime::RealtimeConversationItemFunctionCall::OrHash, + OpenAI::Realtime::RealtimeConversationItemFunctionCallOutput::OrHash, + OpenAI::Realtime::RealtimeMcpApprovalResponse::OrHash, + OpenAI::Realtime::RealtimeMcpListTools::OrHash, + OpenAI::Realtime::RealtimeMcpToolCall::OrHash, + OpenAI::Realtime::RealtimeMcpApprovalRequest::OrHash + ) + ] + ).void + end + attr_writer :input + + # The default system instructions (i.e. system message) prepended to model calls. + # This field allows the client to guide the model on desired responses. The model + # can be instructed on response content and format, (e.g. "be extremely succinct", + # "act friendly", "here are examples of good responses") and on audio behavior + # (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + # instructions are not guaranteed to be followed by the model, but they provide + # guidance to the model on the desired behavior. Note that the server sets default + # instructions which will be used if this field is not set and are visible in the + # `session.created` event at the start of the session. + sig { returns(T.nilable(String)) } + attr_reader :instructions + + sig { params(instructions: String).void } + attr_writer :instructions + + # Maximum number of output tokens for a single assistant response, inclusive of + # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + # `inf` for the maximum available tokens for a given model. Defaults to `inf`. + sig { returns(T.nilable(T.any(Integer, Symbol))) } + attr_reader :max_output_tokens + + sig { params(max_output_tokens: T.any(Integer, Symbol)).void } + attr_writer :max_output_tokens + + # Set of 16 key-value pairs that can be attached to an object. This can be useful + # for storing additional information about the object in a structured format, and + # querying for objects via API or the dashboard. + # + # Keys are strings with a maximum length of 64 characters. 
Values are strings with + a maximum length of 512 characters. + sig { returns(T.nilable(T::Hash[Symbol, String])) } + attr_accessor :metadata + + # The set of modalities the model used to respond, currently the only possible + # values are `[\"audio\"]` and `[\"text\"]`. Audio output always includes a text + # transcript. Setting the output mode to `text` will disable audio output from the + # model. + sig do + returns( + T.nilable( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality::OrSymbol + ] + ) + ) + end + attr_reader :output_modalities + + sig do + params( + output_modalities: + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality::OrSymbol + ] + ).void + end + attr_writer :output_modalities + + # Reference to a prompt template and its variables. + # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + sig { returns(T.nilable(OpenAI::Responses::ResponsePrompt)) } + attr_reader :prompt + + sig do + params( + prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash) + ).void + end + attr_writer :prompt + + # How the model chooses tools. Provide one of the string modes or force a specific + # function/MCP tool. + sig do + returns( + T.nilable( + T.any( + OpenAI::Responses::ToolChoiceOptions::OrSymbol, + OpenAI::Responses::ToolChoiceFunction, + OpenAI::Responses::ToolChoiceMcp + ) + ) + ) + end + attr_reader :tool_choice + + sig do + params( + tool_choice: + T.any( + OpenAI::Responses::ToolChoiceOptions::OrSymbol, + OpenAI::Responses::ToolChoiceFunction::OrHash, + OpenAI::Responses::ToolChoiceMcp::OrHash + ) + ).void + end + attr_writer :tool_choice + + # Tools available to the model. + sig do + returns( + T.nilable( + T::Array[ + T.any( + OpenAI::Realtime::Models, + OpenAI::Realtime::RealtimeResponseCreateMcpTool + ) + ] + ) + ) + end + attr_reader :tools + + sig do + params( + tools: + T::Array[ + T.any( + OpenAI::Realtime::Models::OrHash, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::OrHash + ) + ] + ).void + end + attr_writer :tools + + # Create a new Realtime response with these parameters. + sig do + params( + audio: OpenAI::Realtime::RealtimeResponseCreateAudioOutput::OrHash, + conversation: + T.any( + String, + OpenAI::Realtime::RealtimeResponseCreateParams::Conversation::OrSymbol + ), + input: + T::Array[ + T.any( + OpenAI::Realtime::RealtimeConversationItemSystemMessage::OrHash, + OpenAI::Realtime::RealtimeConversationItemUserMessage::OrHash, + OpenAI::Realtime::RealtimeConversationItemAssistantMessage::OrHash, + OpenAI::Realtime::RealtimeConversationItemFunctionCall::OrHash, + OpenAI::Realtime::RealtimeConversationItemFunctionCallOutput::OrHash, + OpenAI::Realtime::RealtimeMcpApprovalResponse::OrHash, + OpenAI::Realtime::RealtimeMcpListTools::OrHash, + OpenAI::Realtime::RealtimeMcpToolCall::OrHash, + OpenAI::Realtime::RealtimeMcpApprovalRequest::OrHash + ) + ], + instructions: String, + max_output_tokens: T.any(Integer, Symbol), + metadata: T.nilable(T::Hash[Symbol, String]), + output_modalities: + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality::OrSymbol + ], + prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash), + tool_choice: + T.any( + OpenAI::Responses::ToolChoiceOptions::OrSymbol, + OpenAI::Responses::ToolChoiceFunction::OrHash, + OpenAI::Responses::ToolChoiceMcp::OrHash + ), + tools: + T::Array[ + T.any( + OpenAI::Realtime::Models::OrHash, + OpenAI::Realtime::RealtimeResponseCreateMcpTool::OrHash + ) + ] + ).returns(T.attached_class) +
end + def self.new( + # Configuration for audio input and output. + audio: nil, + # Controls which conversation the response is added to. Currently supports `auto` + # and `none`, with `auto` as the default value. The `auto` value means that the + # contents of the response will be added to the default conversation. Set this to + # `none` to create an out-of-band response which will not add items to the + # default conversation. + conversation: nil, + # Input items to include in the prompt for the model. Using this field creates a + # new context for this Response instead of using the default conversation. An + # empty array `[]` will clear the context for this Response. Note that this can + # include references to items that previously appeared in the session using their + # id. + input: nil, + # The default system instructions (i.e. system message) prepended to model calls. + # This field allows the client to guide the model on desired responses. The model + # can be instructed on response content and format, (e.g. "be extremely succinct", + # "act friendly", "here are examples of good responses") and on audio behavior + # (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + # instructions are not guaranteed to be followed by the model, but they provide + # guidance to the model on the desired behavior. Note that the server sets default + # instructions which will be used if this field is not set and are visible in the + # `session.created` event at the start of the session. + instructions: nil, + # Maximum number of output tokens for a single assistant response, inclusive of + # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + # `inf` for the maximum available tokens for a given model. Defaults to `inf`. + max_output_tokens: nil, + # Set of 16 key-value pairs that can be attached to an object. This can be useful + # for storing additional information about the object in a structured format, and + # querying for objects via API or the dashboard. + # + # Keys are strings with a maximum length of 64 characters. Values are strings with + # a maximum length of 512 characters. + metadata: nil, + # The set of modalities the model used to respond, currently the only possible + # values are `[\"audio\"]` and `[\"text\"]`. Audio output always includes a text + # transcript. Setting the output mode to `text` will disable audio output from the + # model. + output_modalities: nil, + # Reference to a prompt template and its variables. + # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + prompt: nil, + # How the model chooses tools. Provide one of the string modes or force a specific + # function/MCP tool. + tool_choice: nil, + # Tools available to the model.
+ tools: nil + ) + end + + sig do + override.returns( + { + audio: OpenAI::Realtime::RealtimeResponseCreateAudioOutput, + conversation: + T.any( + String, + OpenAI::Realtime::RealtimeResponseCreateParams::Conversation::OrSymbol + ), + input: + T::Array[ + T.any( + OpenAI::Realtime::RealtimeConversationItemSystemMessage, + OpenAI::Realtime::RealtimeConversationItemUserMessage, + OpenAI::Realtime::RealtimeConversationItemAssistantMessage, + OpenAI::Realtime::RealtimeConversationItemFunctionCall, + OpenAI::Realtime::RealtimeConversationItemFunctionCallOutput, + OpenAI::Realtime::RealtimeMcpApprovalResponse, + OpenAI::Realtime::RealtimeMcpListTools, + OpenAI::Realtime::RealtimeMcpToolCall, + OpenAI::Realtime::RealtimeMcpApprovalRequest + ) + ], + instructions: String, + max_output_tokens: T.any(Integer, Symbol), + metadata: T.nilable(T::Hash[Symbol, String]), + output_modalities: + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality::OrSymbol + ], + prompt: T.nilable(OpenAI::Responses::ResponsePrompt), + tool_choice: + T.any( + OpenAI::Responses::ToolChoiceOptions::OrSymbol, + OpenAI::Responses::ToolChoiceFunction, + OpenAI::Responses::ToolChoiceMcp + ), + tools: + T::Array[ + T.any( + OpenAI::Realtime::Models, + OpenAI::Realtime::RealtimeResponseCreateMcpTool + ) + ] + } + ) + end + def to_hash + end + + # Controls which conversation the response is added to. Currently supports `auto` + # and `none`, with `auto` as the default value. The `auto` value means that the + # contents of the response will be added to the default conversation. Set this to + # `none` to create an out-of-band response which will not add items to default + # conversation. + module Conversation + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + String, + OpenAI::Realtime::RealtimeResponseCreateParams::Conversation::TaggedSymbol + ) + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateParams::Conversation::Variants + ] + ) + end + def self.variants + end + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeResponseCreateParams::Conversation + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + AUTO = + T.let( + :auto, + OpenAI::Realtime::RealtimeResponseCreateParams::Conversation::TaggedSymbol + ) + NONE = + T.let( + :none, + OpenAI::Realtime::RealtimeResponseCreateParams::Conversation::TaggedSymbol + ) + end + + # Maximum number of output tokens for a single assistant response, inclusive of + # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + # `inf` for the maximum available tokens for a given model. Defaults to `inf`. 
+ module MaxOutputTokens + extend OpenAI::Internal::Type::Union + + Variants = T.type_alias { T.any(Integer, Symbol) } + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateParams::MaxOutputTokens::Variants + ] + ) + end + def self.variants + end + end + + module OutputModality + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + TEXT = + T.let( + :text, + OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality::TaggedSymbol + ) + AUDIO = + T.let( + :audio, + OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality::TaggedSymbol + ] + ) + end + def self.values + end + end + + # How the model chooses tools. Provide one of the string modes or force a specific + # function/MCP tool. + module ToolChoice + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + OpenAI::Responses::ToolChoiceOptions::TaggedSymbol, + OpenAI::Responses::ToolChoiceFunction, + OpenAI::Responses::ToolChoiceMcp + ) + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateParams::ToolChoice::Variants + ] + ) + end + def self.variants + end + end + + # Give the model access to additional tools via remote Model Context Protocol + # (MCP) servers. + # [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + module Tool + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + OpenAI::Realtime::Models, + OpenAI::Realtime::RealtimeResponseCreateMcpTool + ) + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeResponseCreateParams::Tool::Variants + ] + ) + end + def self.variants + end + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_response_usage.rbi b/rbi/openai/models/realtime/realtime_response_usage.rbi index 6303d410..74a34c97 100644 --- a/rbi/openai/models/realtime/realtime_response_usage.rbi +++ b/rbi/openai/models/realtime/realtime_response_usage.rbi @@ -12,7 +12,10 @@ module OpenAI ) end - # Details about the input tokens used in the Response. + # Details about the input tokens used in the Response. Cached tokens are tokens + # from previous turns in the conversation that are included as context for the + # current response. Cached tokens here are counted as a subset of input tokens, + # meaning input tokens will include cached and uncached tokens. sig do returns( T.nilable(OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails) @@ -84,7 +87,10 @@ module OpenAI ).returns(T.attached_class) end def self.new( - # Details about the input tokens used in the Response. + # Details about the input tokens used in the Response. Cached tokens are tokens + # from previous turns in the conversation that are included as context for the + # current response. Cached tokens here are counted as a subset of input tokens, + # meaning input tokens will include cached and uncached tokens. input_token_details: nil, # The number of input tokens used in the Response, including text and audio # tokens. 
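The updated usage docs state that cached tokens are a subset of input tokens, which makes the uncached portion a simple difference. A sketch, assuming `usage` is the `RealtimeResponseUsage` from a completed response and using the `cached_tokens` / `cached_tokens_details` fields added in the companion `RealtimeResponseUsageInputTokenDetails` diff below:

```ruby
require "openai"

# Hypothetical billing helper: cached tokens are documented as a subset of
# input tokens, so uncached input is the difference.
def uncached_input_tokens(usage)
  details = usage.input_token_details
  return usage.input_tokens.to_i if details.nil?

  usage.input_tokens.to_i - details.cached_tokens.to_i
end

# Per-modality cached breakdown (audio/image/text), when present:
# usage.input_token_details&.cached_tokens_details&.audio_tokens
```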
diff --git a/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi b/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi index 527ed277..ad04d122 100644 --- a/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +++ b/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi @@ -12,41 +12,76 @@ module OpenAI ) end - # The number of audio tokens used in the Response. + # The number of audio tokens used as input for the Response. sig { returns(T.nilable(Integer)) } attr_reader :audio_tokens sig { params(audio_tokens: Integer).void } attr_writer :audio_tokens - # The number of cached tokens used in the Response. + # The number of cached tokens used as input for the Response. sig { returns(T.nilable(Integer)) } attr_reader :cached_tokens sig { params(cached_tokens: Integer).void } attr_writer :cached_tokens - # The number of text tokens used in the Response. + # Details about the cached tokens used as input for the Response. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails + ) + ) + end + attr_reader :cached_tokens_details + + sig do + params( + cached_tokens_details: + OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails::OrHash + ).void + end + attr_writer :cached_tokens_details + + # The number of image tokens used as input for the Response. + sig { returns(T.nilable(Integer)) } + attr_reader :image_tokens + + sig { params(image_tokens: Integer).void } + attr_writer :image_tokens + + # The number of text tokens used as input for the Response. sig { returns(T.nilable(Integer)) } attr_reader :text_tokens sig { params(text_tokens: Integer).void } attr_writer :text_tokens - # Details about the input tokens used in the Response. + # Details about the input tokens used in the Response. Cached tokens are tokens + # from previous turns in the conversation that are included as context for the + # current response. Cached tokens here are counted as a subset of input tokens, + # meaning input tokens will include cached and uncached tokens. sig do params( audio_tokens: Integer, cached_tokens: Integer, + cached_tokens_details: + OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails::OrHash, + image_tokens: Integer, text_tokens: Integer ).returns(T.attached_class) end def self.new( - # The number of audio tokens used in the Response. + # The number of audio tokens used as input for the Response. audio_tokens: nil, - # The number of cached tokens used in the Response. + # The number of cached tokens used as input for the Response. cached_tokens: nil, - # The number of text tokens used in the Response. + # Details about the cached tokens used as input for the Response. + cached_tokens_details: nil, + # The number of image tokens used as input for the Response. + image_tokens: nil, + # The number of text tokens used as input for the Response. text_tokens: nil ) end @@ -56,12 +91,76 @@ module OpenAI { audio_tokens: Integer, cached_tokens: Integer, + cached_tokens_details: + OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails, + image_tokens: Integer, text_tokens: Integer } ) end def to_hash end + + class CachedTokensDetails < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails, + OpenAI::Internal::AnyHash + ) + end + + # The number of cached audio tokens used as input for the Response. 
+ sig { returns(T.nilable(Integer)) } + attr_reader :audio_tokens + + sig { params(audio_tokens: Integer).void } + attr_writer :audio_tokens + + # The number of cached image tokens used as input for the Response. + sig { returns(T.nilable(Integer)) } + attr_reader :image_tokens + + sig { params(image_tokens: Integer).void } + attr_writer :image_tokens + + # The number of cached text tokens used as input for the Response. + sig { returns(T.nilable(Integer)) } + attr_reader :text_tokens + + sig { params(text_tokens: Integer).void } + attr_writer :text_tokens + + # Details about the cached tokens used as input for the Response. + sig do + params( + audio_tokens: Integer, + image_tokens: Integer, + text_tokens: Integer + ).returns(T.attached_class) + end + def self.new( + # The number of cached audio tokens used as input for the Response. + audio_tokens: nil, + # The number of cached image tokens used as input for the Response. + image_tokens: nil, + # The number of cached text tokens used as input for the Response. + text_tokens: nil + ) + end + + sig do + override.returns( + { + audio_tokens: Integer, + image_tokens: Integer, + text_tokens: Integer + } + ) + end + def to_hash + end + end end end end diff --git a/rbi/openai/models/realtime/realtime_server_event.rbi b/rbi/openai/models/realtime/realtime_server_event.rbi index d54099c7..971b61f2 100644 --- a/rbi/openai/models/realtime/realtime_server_event.rbi +++ b/rbi/openai/models/realtime/realtime_server_event.rbi @@ -96,7 +96,10 @@ module OpenAI attr_accessor :type # Returned when a conversation item is retrieved with - # `conversation.item.retrieve`. + # `conversation.item.retrieve`. This is provided as a way to fetch the server's + # representation of an item, for example to get access to the post-processed audio + # data after noise cancellation and VAD. It includes the full content of the Item, + # including audio data. sig do params( event_id: String, diff --git a/rbi/openai/models/realtime/realtime_session.rbi b/rbi/openai/models/realtime/realtime_session.rbi index 4ad2f054..ba69f375 100644 --- a/rbi/openai/models/realtime/realtime_session.rbi +++ b/rbi/openai/models/realtime/realtime_session.rbi @@ -86,21 +86,13 @@ module OpenAI # and should be treated as guidance of input audio content rather than precisely # what the model heard. The client can optionally set the language and prompt for # transcription, these offer additional guidance to the transcription service. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeSession::InputAudioTranscription - ) - ) - end + sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) } attr_reader :input_audio_transcription sig do params( input_audio_transcription: - T.nilable( - OpenAI::Realtime::RealtimeSession::InputAudioTranscription::OrHash - ) + T.nilable(OpenAI::Realtime::AudioTranscription::OrHash) ).void end attr_writer :input_audio_transcription @@ -233,16 +225,10 @@ module OpenAI attr_writer :tool_choice # Tools (functions) available to the model. - sig do - returns(T.nilable(T::Array[OpenAI::Realtime::RealtimeSession::Tool])) - end + sig { returns(T.nilable(T::Array[OpenAI::Realtime::Models])) } attr_reader :tools - sig do - params( - tools: T::Array[OpenAI::Realtime::RealtimeSession::Tool::OrHash] - ).void - end + sig { params(tools: T::Array[OpenAI::Realtime::Models::OrHash]).void } attr_writer :tools # Configuration options for tracing. Set to null to disable tracing. 
Once tracing @@ -321,9 +307,7 @@ module OpenAI input_audio_noise_reduction: OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::OrHash, input_audio_transcription: - T.nilable( - OpenAI::Realtime::RealtimeSession::InputAudioTranscription::OrHash - ), + T.nilable(OpenAI::Realtime::AudioTranscription::OrHash), instructions: String, max_response_output_tokens: T.any(Integer, Symbol), modalities: @@ -336,7 +320,7 @@ module OpenAI speed: Float, temperature: Float, tool_choice: String, - tools: T::Array[OpenAI::Realtime::RealtimeSession::Tool::OrHash], + tools: T::Array[OpenAI::Realtime::Models::OrHash], tracing: T.nilable( T.any( @@ -460,9 +444,7 @@ module OpenAI input_audio_noise_reduction: OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction, input_audio_transcription: - T.nilable( - OpenAI::Realtime::RealtimeSession::InputAudioTranscription - ), + T.nilable(OpenAI::Realtime::AudioTranscription), instructions: String, max_response_output_tokens: T.any(Integer, Symbol), modalities: @@ -475,7 +457,7 @@ module OpenAI speed: Float, temperature: Float, tool_choice: String, - tools: T::Array[OpenAI::Realtime::RealtimeSession::Tool], + tools: T::Array[OpenAI::Realtime::Models], tracing: T.nilable( T.any( @@ -572,19 +554,12 @@ module OpenAI # headphones, `far_field` is for far-field microphones such as laptop or # conference room microphones. sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::OrSymbol - ) - ) + returns(T.nilable(OpenAI::Realtime::NoiseReductionType::OrSymbol)) end attr_reader :type sig do - params( - type: - OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::OrSymbol - ).void + params(type: OpenAI::Realtime::NoiseReductionType::OrSymbol).void end attr_writer :type @@ -595,8 +570,7 @@ module OpenAI # perception of the input audio. sig do params( - type: - OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::OrSymbol + type: OpenAI::Realtime::NoiseReductionType::OrSymbol ).returns(T.attached_class) end def self.new( @@ -609,123 +583,7 @@ module OpenAI sig do override.returns( - { - type: - OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::OrSymbol - } - ) - end - def to_hash - end - - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. - module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - NEAR_FIELD = - T.let( - :near_field, - OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::TaggedSymbol - ) - FAR_FIELD = - T.let( - :far_field, - OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class InputAudioTranscription < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeSession::InputAudioTranscription, - OpenAI::Internal::AnyHash - ) - end - - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. 
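This hunk swaps the session's nested `RealtimeSession::InputAudioTranscription` class (deleted below) for the shared `OpenAI::Realtime::AudioTranscription` model. A construction sketch, assuming the shared model keeps the `language`/`model`/`prompt` fields of the class it replaces:

```ruby
require "openai"

transcription = OpenAI::Realtime::AudioTranscription.new(
  language: "en",                  # ISO-639-1 hint improves accuracy and latency
  model: "gpt-4o-mini-transcribe", # or "gpt-4o-transcribe" / "whisper-1"
  prompt: "expect words related to technology"
)

pp transcription.to_hash
```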
- sig { returns(T.nilable(String)) } - attr_reader :language - - sig { params(language: String).void } - attr_writer :language - - # The model to use for transcription, current options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1`. - sig { returns(T.nilable(String)) } - attr_reader :model - - sig { params(model: String).void } - attr_writer :model - - # An optional text to guide the model's style or continue a previous audio - # segment. For `whisper-1`, the - # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - # For `gpt-4o-transcribe` models, the prompt is a free text string, for example - # "expect words related to technology". - sig { returns(T.nilable(String)) } - attr_reader :prompt - - sig { params(prompt: String).void } - attr_writer :prompt - - # Configuration for input audio transcription, defaults to off and can be set to - # `null` to turn off once on. Input audio transcription is not native to the - # model, since the model consumes audio directly. Transcription runs - # asynchronously through - # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) - # and should be treated as guidance of input audio content rather than precisely - # what the model heard. The client can optionally set the language and prompt for - # transcription, these offer additional guidance to the transcription service. - sig do - params(language: String, model: String, prompt: String).returns( - T.attached_class - ) - end - def self.new( - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - language: nil, - # The model to use for transcription, current options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1`. - model: nil, - # An optional text to guide the model's style or continue a previous audio - # segment. For `whisper-1`, the - # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - # For `gpt-4o-transcribe` models, the prompt is a free text string, for example - # "expect words related to technology". - prompt: nil - ) - end - - sig do - override.returns( - { language: String, model: String, prompt: String } + { type: OpenAI::Realtime::NoiseReductionType::OrSymbol } ) end def to_hash @@ -908,114 +766,6 @@ module OpenAI end end - class Tool < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeSession::Tool, - OpenAI::Internal::AnyHash - ) - end - - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - sig { returns(T.nilable(String)) } - attr_reader :description - - sig { params(description: String).void } - attr_writer :description - - # The name of the function. - sig { returns(T.nilable(String)) } - attr_reader :name - - sig { params(name: String).void } - attr_writer :name - - # Parameters of the function in JSON Schema. - sig { returns(T.nilable(T.anything)) } - attr_reader :parameters - - sig { params(parameters: T.anything).void } - attr_writer :parameters - - # The type of the tool, i.e. `function`. 
- sig do - returns( - T.nilable(OpenAI::Realtime::RealtimeSession::Tool::Type::OrSymbol) - ) - end - attr_reader :type - - sig do - params( - type: OpenAI::Realtime::RealtimeSession::Tool::Type::OrSymbol - ).void - end - attr_writer :type - - sig do - params( - description: String, - name: String, - parameters: T.anything, - type: OpenAI::Realtime::RealtimeSession::Tool::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new( - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - description: nil, - # The name of the function. - name: nil, - # Parameters of the function in JSON Schema. - parameters: nil, - # The type of the tool, i.e. `function`. - type: nil - ) - end - - sig do - override.returns( - { - description: String, - name: String, - parameters: T.anything, - type: OpenAI::Realtime::RealtimeSession::Tool::Type::OrSymbol - } - ) - end - def to_hash - end - - # The type of the tool, i.e. `function`. - module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all(Symbol, OpenAI::Realtime::RealtimeSession::Tool::Type) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - FUNCTION = - T.let( - :function, - OpenAI::Realtime::RealtimeSession::Tool::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeSession::Tool::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end - # Configuration options for tracing. Set to null to disable tracing. Once tracing # is enabled for a session, the configuration cannot be modified. # diff --git a/rbi/openai/models/realtime/realtime_session_client_secret.rbi b/rbi/openai/models/realtime/realtime_session_client_secret.rbi new file mode 100644 index 00000000..b23e1135 --- /dev/null +++ b/rbi/openai/models/realtime/realtime_session_client_secret.rbi @@ -0,0 +1,49 @@ +# typed: strong + +module OpenAI + module Models + RealtimeSessionClientSecret = Realtime::RealtimeSessionClientSecret + + module Realtime + class RealtimeSessionClientSecret < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeSessionClientSecret, + OpenAI::Internal::AnyHash + ) + end + + # Timestamp for when the token expires. Currently, all tokens expire after one + # minute. + sig { returns(Integer) } + attr_accessor :expires_at + + # Ephemeral key usable in client environments to authenticate connections to the + # Realtime API. Use this in client-side environments rather than a standard API + # token, which should only be used server-side. + sig { returns(String) } + attr_accessor :value + + # Ephemeral key returned by the API. + sig do + params(expires_at: Integer, value: String).returns(T.attached_class) + end + def self.new( + # Timestamp for when the token expires. Currently, all tokens expire after one + # minute. + expires_at:, + # Ephemeral key usable in client environments to authenticate connections to the + # Realtime API. Use this in client-side environments rather than a standard API + # token, which should only be used server-side. 
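For illustration, a minimal sketch of the ephemeral-key flow this model enables (the key string and expiry are placeholder values, not from this diff):

```ruby
require "openai"

# Both fields are required; `value` is safe to hand to a browser client,
# while the standard API key stays server-side.
secret = OpenAI::Realtime::RealtimeSessionClientSecret.new(
  value: "ek_example_123",        # placeholder ephemeral key
  expires_at: Time.now.to_i + 60  # unix seconds; tokens currently live ~1 minute
)

still_valid = secret.expires_at > Time.now.to_i
```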
+ value: + ) + end + + sig { override.returns({ expires_at: Integer, value: String }) } + def to_hash + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_session_create_request.rbi b/rbi/openai/models/realtime/realtime_session_create_request.rbi index 4eab2fb9..c5b7f7c2 100644 --- a/rbi/openai/models/realtime/realtime_session_create_request.rbi +++ b/rbi/openai/models/realtime/realtime_session_create_request.rbi @@ -12,17 +12,6 @@ module OpenAI ) end - # The Realtime model used for this session. - sig do - returns( - T.any( - String, - OpenAI::Realtime::RealtimeSessionCreateRequest::Model::OrSymbol - ) - ) - end - attr_accessor :model - # The type of session to create. Always `realtime` for the Realtime API. sig { returns(Symbol) } attr_accessor :type @@ -36,21 +25,10 @@ module OpenAI end attr_writer :audio - # Configuration options for the generated client secret. - sig { returns(T.nilable(OpenAI::Realtime::RealtimeClientSecretConfig)) } - attr_reader :client_secret - - sig do - params( - client_secret: OpenAI::Realtime::RealtimeClientSecretConfig::OrHash - ).void - end - attr_writer :client_secret - # Additional fields to include in server outputs. # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. + # `item.input_audio_transcription.logprobs`: Include logprobs for input audio + # transcription. sig do returns( T.nilable( @@ -98,8 +76,34 @@ module OpenAI sig { params(max_output_tokens: T.any(Integer, Symbol)).void } attr_writer :max_output_tokens - # The set of modalities the model can respond with. To disable audio, set this to - # ["text"]. + # The Realtime model used for this session. + sig do + returns( + T.nilable( + T.any( + String, + OpenAI::Realtime::RealtimeSessionCreateRequest::Model::OrSymbol + ) + ) + ) + end + attr_reader :model + + sig do + params( + model: + T.any( + String, + OpenAI::Realtime::RealtimeSessionCreateRequest::Model::OrSymbol + ) + ).void + end + attr_writer :model + + # The set of modalities the model can respond with. It defaults to `["audio"]`, + # indicating that the model will respond with audio plus a transcript. `["text"]` + # can be used to make the model respond with text only. It is not possible to + # request both `text` and `audio` at the same time. sig do returns( T.nilable( @@ -133,14 +137,6 @@ module OpenAI end attr_writer :prompt - # Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a - # temperature of 0.8 is highly recommended for best performance. - sig { returns(T.nilable(Float)) } - attr_reader :temperature - - sig { params(temperature: Float).void } - attr_writer :temperature - # How the model chooses tools. Provide one of the string modes or force a specific # function/MCP tool. sig do @@ -174,7 +170,7 @@ module OpenAI T.nilable( T::Array[ T.any( - OpenAI::Realtime::RealtimeToolsConfigUnion::Function, + OpenAI::Realtime::Models, OpenAI::Realtime::RealtimeToolsConfigUnion::Mcp ) ] @@ -188,7 +184,7 @@ module OpenAI tools: T::Array[ T.any( - OpenAI::Realtime::RealtimeToolsConfigUnion::Function::OrHash, + OpenAI::Realtime::Models::OrHash, OpenAI::Realtime::RealtimeToolsConfigUnion::Mcp::OrHash ) ] @@ -196,8 +192,9 @@ module OpenAI end attr_writer :tools - # Configuration options for tracing. Set to null to disable tracing. Once tracing - # is enabled for a session, the configuration cannot be modified. + # Realtime API can write session traces to the + # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. 
Once + # tracing is enabled for a session, the configuration cannot be modified. # # `auto` will create a trace for the session with default values for the workflow # name, group id, and metadata. @@ -214,14 +211,13 @@ module OpenAI attr_accessor :tracing # Controls how the realtime conversation is truncated prior to model inference. - # The default is `auto`. When set to `retention_ratio`, the server retains a - # fraction of the conversation tokens prior to the instructions. + # The default is `auto`. sig do returns( T.nilable( T.any( OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::OrSymbol, - OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation + OpenAI::Realtime::RealtimeTruncationRetentionRatio ) ) ) @@ -233,7 +229,7 @@ module OpenAI truncation: T.any( OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::OrSymbol, - OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation::OrHash + OpenAI::Realtime::RealtimeTruncationRetentionRatio::OrHash ) ).void end @@ -242,25 +238,23 @@ module OpenAI # Realtime session object configuration. sig do params( - model: - T.any( - String, - OpenAI::Realtime::RealtimeSessionCreateRequest::Model::OrSymbol - ), audio: OpenAI::Realtime::RealtimeAudioConfig::OrHash, - client_secret: OpenAI::Realtime::RealtimeClientSecretConfig::OrHash, include: T::Array[ OpenAI::Realtime::RealtimeSessionCreateRequest::Include::OrSymbol ], instructions: String, max_output_tokens: T.any(Integer, Symbol), + model: + T.any( + String, + OpenAI::Realtime::RealtimeSessionCreateRequest::Model::OrSymbol + ), output_modalities: T::Array[ OpenAI::Realtime::RealtimeSessionCreateRequest::OutputModality::OrSymbol ], prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash), - temperature: Float, tool_choice: T.any( OpenAI::Responses::ToolChoiceOptions::OrSymbol, @@ -270,7 +264,7 @@ module OpenAI tools: T::Array[ T.any( - OpenAI::Realtime::RealtimeToolsConfigUnion::Function::OrHash, + OpenAI::Realtime::Models::OrHash, OpenAI::Realtime::RealtimeToolsConfigUnion::Mcp::OrHash ) ], @@ -284,22 +278,18 @@ module OpenAI truncation: T.any( OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::OrSymbol, - OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation::OrHash + OpenAI::Realtime::RealtimeTruncationRetentionRatio::OrHash ), type: Symbol ).returns(T.attached_class) end def self.new( - # The Realtime model used for this session. - model:, # Configuration for input and output audio. audio: nil, - # Configuration options for the generated client secret. - client_secret: nil, # Additional fields to include in server outputs. # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. + # `item.input_audio_transcription.logprobs`: Include logprobs for input audio + # transcription. include: nil, # The default system instructions (i.e. system message) prepended to model calls. # This field allows the client to guide the model on desired responses. The model @@ -317,29 +307,30 @@ module OpenAI # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or # `inf` for the maximum available tokens for a given model. Defaults to `inf`. max_output_tokens: nil, - # The set of modalities the model can respond with. To disable audio, set this to - # ["text"]. + # The Realtime model used for this session. + model: nil, + # The set of modalities the model can respond with. It defaults to `["audio"]`, + # indicating that the model will respond with audio plus a transcript. 
`["text"]` + # can be used to make the model respond with text only. It is not possible to + # request both `text` and `audio` at the same time. output_modalities: nil, # Reference to a prompt template and its variables. # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). prompt: nil, - # Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a - # temperature of 0.8 is highly recommended for best performance. - temperature: nil, # How the model chooses tools. Provide one of the string modes or force a specific # function/MCP tool. tool_choice: nil, # Tools available to the model. tools: nil, - # Configuration options for tracing. Set to null to disable tracing. Once tracing - # is enabled for a session, the configuration cannot be modified. + # Realtime API can write session traces to the + # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + # tracing is enabled for a session, the configuration cannot be modified. # # `auto` will create a trace for the session with default values for the workflow # name, group id, and metadata. tracing: nil, # Controls how the realtime conversation is truncated prior to model inference. - # The default is `auto`. When set to `retention_ratio`, the server retains a - # fraction of the conversation tokens prior to the instructions. + # The default is `auto`. truncation: nil, # The type of session to create. Always `realtime` for the Realtime API. type: :realtime @@ -349,26 +340,24 @@ module OpenAI sig do override.returns( { - model: - T.any( - String, - OpenAI::Realtime::RealtimeSessionCreateRequest::Model::OrSymbol - ), type: Symbol, audio: OpenAI::Realtime::RealtimeAudioConfig, - client_secret: OpenAI::Realtime::RealtimeClientSecretConfig, include: T::Array[ OpenAI::Realtime::RealtimeSessionCreateRequest::Include::OrSymbol ], instructions: String, max_output_tokens: T.any(Integer, Symbol), + model: + T.any( + String, + OpenAI::Realtime::RealtimeSessionCreateRequest::Model::OrSymbol + ), output_modalities: T::Array[ OpenAI::Realtime::RealtimeSessionCreateRequest::OutputModality::OrSymbol ], prompt: T.nilable(OpenAI::Responses::ResponsePrompt), - temperature: Float, tool_choice: T.any( OpenAI::Responses::ToolChoiceOptions::OrSymbol, @@ -378,7 +367,7 @@ module OpenAI tools: T::Array[ T.any( - OpenAI::Realtime::RealtimeToolsConfigUnion::Function, + OpenAI::Realtime::Models, OpenAI::Realtime::RealtimeToolsConfigUnion::Mcp ) ], @@ -392,7 +381,7 @@ module OpenAI truncation: T.any( OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::OrSymbol, - OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation + OpenAI::Realtime::RealtimeTruncationRetentionRatio ) } ) @@ -400,6 +389,54 @@ module OpenAI def to_hash end + module Include + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeSessionCreateRequest::Include + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = + T.let( + :"item.input_audio_transcription.logprobs", + OpenAI::Realtime::RealtimeSessionCreateRequest::Include::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateRequest::Include::TaggedSymbol + ] + ) + end + def self.values + end + end + + # Maximum number of output tokens for a single assistant response, inclusive of + # tool calls. 
Provide an integer between 1 and 4096 to limit output tokens, or + # `inf` for the maximum available tokens for a given model. Defaults to `inf`. + module MaxOutputTokens + extend OpenAI::Internal::Type::Union + + Variants = T.type_alias { T.any(Integer, Symbol) } + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateRequest::MaxOutputTokens::Variants + ] + ) + end + def self.variants + end + end + # The Realtime model used for this session. module Model extend OpenAI::Internal::Type::Union @@ -441,16 +478,6 @@ module OpenAI :"gpt-realtime-2025-08-28", OpenAI::Realtime::RealtimeSessionCreateRequest::Model::TaggedSymbol ) - GPT_4O_REALTIME = - T.let( - :"gpt-4o-realtime", - OpenAI::Realtime::RealtimeSessionCreateRequest::Model::TaggedSymbol - ) - GPT_4O_MINI_REALTIME = - T.let( - :"gpt-4o-mini-realtime", - OpenAI::Realtime::RealtimeSessionCreateRequest::Model::TaggedSymbol - ) GPT_4O_REALTIME_PREVIEW = T.let( :"gpt-4o-realtime-preview", @@ -483,54 +510,6 @@ module OpenAI ) end - module Include - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeSessionCreateRequest::Include - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = - T.let( - :"item.input_audio_transcription.logprobs", - OpenAI::Realtime::RealtimeSessionCreateRequest::Include::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeSessionCreateRequest::Include::TaggedSymbol - ] - ) - end - def self.values - end - end - - # Maximum number of output tokens for a single assistant response, inclusive of - # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or - # `inf` for the maximum available tokens for a given model. Defaults to `inf`. - module MaxOutputTokens - extend OpenAI::Internal::Type::Union - - Variants = T.type_alias { T.any(Integer, Symbol) } - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeSessionCreateRequest::MaxOutputTokens::Variants - ] - ) - end - def self.variants - end - end - module OutputModality extend OpenAI::Internal::Type::Enum diff --git a/rbi/openai/models/realtime/realtime_session_create_response.rbi b/rbi/openai/models/realtime/realtime_session_create_response.rbi index 07696f2f..3fbccb98 100644 --- a/rbi/openai/models/realtime/realtime_session_create_response.rbi +++ b/rbi/openai/models/realtime/realtime_session_create_response.rbi @@ -14,14 +14,7 @@ module OpenAI ) end - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - sig { returns(T.nilable(String)) } - attr_reader :id - - sig { params(id: String).void } - attr_writer :id - - # Configuration for input and output audio for the session. + # Configuration for input and output audio. sig do returns( T.nilable(OpenAI::Realtime::RealtimeSessionCreateResponse::Audio) @@ -37,17 +30,23 @@ module OpenAI end attr_writer :audio - # Expiration timestamp for the session, in seconds since epoch. - sig { returns(T.nilable(Integer)) } - attr_reader :expires_at + # Ephemeral key returned by the API. + sig do + returns(T.nilable(OpenAI::Realtime::RealtimeSessionClientSecret)) + end + attr_reader :client_secret - sig { params(expires_at: Integer).void } - attr_writer :expires_at + sig do + params( + client_secret: OpenAI::Realtime::RealtimeSessionClientSecret::OrHash + ).void + end + attr_writer :client_secret # Additional fields to include in server outputs. 
# - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. + # `item.input_audio_transcription.logprobs`: Include logprobs for input audio + # transcription. sig do returns( T.nilable( @@ -102,21 +101,30 @@ module OpenAI attr_writer :max_output_tokens # The Realtime model used for this session. - sig { returns(T.nilable(String)) } + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::Variants + ) + ) + end attr_reader :model - sig { params(model: String).void } + sig do + params( + model: + T.any( + String, + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::OrSymbol + ) + ).void + end attr_writer :model - # The object type. Always `realtime.session`. - sig { returns(T.nilable(String)) } - attr_reader :object - - sig { params(object: String).void } - attr_writer :object - - # The set of modalities the model can respond with. To disable audio, set this to - # ["text"]. + # The set of modalities the model can respond with. It defaults to `["audio"]`, + # indicating that the model will respond with audio plus a transcript. `["text"]` + # can be used to make the model respond with text only. It is not possible to + # request both `text` and `audio` at the same time. sig do returns( T.nilable( @@ -138,19 +146,48 @@ module OpenAI end attr_writer :output_modalities - # How the model chooses tools. Options are `auto`, `none`, `required`, or specify - # a function. - sig { returns(T.nilable(String)) } + # Reference to a prompt template and its variables. + # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + sig { returns(T.nilable(OpenAI::Responses::ResponsePrompt)) } + attr_reader :prompt + + sig do + params( + prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash) + ).void + end + attr_writer :prompt + + # How the model chooses tools. Provide one of the string modes or force a specific + # function/MCP tool. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::ToolChoice::Variants + ) + ) + end attr_reader :tool_choice - sig { params(tool_choice: String).void } + sig do + params( + tool_choice: + T.any( + OpenAI::Responses::ToolChoiceOptions::OrSymbol, + OpenAI::Responses::ToolChoiceFunction::OrHash, + OpenAI::Responses::ToolChoiceMcp::OrHash + ) + ).void + end attr_writer :tool_choice - # Tools (functions) available to the model. + # Tools available to the model. sig do returns( T.nilable( - T::Array[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool] + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Variants + ] ) ) end @@ -160,14 +197,18 @@ module OpenAI params( tools: T::Array[ - OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::OrHash + T.any( + OpenAI::Realtime::Models::OrHash, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::OrHash + ) ] ).void end attr_writer :tools - # Configuration options for tracing. Set to null to disable tracing. Once tracing - # is enabled for a session, the configuration cannot be modified. + # Realtime API can write session traces to the + # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + # tracing is enabled for a session, the configuration cannot be modified. # # `auto` will create a trace for the session with default values for the workflow # name, group id, and metadata. 
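Since `tracing` is now nilable, its three shapes are easiest to see side by side. A sketch, assuming `TracingConfiguration` exposes an optional `workflow_name` (its fields sit outside this excerpt):

```ruby
require "openai"

tracing_auto = :auto # default workflow name, group id, and metadata

# Assumption: workflow_name is one of TracingConfiguration's optional fields.
tracing_custom =
  OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration.new(
    workflow_name: "support-call"
  )

tracing_off = nil # explicit null disables tracing for the session
```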
@@ -178,83 +219,106 @@ module OpenAI ) ) end - attr_reader :tracing + attr_accessor :tracing + + # Controls how the realtime conversation is truncated prior to model inference. + # The default is `auto`. + sig do + returns(T.nilable(OpenAI::Realtime::RealtimeTruncation::Variants)) + end + attr_reader :truncation sig do params( - tracing: + truncation: T.any( - Symbol, - OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration::OrHash + OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::OrSymbol, + OpenAI::Realtime::RealtimeTruncationRetentionRatio::OrHash ) ).void end - attr_writer :tracing + attr_writer :truncation - # Configuration for turn detection. Can be set to `null` to turn off. Server VAD - # means that the model will detect the start and end of speech based on audio - # volume and respond at the end of user speech. + # The type of session to create. Always `realtime` for the Realtime API. sig do returns( T.nilable( - OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection + OpenAI::Realtime::RealtimeSessionCreateResponse::Type::TaggedSymbol ) ) end - attr_reader :turn_detection + attr_reader :type sig do params( - turn_detection: - OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection::OrHash + type: + OpenAI::Realtime::RealtimeSessionCreateResponse::Type::OrSymbol ).void end - attr_writer :turn_detection + attr_writer :type - # A Realtime session configuration object. + # A new Realtime session configuration, with an ephemeral key. Default TTL for + # keys is one minute. sig do params( - id: String, audio: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::OrHash, - expires_at: Integer, + client_secret: + OpenAI::Realtime::RealtimeSessionClientSecret::OrHash, include: T::Array[ OpenAI::Realtime::RealtimeSessionCreateResponse::Include::OrSymbol ], instructions: String, max_output_tokens: T.any(Integer, Symbol), - model: String, - object: String, + model: + T.any( + String, + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::OrSymbol + ), output_modalities: T::Array[ OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::OrSymbol ], - tool_choice: String, + prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash), + tool_choice: + T.any( + OpenAI::Responses::ToolChoiceOptions::OrSymbol, + OpenAI::Responses::ToolChoiceFunction::OrHash, + OpenAI::Responses::ToolChoiceMcp::OrHash + ), tools: T::Array[ - OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::OrHash + T.any( + OpenAI::Realtime::Models::OrHash, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::OrHash + ) ], tracing: + T.nilable( + T.any( + Symbol, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration::OrHash + ) + ), + truncation: T.any( - Symbol, - OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration::OrHash + OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::OrSymbol, + OpenAI::Realtime::RealtimeTruncationRetentionRatio::OrHash ), - turn_detection: - OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection::OrHash + type: + OpenAI::Realtime::RealtimeSessionCreateResponse::Type::OrSymbol ).returns(T.attached_class) end def self.new( - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - id: nil, - # Configuration for input and output audio for the session. + # Configuration for input and output audio. audio: nil, - # Expiration timestamp for the session, in seconds since epoch. 
- expires_at: nil, + # Ephemeral key returned by the API. + client_secret: nil, # Additional fields to include in server outputs. # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. + # `item.input_audio_transcription.logprobs`: Include logprobs for input audio + # transcription. include: nil, # The default system instructions (i.e. system message) prepended to model calls. # This field allows the client to guide the model on desired responses. The model @@ -274,35 +338,39 @@ module OpenAI max_output_tokens: nil, # The Realtime model used for this session. model: nil, - # The object type. Always `realtime.session`. - object: nil, - # The set of modalities the model can respond with. To disable audio, set this to - # ["text"]. + # The set of modalities the model can respond with. It defaults to `["audio"]`, + # indicating that the model will respond with audio plus a transcript. `["text"]` + # can be used to make the model respond with text only. It is not possible to + # request both `text` and `audio` at the same time. output_modalities: nil, - # How the model chooses tools. Options are `auto`, `none`, `required`, or specify - # a function. + # Reference to a prompt template and its variables. + # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + prompt: nil, + # How the model chooses tools. Provide one of the string modes or force a specific + # function/MCP tool. tool_choice: nil, - # Tools (functions) available to the model. + # Tools available to the model. tools: nil, - # Configuration options for tracing. Set to null to disable tracing. Once tracing - # is enabled for a session, the configuration cannot be modified. + # Realtime API can write session traces to the + # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + # tracing is enabled for a session, the configuration cannot be modified. # # `auto` will create a trace for the session with default values for the workflow # name, group id, and metadata. tracing: nil, - # Configuration for turn detection. Can be set to `null` to turn off. Server VAD - # means that the model will detect the start and end of speech based on audio - # volume and respond at the end of user speech. - turn_detection: nil + # Controls how the realtime conversation is truncated prior to model inference. + # The default is `auto`. + truncation: nil, + # The type of session to create. Always `realtime` for the Realtime API. 
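With `id`, `object`, and the top-level `expires_at` removed, callers now read expiry off the embedded client secret instead. A sketch against a response already returned by the API:

```ruby
require "openai"

# Extract client-side credentials from a freshly created session.
# (resp: OpenAI::Realtime::RealtimeSessionCreateResponse, obtained elsewhere)
def client_credentials(resp)
  cs = resp.client_secret
  return nil unless cs

  { key: cs.value, expires_at: cs.expires_at } # "ek_..." key + unix seconds
end
```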
+ type: nil ) end sig do override.returns( { - id: String, audio: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio, - expires_at: Integer, + client_secret: OpenAI::Realtime::RealtimeSessionClientSecret, include: T::Array[ OpenAI::Realtime::RealtimeSessionCreateResponse::Include::TaggedSymbol @@ -310,19 +378,26 @@ module OpenAI instructions: String, max_output_tokens: OpenAI::Realtime::RealtimeSessionCreateResponse::MaxOutputTokens::Variants, - model: String, - object: String, + model: + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::Variants, output_modalities: T::Array[ OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol ], - tool_choice: String, + prompt: T.nilable(OpenAI::Responses::ResponsePrompt), + tool_choice: + OpenAI::Realtime::RealtimeSessionCreateResponse::ToolChoice::Variants, tools: - T::Array[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool], + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Variants + ], tracing: - OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::Variants, - turn_detection: - OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::Variants + ), + truncation: OpenAI::Realtime::RealtimeTruncation::Variants, + type: + OpenAI::Realtime::RealtimeSessionCreateResponse::Type::TaggedSymbol } ) end @@ -372,7 +447,7 @@ module OpenAI end attr_writer :output - # Configuration for input and output audio for the session. + # Configuration for input and output audio. sig do params( input: @@ -406,14 +481,31 @@ module OpenAI ) end - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - sig { returns(T.nilable(String)) } + # The format of the input audio. + sig do + returns( + T.nilable(OpenAI::Realtime::RealtimeAudioFormats::Variants) + ) + end attr_reader :format_ - sig { params(format_: String).void } + sig do + params( + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ) + ).void + end attr_writer :format_ - # Configuration for input audio noise reduction. + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. sig do returns( T.nilable( @@ -431,25 +523,34 @@ module OpenAI end attr_writer :noise_reduction - # Configuration for input audio transcription. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription - ) - ) - end + # Configuration for input audio transcription, defaults to off and can be set to + # `null` to turn off once on. Input audio transcription is not native to the + # model, since the model consumes audio directly. Transcription runs + # asynchronously through + # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + # and should be treated as guidance of input audio content rather than precisely + # what the model heard. The client can optionally set the language and prompt for + # transcription, these offer additional guidance to the transcription service. 
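Because input transcription now reuses the shared `AudioTranscription` model, the same object works for both session shapes. A minimal sketch (the model and prompt values are illustrative; the option list appears earlier in this diff):

```ruby
require "openai"

transcription = OpenAI::Realtime::AudioTranscription.new(
  language: "en",                # ISO-639-1 hint improves accuracy and latency
  model: "gpt-4o-transcribe",
  prompt: "expect words related to technology"
)
```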
+ sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) } attr_reader :transcription sig do params( - transcription: - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription::OrHash + transcription: OpenAI::Realtime::AudioTranscription::OrHash ).void end attr_writer :transcription - # Configuration for turn detection. + # Configuration for turn detection, either Server VAD or Semantic VAD. This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech. Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency. sig do returns( T.nilable( @@ -469,23 +570,47 @@ sig do params( - format_: String, + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ), noise_reduction: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::OrHash, - transcription: - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription::OrHash, + transcription: OpenAI::Realtime::AudioTranscription::OrHash, turn_detection: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::OrHash ).returns(T.attached_class) end def self.new( - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # The format of the input audio. format_: nil, - # Configuration for input audio noise reduction. + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. noise_reduction: nil, - # Configuration for input audio transcription. + # Configuration for input audio transcription, defaults to off and can be set to + # `null` to turn off once on. Input audio transcription is not native to the + # model, since the model consumes audio directly. Transcription runs + # asynchronously through + # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + # and should be treated as guidance of input audio content rather than precisely + # what the model heard. The client can optionally set the language and prompt for + # transcription, these offer additional guidance to the transcription service. transcription: nil, - # Configuration for turn detection. + # Configuration for turn detection, either Server VAD or Semantic VAD. This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech.
Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency. turn_detection: nil ) end @@ -493,11 +618,10 @@ module OpenAI sig do override.returns( { - format_: String, + format_: OpenAI::Realtime::RealtimeAudioFormats::Variants, noise_reduction: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction, - transcription: - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription, + transcription: OpenAI::Realtime::AudioTranscription, turn_detection: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection } @@ -515,182 +639,198 @@ module OpenAI ) end + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. sig do returns( - T.nilable( - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol - ) + T.nilable(OpenAI::Realtime::NoiseReductionType::TaggedSymbol) ) end attr_reader :type sig do params( - type: - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::OrSymbol + type: OpenAI::Realtime::NoiseReductionType::OrSymbol ).void end attr_writer :type - # Configuration for input audio noise reduction. + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. sig do params( - type: - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::OrSymbol + type: OpenAI::Realtime::NoiseReductionType::OrSymbol ).returns(T.attached_class) end - def self.new(type: nil) + def self.new( + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. 
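The `near_field`/`far_field` choice is now the shared `NoiseReductionType` enum, so the same symbols configure either session shape. A sketch:

```ruby
require "openai"

noise_reduction =
  OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction.new(
    type: :near_field # or :far_field for laptop / conference-room microphones
  )
```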
+ type: nil + ) end sig do override.returns( - { - type: - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol - } + { type: OpenAI::Realtime::NoiseReductionType::TaggedSymbol } ) end def to_hash end - - module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - NEAR_FIELD = - T.let( - :near_field, - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol - ) - FAR_FIELD = - T.let( - :far_field, - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol - ] - ) - end - def self.values - end - end end - class Transcription < OpenAI::Internal::Type::BaseModel + class TurnDetection < OpenAI::Internal::Type::BaseModel OrHash = T.type_alias do T.any( - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription, + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection, OpenAI::Internal::AnyHash ) end - # The language of the input audio. - sig { returns(T.nilable(String)) } - attr_reader :language - - sig { params(language: String).void } - attr_writer :language - - # The model to use for transcription. - sig { returns(T.nilable(String)) } - attr_reader :model - - sig { params(model: String).void } - attr_writer :model + # Whether or not to automatically generate a response when a VAD stop event + # occurs. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :create_response - # Optional text to guide the model's style or continue a previous audio segment. - sig { returns(T.nilable(String)) } - attr_reader :prompt + sig { params(create_response: T::Boolean).void } + attr_writer :create_response - sig { params(prompt: String).void } - attr_writer :prompt - - # Configuration for input audio transcription. + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, + # and `high` have max timeouts of 8s, 4s, and 2s respectively. sig do - params(language: String, model: String, prompt: String).returns( - T.attached_class + returns( + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol + ) ) end - def self.new( - # The language of the input audio. - language: nil, - # The model to use for transcription. - model: nil, - # Optional text to guide the model's style or continue a previous audio segment. - prompt: nil - ) - end + attr_reader :eagerness sig do - override.returns( - { language: String, model: String, prompt: String } - ) - end - def to_hash + params( + eagerness: + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::OrSymbol + ).void end - end + attr_writer :eagerness - class TurnDetection < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection, - OpenAI::Internal::AnyHash - ) - end + # Optional idle timeout after which turn detection will auto-timeout when no + # additional audio is received. 
+ sig { returns(T.nilable(Integer)) } + attr_accessor :idle_timeout_ms + + # Whether or not to automatically interrupt any ongoing response with output to + # the default conversation (i.e. `conversation` of `auto`) when a VAD start event + # occurs. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :interrupt_response + sig { params(interrupt_response: T::Boolean).void } + attr_writer :interrupt_response + + # Used only for `server_vad` mode. Amount of audio to include before the VAD + # detected speech (in milliseconds). Defaults to 300ms. sig { returns(T.nilable(Integer)) } attr_reader :prefix_padding_ms sig { params(prefix_padding_ms: Integer).void } attr_writer :prefix_padding_ms + # Used only for `server_vad` mode. Duration of silence to detect speech stop (in + # milliseconds). Defaults to 500ms. With shorter values the model will respond + # more quickly, but may jump in on short pauses from the user. sig { returns(T.nilable(Integer)) } attr_reader :silence_duration_ms sig { params(silence_duration_ms: Integer).void } attr_writer :silence_duration_ms + # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + # defaults to 0.5. A higher threshold will require louder audio to activate the + # model, and thus might perform better in noisy environments. sig { returns(T.nilable(Float)) } attr_reader :threshold sig { params(threshold: Float).void } attr_writer :threshold - # Type of turn detection, only `server_vad` is currently supported. - sig { returns(T.nilable(String)) } + # Type of turn detection. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::TaggedSymbol + ) + ) + end attr_reader :type - sig { params(type: String).void } + sig do + params( + type: + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::OrSymbol + ).void + end attr_writer :type - # Configuration for turn detection. + # Configuration for turn detection, either Server VAD or Semantic VAD. This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech. Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency. sig do params( + create_response: T::Boolean, + eagerness: + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::OrSymbol, + idle_timeout_ms: T.nilable(Integer), + interrupt_response: T::Boolean, prefix_padding_ms: Integer, silence_duration_ms: Integer, threshold: Float, - type: String + type: + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::OrSymbol ).returns(T.attached_class) end def self.new( + # Whether or not to automatically generate a response when a VAD stop event + # occurs. + create_response: nil, + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly.
`auto` is the default and is equivalent to `medium`. `low`, `medium`, + # and `high` have max timeouts of 8s, 4s, and 2s respectively. + eagerness: nil, + # Optional idle timeout after which turn detection will auto-timeout when no + # additional audio is received. + idle_timeout_ms: nil, + # Whether or not to automatically interrupt any ongoing response with output to + # the default conversation (i.e. `conversation` of `auto`) when a VAD start event + # occurs. + interrupt_response: nil, + # Used only for `server_vad` mode. Amount of audio to include before the VAD + # detected speech (in milliseconds). Defaults to 300ms. prefix_padding_ms: nil, + # Used only for `server_vad` mode. Duration of silence to detect speech stop (in + # milliseconds). Defaults to 500ms. With shorter values the model will respond + # more quickly, but may jump in on short pauses from the user. silence_duration_ms: nil, + # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + # defaults to 0.5. A higher threshold will require louder audio to activate the + # model, and thus might perform better in noisy environments. threshold: nil, - # Type of turn detection, only `server_vad` is currently supported. + # Type of turn detection. type: nil ) end @@ -698,15 +838,104 @@ module OpenAI sig do override.returns( { + create_response: T::Boolean, + eagerness: + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol, + idle_timeout_ms: T.nilable(Integer), + interrupt_response: T::Boolean, prefix_padding_ms: Integer, silence_duration_ms: Integer, threshold: Float, - type: String + type: + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::TaggedSymbol } ) end def to_hash end + + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, + # and `high` have max timeouts of 8s, 4s, and 2s respectively. + module Eagerness + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + LOW = + T.let( + :low, + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol + ) + MEDIUM = + T.let( + :medium, + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol + ) + HIGH = + T.let( + :high, + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol + ) + AUTO = + T.let( + :auto, + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol + ] + ) + end + def self.values + end + end + + # Type of turn detection. 
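Putting the new fields together: the `server_vad` knobs (`threshold`, `prefix_padding_ms`, `silence_duration_ms`) and the `semantic_vad` knob (`eagerness`) are mode-specific, while `create_response`, `interrupt_response`, and the nilable `idle_timeout_ms` apply to turn detection generally. A sketch of each mode (the timeout value is illustrative):

```ruby
require "openai"

turn_detection =
  OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection

server_vad = turn_detection.new(
  type: :server_vad,
  threshold: 0.6,         # require louder audio to trigger in a noisy room
  prefix_padding_ms: 300,
  silence_duration_ms: 500,
  idle_timeout_ms: 5_000  # illustrative; nil leaves the idle timeout off
)

semantic_vad = turn_detection.new(
  type: :semantic_vad,
  eagerness: :low,        # wait longer before deciding the turn has ended
  create_response: true,
  interrupt_response: true
)
```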
+ module Type + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + SERVER_VAD = + T.let( + :server_vad, + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::TaggedSymbol + ) + SEMANTIC_VAD = + T.let( + :semantic_vad, + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::TaggedSymbol + ] + ) + end + def self.values + end + end end end @@ -719,19 +948,43 @@ module OpenAI ) end - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - sig { returns(T.nilable(String)) } + # The format of the output audio. + sig do + returns( + T.nilable(OpenAI::Realtime::RealtimeAudioFormats::Variants) + ) + end attr_reader :format_ - sig { params(format_: String).void } + sig do + params( + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ) + ).void + end attr_writer :format_ + # The speed of the model's spoken response as a multiple of the original speed. + # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. + # This value can only be changed in between model turns, not while a response is + # in progress. + # + # This parameter is a post-processing adjustment to the audio after it is + # generated, it's also possible to prompt the model to speak faster or slower. sig { returns(T.nilable(Float)) } attr_reader :speed sig { params(speed: Float).void } attr_writer :speed + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. sig do returns( T.nilable( @@ -754,7 +1007,12 @@ module OpenAI sig do params( - format_: String, + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ), speed: Float, voice: T.any( @@ -764,9 +1022,20 @@ module OpenAI ).returns(T.attached_class) end def self.new( - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + # The format of the output audio. format_: nil, + # The speed of the model's spoken response as a multiple of the original speed. + # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. + # This value can only be changed in between model turns, not while a response is + # in progress. + # + # This parameter is a post-processing adjustment to the audio after it is + # generated, it's also possible to prompt the model to speak faster or slower. speed: nil, + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. 
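On the output side, `speed` is a post-processing multiplier and `voice` locks in once the model has spoken. A minimal sketch (omitting `format_`, which now takes one of the `RealtimeAudioFormats` objects rather than a string):

```ruby
require "openai"

output = OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Output.new(
  voice: :marin, # `marin` and `cedar` are the recommended voices
  speed: 1.25    # 0.25..1.5, adjustable only between model turns
)
```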
voice: nil ) end @@ -774,7 +1043,7 @@ module OpenAI sig do override.returns( { - format_: String, + format_: OpenAI::Realtime::RealtimeAudioFormats::Variants, speed: Float, voice: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Output::Voice::Variants @@ -784,6 +1053,10 @@ module OpenAI def to_hash end + # The voice the model uses to respond. Voice cannot be changed during the session + # once the model has responded with audio at least once. Current voice options are + # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, + # and `cedar`. We recommend `marin` and `cedar` for best quality. module Voice extend OpenAI::Internal::Type::Union @@ -916,158 +1189,763 @@ module OpenAI end end - module OutputModality - extend OpenAI::Internal::Type::Enum + # The Realtime model used for this session. + module Model + extend OpenAI::Internal::Type::Union - TaggedSymbol = + Variants = T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality + T.any( + String, + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol ) end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - TEXT = - T.let( - :text, - OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol - ) - AUDIO = - T.let( - :audio, - OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol - ) sig do override.returns( T::Array[ - OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::Variants ] ) end - def self.values + def self.variants end - end - class Tool < OpenAI::Internal::Type::BaseModel - OrHash = + TaggedSymbol = T.type_alias do - T.any( - OpenAI::Realtime::RealtimeSessionCreateResponse::Tool, - OpenAI::Internal::AnyHash + T.all( + Symbol, + OpenAI::Realtime::RealtimeSessionCreateResponse::Model ) end + OrSymbol = T.type_alias { T.any(Symbol, String) } - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - sig { returns(T.nilable(String)) } - attr_reader :description - - sig { params(description: String).void } - attr_writer :description - - # The name of the function. 
- sig { returns(T.nilable(String)) } - attr_reader :name + GPT_REALTIME = + T.let( + :"gpt-realtime", + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol + ) + GPT_REALTIME_2025_08_28 = + T.let( + :"gpt-realtime-2025-08-28", + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol + ) + GPT_4O_REALTIME_PREVIEW = + T.let( + :"gpt-4o-realtime-preview", + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol + ) + GPT_4O_REALTIME_PREVIEW_2024_10_01 = + T.let( + :"gpt-4o-realtime-preview-2024-10-01", + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol + ) + GPT_4O_REALTIME_PREVIEW_2024_12_17 = + T.let( + :"gpt-4o-realtime-preview-2024-12-17", + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol + ) + GPT_4O_REALTIME_PREVIEW_2025_06_03 = + T.let( + :"gpt-4o-realtime-preview-2025-06-03", + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol + ) + GPT_4O_MINI_REALTIME_PREVIEW = + T.let( + :"gpt-4o-mini-realtime-preview", + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol + ) + GPT_4O_MINI_REALTIME_PREVIEW_2024_12_17 = + T.let( + :"gpt-4o-mini-realtime-preview-2024-12-17", + OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol + ) + end - sig { params(name: String).void } - attr_writer :name + module OutputModality + extend OpenAI::Internal::Type::Enum - # Parameters of the function in JSON Schema. - sig { returns(T.nilable(T.anything)) } - attr_reader :parameters + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } - sig { params(parameters: T.anything).void } - attr_writer :parameters + TEXT = + T.let( + :text, + OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol + ) + AUDIO = + T.let( + :audio, + OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol + ) - # The type of the tool, i.e. `function`. sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::TaggedSymbol - ) + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol + ] ) end - attr_reader :type - - sig do - params( - type: - OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::OrSymbol - ).void + def self.values end - attr_writer :type + end - sig do - params( - description: String, - name: String, - parameters: T.anything, - type: - OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new( - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - description: nil, - # The name of the function. - name: nil, - # Parameters of the function in JSON Schema. - parameters: nil, - # The type of the tool, i.e. `function`. - type: nil - ) - end + # How the model chooses tools. Provide one of the string modes or force a specific + # function/MCP tool. 
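`tool_choice` is no longer a bare `String`; it is a union over the standard option symbols and the typed selectors from the Responses models. A sketch of the three shapes (constructor arguments follow the Responses models, which sit outside this diff):

```ruby
require "openai"

choice_mode = :required # one of the ToolChoiceOptions modes

# Force a specific function tool by name.
choice_function = OpenAI::Responses::ToolChoiceFunction.new(name: "get_weather")

# Force a tool on a specific MCP server.
choice_mcp =
  OpenAI::Responses::ToolChoiceMcp.new(server_label: "deepwiki", name: "ask_question")
```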
+ module ToolChoice + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + OpenAI::Responses::ToolChoiceOptions::TaggedSymbol, + OpenAI::Responses::ToolChoiceFunction, + OpenAI::Responses::ToolChoiceMcp + ) + end sig do override.returns( - { - description: String, - name: String, - parameters: T.anything, - type: - OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::TaggedSymbol - } + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::ToolChoice::Variants + ] ) end - def to_hash + def self.variants end + end - # The type of the tool, i.e. `function`. - module Type - extend OpenAI::Internal::Type::Enum + # Give the model access to additional tools via remote Model Context Protocol + # (MCP) servers. + # [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + module Tool + extend OpenAI::Internal::Type::Union - TaggedSymbol = + Variants = + T.type_alias do + T.any( + OpenAI::Realtime::Models, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool + ) + end + + class McpTool < OpenAI::Internal::Type::BaseModel + OrHash = T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type + T.any( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool, + OpenAI::Internal::AnyHash ) end - OrSymbol = T.type_alias { T.any(Symbol, String) } - FUNCTION = - T.let( - :function, - OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::TaggedSymbol + # A label for this MCP server, used to identify it in tool calls. + sig { returns(String) } + attr_accessor :server_label + + # The type of the MCP tool. Always `mcp`. + sig { returns(Symbol) } + attr_accessor :type + + # List of allowed tool names or a filter object. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::Variants + ) + ) + end + attr_accessor :allowed_tools + + # An OAuth access token that can be used with a remote MCP server, either with a + # custom MCP server URL or a service connector. Your application must handle the + # OAuth authorization flow and provide the token here. + sig { returns(T.nilable(String)) } + attr_reader :authorization + + sig { params(authorization: String).void } + attr_writer :authorization + + # Identifier for service connectors, like those available in ChatGPT. One of + # `server_url` or `connector_id` must be provided. Learn more about service + # connectors + # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + # + # Currently supported `connector_id` values are: + # + # - Dropbox: `connector_dropbox` + # - Gmail: `connector_gmail` + # - Google Calendar: `connector_googlecalendar` + # - Google Drive: `connector_googledrive` + # - Microsoft Teams: `connector_microsoftteams` + # - Outlook Calendar: `connector_outlookcalendar` + # - Outlook Email: `connector_outlookemail` + # - SharePoint: `connector_sharepoint` + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol + ) + ) + end + attr_reader :connector_id + + sig do + params( + connector_id: + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::OrSymbol + ).void + end + attr_writer :connector_id + + # Optional HTTP headers to send to the MCP server. Use for authentication or other + # purposes. + sig { returns(T.nilable(T::Hash[Symbol, String])) } + attr_accessor :headers + + # Specify which of the MCP server's tools require approval. 
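As a rough sketch of how this McpTool shape fits together once fully defined below (the label, URL, and tool names are hypothetical, and the constructor is assumed to accept the keyword arguments its sigs declare):

```ruby
require "openai"

# Hypothetical values throughout. `require_approval` accepts either an
# approval-setting symbol or a filter hash (see the RequireApproval union
# later in this class).
tool = OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool.new(
  server_label: "docs",                    # identifies this server in tool calls
  server_url: "https://mcp.example.com",   # or pass connector_id: instead
  allowed_tools: ["search", "fetch_page"], # plain string-array form of AllowedTools
  require_approval: :never                 # blanket setting; a filter hash also works
)
```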
+ sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::Variants + ) ) + end + attr_accessor :require_approval + + # Optional description of the MCP server, used to provide more context. + sig { returns(T.nilable(String)) } + attr_reader :server_description + + sig { params(server_description: String).void } + attr_writer :server_description + + # The URL for the MCP server. One of `server_url` or `connector_id` must be + # provided. + sig { returns(T.nilable(String)) } + attr_reader :server_url + + sig { params(server_url: String).void } + attr_writer :server_url + + # Give the model access to additional tools via remote Model Context Protocol + # (MCP) servers. + # [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + sig do + params( + server_label: String, + allowed_tools: + T.nilable( + T.any( + T::Array[String], + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter::OrHash + ) + ), + authorization: String, + connector_id: + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::OrSymbol, + headers: T.nilable(T::Hash[Symbol, String]), + require_approval: + T.nilable( + T.any( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::OrHash, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting::OrSymbol + ) + ), + server_description: String, + server_url: String, + type: Symbol + ).returns(T.attached_class) + end + def self.new( + # A label for this MCP server, used to identify it in tool calls. + server_label:, + # List of allowed tool names or a filter object. + allowed_tools: nil, + # An OAuth access token that can be used with a remote MCP server, either with a + # custom MCP server URL or a service connector. Your application must handle the + # OAuth authorization flow and provide the token here. + authorization: nil, + # Identifier for service connectors, like those available in ChatGPT. One of + # `server_url` or `connector_id` must be provided. Learn more about service + # connectors + # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + # + # Currently supported `connector_id` values are: + # + # - Dropbox: `connector_dropbox` + # - Gmail: `connector_gmail` + # - Google Calendar: `connector_googlecalendar` + # - Google Drive: `connector_googledrive` + # - Microsoft Teams: `connector_microsoftteams` + # - Outlook Calendar: `connector_outlookcalendar` + # - Outlook Email: `connector_outlookemail` + # - SharePoint: `connector_sharepoint` + connector_id: nil, + # Optional HTTP headers to send to the MCP server. Use for authentication or other + # purposes. + headers: nil, + # Specify which of the MCP server's tools require approval. + require_approval: nil, + # Optional description of the MCP server, used to provide more context. + server_description: nil, + # The URL for the MCP server. One of `server_url` or `connector_id` must be + # provided. + server_url: nil, + # The type of the MCP tool. Always `mcp`. 
+ type: :mcp + ) + end sig do override.returns( - T::Array[ - OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::TaggedSymbol - ] + { + server_label: String, + type: Symbol, + allowed_tools: + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::Variants + ), + authorization: String, + connector_id: + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol, + headers: T.nilable(T::Hash[Symbol, String]), + require_approval: + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::Variants + ), + server_description: String, + server_url: String + } ) end - def self.values + def to_hash + end + + # List of allowed tool names or a filter object. + module AllowedTools + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + T::Array[String], + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter + ) + end + + class McpToolFilter < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter, + OpenAI::Internal::AnyHash + ) + end + + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :read_only + + sig { params(read_only: T::Boolean).void } + attr_writer :read_only + + # List of allowed tool names. + sig { returns(T.nilable(T::Array[String])) } + attr_reader :tool_names + + sig { params(tool_names: T::Array[String]).void } + attr_writer :tool_names + + # A filter object to specify which tools are allowed. + sig do + params( + read_only: T::Boolean, + tool_names: T::Array[String] + ).returns(T.attached_class) + end + def self.new( + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + read_only: nil, + # List of allowed tool names. + tool_names: nil + ) + end + + sig do + override.returns( + { read_only: T::Boolean, tool_names: T::Array[String] } + ) + end + def to_hash + end + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::Variants + ] + ) + end + def self.variants + end + + StringArray = + T.let( + OpenAI::Internal::Type::ArrayOf[String], + OpenAI::Internal::Type::Converter + ) + end + + # Identifier for service connectors, like those available in ChatGPT. One of + # `server_url` or `connector_id` must be provided. Learn more about service + # connectors + # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). 
+ # + # Currently supported `connector_id` values are: + # + # - Dropbox: `connector_dropbox` + # - Gmail: `connector_gmail` + # - Google Calendar: `connector_googlecalendar` + # - Google Drive: `connector_googledrive` + # - Microsoft Teams: `connector_microsoftteams` + # - Outlook Calendar: `connector_outlookcalendar` + # - Outlook Email: `connector_outlookemail` + # - SharePoint: `connector_sharepoint` + module ConnectorID + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + CONNECTOR_DROPBOX = + T.let( + :connector_dropbox, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_GMAIL = + T.let( + :connector_gmail, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_GOOGLECALENDAR = + T.let( + :connector_googlecalendar, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_GOOGLEDRIVE = + T.let( + :connector_googledrive, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_MICROSOFTTEAMS = + T.let( + :connector_microsoftteams, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_OUTLOOKCALENDAR = + T.let( + :connector_outlookcalendar, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_OUTLOOKEMAIL = + T.let( + :connector_outlookemail, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol + ) + CONNECTOR_SHAREPOINT = + T.let( + :connector_sharepoint, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol + ] + ) + end + def self.values + end end + + # Specify which of the MCP server's tools require approval. + module RequireApproval + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol + ) + end + + class McpToolApprovalFilter < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter, + OpenAI::Internal::AnyHash + ) + end + + # A filter object to specify which tools are allowed. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always + ) + ) + end + attr_reader :always + + sig do + params( + always: + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always::OrHash + ).void + end + attr_writer :always + + # A filter object to specify which tools are allowed. 
+ sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never + ) + ) + end + attr_reader :never + + sig do + params( + never: + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never::OrHash + ).void + end + attr_writer :never + + # Specify which of the MCP server's tools require approval. Can be `always`, + # `never`, or a filter object associated with tools that require approval. + sig do + params( + always: + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always::OrHash, + never: + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never::OrHash + ).returns(T.attached_class) + end + def self.new( + # A filter object to specify which tools are allowed. + always: nil, + # A filter object to specify which tools are allowed. + never: nil + ) + end + + sig do + override.returns( + { + always: + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always, + never: + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never + } + ) + end + def to_hash + end + + class Always < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always, + OpenAI::Internal::AnyHash + ) + end + + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :read_only + + sig { params(read_only: T::Boolean).void } + attr_writer :read_only + + # List of allowed tool names. + sig { returns(T.nilable(T::Array[String])) } + attr_reader :tool_names + + sig { params(tool_names: T::Array[String]).void } + attr_writer :tool_names + + # A filter object to specify which tools are allowed. + sig do + params( + read_only: T::Boolean, + tool_names: T::Array[String] + ).returns(T.attached_class) + end + def self.new( + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + read_only: nil, + # List of allowed tool names. + tool_names: nil + ) + end + + sig do + override.returns( + { read_only: T::Boolean, tool_names: T::Array[String] } + ) + end + def to_hash + end + end + + class Never < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never, + OpenAI::Internal::AnyHash + ) + end + + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :read_only + + sig { params(read_only: T::Boolean).void } + attr_writer :read_only + + # List of allowed tool names. 
+ sig { returns(T.nilable(T::Array[String])) } + attr_reader :tool_names + + sig { params(tool_names: T::Array[String]).void } + attr_writer :tool_names + + # A filter object to specify which tools are allowed. + sig do + params( + read_only: T::Boolean, + tool_names: T::Array[String] + ).returns(T.attached_class) + end + def self.new( + # Indicates whether or not a tool modifies data or is read-only. If an MCP server + # is + # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + # it will match this filter. + read_only: nil, + # List of allowed tool names. + tool_names: nil + ) + end + + sig do + override.returns( + { read_only: T::Boolean, tool_names: T::Array[String] } + ) + end + def to_hash + end + end + end + + # Specify a single approval policy for all tools. One of `always` or `never`. When + # set to `always`, all tools will require approval. When set to `never`, all tools + # will not require approval. + module McpToolApprovalSetting + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + ALWAYS = + T.let( + :always, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol + ) + NEVER = + T.let( + :never, + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol + ] + ) + end + def self.values + end + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::Variants + ] + ) + end + def self.variants + end + end + end + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Variants + ] + ) + end + def self.variants end end - # Configuration options for tracing. Set to null to disable tracing. Once tracing - # is enabled for a session, the configuration cannot be modified. + # Realtime API can write session traces to the + # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + # tracing is enabled for a session, the configuration cannot be modified. # # `auto` will create a trace for the session with default values for the workflow # name, group id, and metadata. @@ -1092,15 +1970,15 @@ module OpenAI end # The group id to attach to this trace to enable filtering and grouping in the - # traces dashboard. + # Traces Dashboard. sig { returns(T.nilable(String)) } attr_reader :group_id sig { params(group_id: String).void } attr_writer :group_id - # The arbitrary metadata to attach to this trace to enable filtering in the traces - # dashboard. + # The arbitrary metadata to attach to this trace to enable filtering in the Traces + # Dashboard. sig { returns(T.nilable(T.anything)) } attr_reader :metadata @@ -1108,7 +1986,7 @@ module OpenAI attr_writer :metadata # The name of the workflow to attach to this trace. This is used to name the trace - # in the traces dashboard. + # in the Traces Dashboard. 
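Taken together, a custom tracing configuration (as opposed to the `auto` mode described above) might be written as a plain hash; every value below is hypothetical:

```ruby
# Hypothetical tracing fields; passing `:auto` instead selects default values
# for the workflow name, group id, and metadata.
tracing = {
  workflow_name: "support-call",       # names the trace in the Traces Dashboard
  group_id: "account-1234",            # enables filtering and grouping
  metadata: { environment: "staging" } # arbitrary filterable metadata
}
```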
sig { returns(T.nilable(String)) } attr_reader :workflow_name @@ -1125,13 +2003,13 @@ module OpenAI end def self.new( # The group id to attach to this trace to enable filtering and grouping in the - # traces dashboard. + # Traces Dashboard. group_id: nil, - # The arbitrary metadata to attach to this trace to enable filtering in the traces - # dashboard. + # The arbitrary metadata to attach to this trace to enable filtering in the Traces + # Dashboard. metadata: nil, # The name of the workflow to attach to this trace. This is used to name the trace - # in the traces dashboard. + # in the Traces Dashboard. workflow_name: nil ) end @@ -1160,87 +2038,33 @@ module OpenAI end end - class TurnDetection < OpenAI::Internal::Type::BaseModel - OrHash = + # The type of session to create. Always `realtime` for the Realtime API. + module Type + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = T.type_alias do - T.any( - OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection, - OpenAI::Internal::AnyHash + T.all( + Symbol, + OpenAI::Realtime::RealtimeSessionCreateResponse::Type ) end + OrSymbol = T.type_alias { T.any(Symbol, String) } - # Amount of audio to include before the VAD detected speech (in milliseconds). - # Defaults to 300ms. - sig { returns(T.nilable(Integer)) } - attr_reader :prefix_padding_ms - - sig { params(prefix_padding_ms: Integer).void } - attr_writer :prefix_padding_ms - - # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. - # With shorter values the model will respond more quickly, but may jump in on - # short pauses from the user. - sig { returns(T.nilable(Integer)) } - attr_reader :silence_duration_ms - - sig { params(silence_duration_ms: Integer).void } - attr_writer :silence_duration_ms - - # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher - # threshold will require louder audio to activate the model, and thus might - # perform better in noisy environments. - sig { returns(T.nilable(Float)) } - attr_reader :threshold - - sig { params(threshold: Float).void } - attr_writer :threshold - - # Type of turn detection, only `server_vad` is currently supported. - sig { returns(T.nilable(String)) } - attr_reader :type - - sig { params(type: String).void } - attr_writer :type - - # Configuration for turn detection. Can be set to `null` to turn off. Server VAD - # means that the model will detect the start and end of speech based on audio - # volume and respond at the end of user speech. - sig do - params( - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - ).returns(T.attached_class) - end - def self.new( - # Amount of audio to include before the VAD detected speech (in milliseconds). - # Defaults to 300ms. - prefix_padding_ms: nil, - # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. - # With shorter values the model will respond more quickly, but may jump in on - # short pauses from the user. - silence_duration_ms: nil, - # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher - # threshold will require louder audio to activate the model, and thus might - # perform better in noisy environments. - threshold: nil, - # Type of turn detection, only `server_vad` is currently supported. 
- type: nil - ) - end + REALTIME = + T.let( + :realtime, + OpenAI::Realtime::RealtimeSessionCreateResponse::Type::TaggedSymbol + ) sig do override.returns( - { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } + T::Array[ + OpenAI::Realtime::RealtimeSessionCreateResponse::Type::TaggedSymbol + ] ) end - def to_hash + def self.values end end end diff --git a/rbi/openai/models/realtime/realtime_tools_config_union.rbi b/rbi/openai/models/realtime/realtime_tools_config_union.rbi index 35f3c776..97033485 100644 --- a/rbi/openai/models/realtime/realtime_tools_config_union.rbi +++ b/rbi/openai/models/realtime/realtime_tools_config_union.rbi @@ -12,127 +12,11 @@ module OpenAI Variants = T.type_alias do T.any( - OpenAI::Realtime::RealtimeToolsConfigUnion::Function, + OpenAI::Realtime::Models, OpenAI::Realtime::RealtimeToolsConfigUnion::Mcp ) end - class Function < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeToolsConfigUnion::Function, - OpenAI::Internal::AnyHash - ) - end - - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - sig { returns(T.nilable(String)) } - attr_reader :description - - sig { params(description: String).void } - attr_writer :description - - # The name of the function. - sig { returns(T.nilable(String)) } - attr_reader :name - - sig { params(name: String).void } - attr_writer :name - - # Parameters of the function in JSON Schema. - sig { returns(T.nilable(T.anything)) } - attr_reader :parameters - - sig { params(parameters: T.anything).void } - attr_writer :parameters - - # The type of the tool, i.e. `function`. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::OrSymbol - ) - ) - end - attr_reader :type - - sig do - params( - type: - OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::OrSymbol - ).void - end - attr_writer :type - - sig do - params( - description: String, - name: String, - parameters: T.anything, - type: - OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new( - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - description: nil, - # The name of the function. - name: nil, - # Parameters of the function in JSON Schema. - parameters: nil, - # The type of the tool, i.e. `function`. - type: nil - ) - end - - sig do - override.returns( - { - description: String, - name: String, - parameters: T.anything, - type: - OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::OrSymbol - } - ) - end - def to_hash - end - - # The type of the tool, i.e. `function`. 
- module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - FUNCTION = - T.let( - :function, - OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end - class Mcp < OpenAI::Internal::Type::BaseModel OrHash = T.type_alias do diff --git a/rbi/openai/models/realtime/realtime_tracing_config.rbi b/rbi/openai/models/realtime/realtime_tracing_config.rbi index ac74768c..dd60c722 100644 --- a/rbi/openai/models/realtime/realtime_tracing_config.rbi +++ b/rbi/openai/models/realtime/realtime_tracing_config.rbi @@ -3,8 +3,9 @@ module OpenAI module Models module Realtime - # Configuration options for tracing. Set to null to disable tracing. Once tracing - # is enabled for a session, the configuration cannot be modified. + # Realtime API can write session traces to the + # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + # tracing is enabled for a session, the configuration cannot be modified. # # `auto` will create a trace for the session with default values for the workflow # name, group id, and metadata. @@ -29,15 +30,15 @@ module OpenAI end # The group id to attach to this trace to enable filtering and grouping in the - # traces dashboard. + # Traces Dashboard. sig { returns(T.nilable(String)) } attr_reader :group_id sig { params(group_id: String).void } attr_writer :group_id - # The arbitrary metadata to attach to this trace to enable filtering in the traces - # dashboard. + # The arbitrary metadata to attach to this trace to enable filtering in the Traces + # Dashboard. sig { returns(T.nilable(T.anything)) } attr_reader :metadata @@ -45,7 +46,7 @@ module OpenAI attr_writer :metadata # The name of the workflow to attach to this trace. This is used to name the trace - # in the traces dashboard. + # in the Traces Dashboard. sig { returns(T.nilable(String)) } attr_reader :workflow_name @@ -62,13 +63,13 @@ module OpenAI end def self.new( # The group id to attach to this trace to enable filtering and grouping in the - # traces dashboard. + # Traces Dashboard. group_id: nil, - # The arbitrary metadata to attach to this trace to enable filtering in the traces - # dashboard. + # The arbitrary metadata to attach to this trace to enable filtering in the Traces + # Dashboard. metadata: nil, # The name of the workflow to attach to this trace. This is used to name the trace - # in the traces dashboard. + # in the Traces Dashboard. 
workflow_name: nil ) end diff --git a/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi b/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi new file mode 100644 index 00000000..ff5fe964 --- /dev/null +++ b/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi @@ -0,0 +1,50 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class RealtimeTranscriptionSessionAudio < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeTranscriptionSessionAudio, + OpenAI::Internal::AnyHash + ) + end + + sig do + returns( + T.nilable(OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput) + ) + end + attr_reader :input + + sig do + params( + input: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::OrHash + ).void + end + attr_writer :input + + # Configuration for input and output audio. + sig do + params( + input: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::OrHash + ).returns(T.attached_class) + end + def self.new(input: nil) + end + + sig do + override.returns( + { input: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput } + ) + end + def to_hash + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi b/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi new file mode 100644 index 00000000..360679f8 --- /dev/null +++ b/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi @@ -0,0 +1,226 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class RealtimeTranscriptionSessionAudioInput < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput, + OpenAI::Internal::AnyHash + ) + end + + # The PCM audio format. Only a 24kHz sample rate is supported. + sig do + returns( + T.nilable( + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA + ) + ) + ) + end + attr_reader :format_ + + sig do + params( + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ) + ).void + end + attr_writer :format_ + + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction + ) + ) + end + attr_reader :noise_reduction + + sig do + params( + noise_reduction: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction::OrHash + ).void + end + attr_writer :noise_reduction + + # Configuration for input audio transcription, defaults to off and can be set to + # `null` to turn off once on. Input audio transcription is not native to the + # model, since the model consumes audio directly. 
Transcription runs + # asynchronously through + # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + # and should be treated as guidance of input audio content rather than precisely + # what the model heard. The client can optionally set the language and prompt for + # transcription; these offer additional guidance to the transcription service. + sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) } + attr_reader :transcription + + sig do + params( + transcription: OpenAI::Realtime::AudioTranscription::OrHash + ).void + end + attr_writer :transcription + + # Configuration for turn detection, either Server VAD or Semantic VAD. This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech. Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection + ) + ) + end + attr_reader :turn_detection + + sig do + params( + turn_detection: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::OrHash + ).void + end + attr_writer :turn_detection + + sig do + params( + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash + ), + noise_reduction: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction::OrHash, + transcription: OpenAI::Realtime::AudioTranscription::OrHash, + turn_detection: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::OrHash + ).returns(T.attached_class) + end + def self.new( + # The PCM audio format. Only a 24kHz sample rate is supported. + format_: nil, + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + noise_reduction: nil, + # Configuration for input audio transcription, defaults to off and can be set to + # `null` to turn off once on. Input audio transcription is not native to the + # model, since the model consumes audio directly. Transcription runs + # asynchronously through + # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + # and should be treated as guidance of input audio content rather than precisely + # what the model heard. The client can optionally set the language and prompt for + # transcription; these offer additional guidance to the transcription service. + transcription: nil, + # Configuration for turn detection, either Server VAD or Semantic VAD.
This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech. Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency. + turn_detection: nil + ) + end + + sig do + override.returns( + { + format_: + T.any( + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU, + OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA + ), + noise_reduction: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction, + transcription: OpenAI::Realtime::AudioTranscription, + turn_detection: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection + } + ) + end + def to_hash + end + + class NoiseReduction < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction, + OpenAI::Internal::AnyHash + ) + end + + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. + sig do + returns(T.nilable(OpenAI::Realtime::NoiseReductionType::OrSymbol)) + end + attr_reader :type + + sig do + params(type: OpenAI::Realtime::NoiseReductionType::OrSymbol).void + end + attr_writer :type + + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + sig do + params( + type: OpenAI::Realtime::NoiseReductionType::OrSymbol + ).returns(T.attached_class) + end + def self.new( + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. + type: nil + ) + end + + sig do + override.returns( + { type: OpenAI::Realtime::NoiseReductionType::OrSymbol } + ) + end + def to_hash + end + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi b/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi new file mode 100644 index 00000000..676cf1eb --- /dev/null +++ b/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi @@ -0,0 +1,259 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class RealtimeTranscriptionSessionAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection, + OpenAI::Internal::AnyHash + ) + end + + # Whether or not to automatically generate a response when a VAD stop event + # occurs. 
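Putting the audio-input pieces above together, a minimal sketch (assuming plain hashes are accepted wherever the sigs declare `::OrHash` aliases; the model and language values are illustrative):

```ruby
require "openai"

# Each nested hash mirrors one of the classes defined above.
audio = OpenAI::Realtime::RealtimeTranscriptionSessionAudio.new(
  input: {
    noise_reduction: { type: :near_field },                        # close-talking mic
    transcription: { model: "gpt-4o-transcribe", language: "en" }, # illustrative
    turn_detection: { type: :server_vad, silence_duration_ms: 500 }
  }
)
```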
+ sig { returns(T.nilable(T::Boolean)) } + attr_reader :create_response + + sig { params(create_response: T::Boolean).void } + attr_writer :create_response + + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness::OrSymbol + ) + ) + end + attr_reader :eagerness + + sig do + params( + eagerness: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness::OrSymbol + ).void + end + attr_writer :eagerness + + # Optional idle timeout after which turn detection will auto-timeout when no + # additional audio is received. + sig { returns(T.nilable(Integer)) } + attr_accessor :idle_timeout_ms + + # Whether or not to automatically interrupt any ongoing response with output to + # the default conversation (i.e. `conversation` of `auto`) when a VAD start event + # occurs. + sig { returns(T.nilable(T::Boolean)) } + attr_reader :interrupt_response + + sig { params(interrupt_response: T::Boolean).void } + attr_writer :interrupt_response + + # Used only for `server_vad` mode. Amount of audio to include before the VAD + # detected speech (in milliseconds). Defaults to 300ms. + sig { returns(T.nilable(Integer)) } + attr_reader :prefix_padding_ms + + sig { params(prefix_padding_ms: Integer).void } + attr_writer :prefix_padding_ms + + # Used only for `server_vad` mode. Duration of silence to detect speech stop (in + # milliseconds). Defaults to 500ms. With shorter values the model will respond + # more quickly, but may jump in on short pauses from the user. + sig { returns(T.nilable(Integer)) } + attr_reader :silence_duration_ms + + sig { params(silence_duration_ms: Integer).void } + attr_writer :silence_duration_ms + + # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0); this + # defaults to 0.5. A higher threshold will require louder audio to activate the + # model, and thus might perform better in noisy environments. + sig { returns(T.nilable(Float)) } + attr_reader :threshold + + sig { params(threshold: Float).void } + attr_writer :threshold + + # Type of turn detection. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type::OrSymbol + ) + ) + end + attr_reader :type + + sig do + params( + type: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type::OrSymbol + ).void + end + attr_writer :type + + # Configuration for turn detection, either Server VAD or Semantic VAD. This can be + # set to `null` to turn off, in which case the client must manually trigger model + # response. Server VAD means that the model will detect the start and end of + # speech based on audio volume and respond at the end of user speech. Semantic VAD + # is more advanced and uses a turn detection model (in conjunction with VAD) to + # semantically estimate whether the user has finished speaking, then dynamically + # sets a timeout based on this probability. For example, if user audio trails off + # with "uhhm", the model will score a low probability of turn end and wait longer + # for the user to continue speaking. This can be useful for more natural + # conversations, but may have a higher latency.
+ sig do + params( + create_response: T::Boolean, + eagerness: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness::OrSymbol, + idle_timeout_ms: T.nilable(Integer), + interrupt_response: T::Boolean, + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type::OrSymbol + ).returns(T.attached_class) + end + def self.new( + # Whether or not to automatically generate a response when a VAD stop event + # occurs. + create_response: nil, + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`. + eagerness: nil, + # Optional idle timeout after which turn detection will auto-timeout when no + # additional audio is received. + idle_timeout_ms: nil, + # Whether or not to automatically interrupt any ongoing response with output to + # the default conversation (i.e. `conversation` of `auto`) when a VAD start event + # occurs. + interrupt_response: nil, + # Used only for `server_vad` mode. Amount of audio to include before the VAD + # detected speech (in milliseconds). Defaults to 300ms. + prefix_padding_ms: nil, + # Used only for `server_vad` mode. Duration of silence to detect speech stop (in + # milliseconds). Defaults to 500ms. With shorter values the model will respond + # more quickly, but may jump in on short pauses from the user. + silence_duration_ms: nil, + # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0); this + # defaults to 0.5. A higher threshold will require louder audio to activate the + # model, and thus might perform better in noisy environments. + threshold: nil, + # Type of turn detection. + type: nil + ) + end + + sig do + override.returns( + { + create_response: T::Boolean, + eagerness: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness::OrSymbol, + idle_timeout_ms: T.nilable(Integer), + interrupt_response: T::Boolean, + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type::OrSymbol + } + ) + end + def to_hash + end + + # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + # will wait longer for the user to continue speaking, `high` will respond more + # quickly. `auto` is the default and is equivalent to `medium`.
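The semantic VAD mode documented above might be configured like so (a sketch; the idle timeout value is hypothetical):

```ruby
require "openai"

# Semantic VAD keys off a turn-end probability rather than raw silence.
vad = OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection.new(
  type: :semantic_vad,
  eagerness: :low,          # wait longer for the user to continue speaking
  idle_timeout_ms: 30_000,  # hypothetical idle auto-timeout in milliseconds
  create_response: true     # respond automatically on VAD stop
)
```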
+ module Eagerness + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + LOW = + T.let( + :low, + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness::TaggedSymbol + ) + MEDIUM = + T.let( + :medium, + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness::TaggedSymbol + ) + HIGH = + T.let( + :high, + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness::TaggedSymbol + ) + AUTO = + T.let( + :auto, + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness::TaggedSymbol + ] + ) + end + def self.values + end + end + + # Type of turn detection. + module Type + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + SERVER_VAD = + T.let( + :server_vad, + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type::TaggedSymbol + ) + SEMANTIC_VAD = + T.let( + :semantic_vad, + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type::TaggedSymbol + ] + ) + end + def self.values + end + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi b/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi new file mode 100644 index 00000000..3e0f16be --- /dev/null +++ b/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi @@ -0,0 +1,51 @@ +# typed: strong + +module OpenAI + module Models + RealtimeTranscriptionSessionClientSecret = + Realtime::RealtimeTranscriptionSessionClientSecret + + module Realtime + class RealtimeTranscriptionSessionClientSecret < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret, + OpenAI::Internal::AnyHash + ) + end + + # Timestamp for when the token expires. Currently, all tokens expire after one + # minute. + sig { returns(Integer) } + attr_accessor :expires_at + + # Ephemeral key usable in client environments to authenticate connections to the + # Realtime API. Use this in client-side environments rather than a standard API + # token, which should only be used server-side. + sig { returns(String) } + attr_accessor :value + + # Ephemeral key returned by the API. Only present when the session is created on + # the server via REST API. + sig do + params(expires_at: Integer, value: String).returns(T.attached_class) + end + def self.new( + # Timestamp for when the token expires. Currently, all tokens expire after one + # minute. + expires_at:, + # Ephemeral key usable in client environments to authenticate connections to the + # Realtime API. Use this in client-side environments rather than a standard API + # token, which should only be used server-side. 
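A sketch of consuming such an ephemeral key (assuming `secret` is a `RealtimeTranscriptionSessionClientSecret` returned by a server-side session create; the header shape is illustrative):

```ruby
# Ephemeral keys currently expire after one minute, so check before use.
if Time.at(secret.expires_at) <= Time.now
  raise "client secret expired; mint a new one server-side"
end

# Hand only the short-lived value to the browser or mobile client.
client_auth = { "Authorization" => "Bearer #{secret.value}" }
```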
+ value: + ) + end + + sig { override.returns({ expires_at: Integer, value: String }) } + def to_hash + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi b/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi index af6980d1..ee1ad3e9 100644 --- a/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +++ b/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi @@ -12,27 +12,30 @@ module OpenAI ) end - # ID of the model to use. The options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source - # Whisper V2 model). + # The type of session to create. Always `transcription` for transcription + # sessions. + sig { returns(Symbol) } + attr_accessor :type + + # Configuration for input and output audio. sig do returns( - T.any( - String, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::OrSymbol - ) + T.nilable(OpenAI::Realtime::RealtimeTranscriptionSessionAudio) ) end - attr_accessor :model + attr_reader :audio - # The type of session to create. Always `transcription` for transcription - # sessions. - sig { returns(Symbol) } - attr_accessor :type + sig do + params( + audio: OpenAI::Realtime::RealtimeTranscriptionSessionAudio::OrHash + ).void + end + attr_writer :audio - # The set of items to include in the transcription. Current available items are: + # Additional fields to include in server outputs. # - # - `item.input_audio_transcription.logprobs` + # `item.input_audio_transcription.logprobs`: Include logprobs for input audio + # transcription. sig do returns( T.nilable( @@ -54,138 +57,25 @@ module OpenAI end attr_writer :include - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For - # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel - # (mono), and little-endian byte order. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol - ) - ) - end - attr_reader :input_audio_format - - sig do - params( - input_audio_format: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol - ).void - end - attr_writer :input_audio_format - - # Configuration for input audio noise reduction. This can be set to `null` to turn - # off. Noise reduction filters audio added to the input audio buffer before it is - # sent to VAD and the model. Filtering the audio can improve VAD and turn - # detection accuracy (reducing false positives) and model performance by improving - # perception of the input audio. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction - ) - ) - end - attr_reader :input_audio_noise_reduction - - sig do - params( - input_audio_noise_reduction: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::OrHash - ).void - end - attr_writer :input_audio_noise_reduction - - # Configuration for input audio transcription. The client can optionally set the - # language and prompt for transcription, these offer additional guidance to the - # transcription service. 
- sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription - ) - ) - end - attr_reader :input_audio_transcription - - sig do - params( - input_audio_transcription: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::OrHash - ).void - end - attr_writer :input_audio_transcription - - # Configuration for turn detection. Can be set to `null` to turn off. Server VAD - # means that the model will detect the start and end of speech based on audio - # volume and respond at the end of user speech. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection - ) - ) - end - attr_reader :turn_detection - - sig do - params( - turn_detection: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::OrHash - ).void - end - attr_writer :turn_detection - # Realtime transcription session object configuration. sig do params( - model: - T.any( - String, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::OrSymbol - ), + audio: OpenAI::Realtime::RealtimeTranscriptionSessionAudio::OrHash, include: T::Array[ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include::OrSymbol ], - input_audio_format: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol, - input_audio_noise_reduction: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::OrHash, - input_audio_transcription: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::OrHash, - turn_detection: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::OrHash, type: Symbol ).returns(T.attached_class) end def self.new( - # ID of the model to use. The options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source - # Whisper V2 model). - model:, - # The set of items to include in the transcription. Current available items are: + # Configuration for input and output audio. + audio: nil, + # Additional fields to include in server outputs. # - # - `item.input_audio_transcription.logprobs` + # `item.input_audio_transcription.logprobs`: Include logprobs for input audio + # transcription. include: nil, - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For - # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel - # (mono), and little-endian byte order. - input_audio_format: nil, - # Configuration for input audio noise reduction. This can be set to `null` to turn - # off. Noise reduction filters audio added to the input audio buffer before it is - # sent to VAD and the model. Filtering the audio can improve VAD and turn - # detection accuracy (reducing false positives) and model performance by improving - # perception of the input audio. - input_audio_noise_reduction: nil, - # Configuration for input audio transcription. The client can optionally set the - # language and prompt for transcription, these offer additional guidance to the - # transcription service. - input_audio_transcription: nil, - # Configuration for turn detection. Can be set to `null` to turn off. Server VAD - # means that the model will detect the start and end of speech based on audio - # volume and respond at the end of user speech. - turn_detection: nil, # The type of session to create. Always `transcription` for transcription # sessions. 
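Under the new shape, a request reduces to the nested audio block plus the optional include list; a sketch with illustrative values (`type` defaults to `:transcription`):

```ruby
require "openai"

req = OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest.new(
  audio: { input: { transcription: { language: "en" } } },
  include: [:"item.input_audio_transcription.logprobs"] # logprobs in server output
)
```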
type: :transcription @@ -195,80 +85,18 @@ module OpenAI sig do override.returns( { - model: - T.any( - String, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::OrSymbol - ), type: Symbol, + audio: OpenAI::Realtime::RealtimeTranscriptionSessionAudio, include: T::Array[ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include::OrSymbol - ], - input_audio_format: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol, - input_audio_noise_reduction: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction, - input_audio_transcription: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription, - turn_detection: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection + ] } ) end def to_hash end - # ID of the model to use. The options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source - # Whisper V2 model). - module Model - extend OpenAI::Internal::Type::Union - - Variants = - T.type_alias do - T.any( - String, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol - ) - end - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::Variants - ] - ) - end - def self.variants - end - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - WHISPER_1 = - T.let( - :"whisper-1", - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol - ) - GPT_4O_TRANSCRIBE = - T.let( - :"gpt-4o-transcribe", - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol - ) - GPT_4O_MINI_TRANSCRIBE = - T.let( - :"gpt-4o-mini-transcribe", - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol - ) - end - module Include extend OpenAI::Internal::Type::Enum @@ -297,406 +125,6 @@ module OpenAI def self.values end end - - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For - # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel - # (mono), and little-endian byte order. - module InputAudioFormat - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - PCM16 = - T.let( - :pcm16, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol - ) - G711_ULAW = - T.let( - :g711_ulaw, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol - ) - G711_ALAW = - T.let( - :g711_alaw, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol - ] - ) - end - def self.values - end - end - - class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction, - OpenAI::Internal::AnyHash - ) - end - - # Type of noise reduction. 
`near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol - ) - ) - end - attr_reader :type - - sig do - params( - type: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol - ).void - end - attr_writer :type - - # Configuration for input audio noise reduction. This can be set to `null` to turn - # off. Noise reduction filters audio added to the input audio buffer before it is - # sent to VAD and the model. Filtering the audio can improve VAD and turn - # detection accuracy (reducing false positives) and model performance by improving - # perception of the input audio. - sig do - params( - type: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new( - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. - type: nil - ) - end - - sig do - override.returns( - { - type: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol - } - ) - end - def to_hash - end - - # Type of noise reduction. `near_field` is for close-talking microphones such as - # headphones, `far_field` is for far-field microphones such as laptop or - # conference room microphones. - module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - NEAR_FIELD = - T.let( - :near_field, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::TaggedSymbol - ) - FAR_FIELD = - T.let( - :far_field, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class InputAudioTranscription < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription, - OpenAI::Internal::AnyHash - ) - end - - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - sig { returns(T.nilable(String)) } - attr_reader :language - - sig { params(language: String).void } - attr_writer :language - - # The model to use for transcription, current options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1`. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol - ) - ) - end - attr_reader :model - - sig do - params( - model: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol - ).void - end - attr_writer :model - - # An optional text to guide the model's style or continue a previous audio - # segment. 
For `whisper-1`, the - # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - # For `gpt-4o-transcribe` models, the prompt is a free text string, for example - # "expect words related to technology". - sig { returns(T.nilable(String)) } - attr_reader :prompt - - sig { params(prompt: String).void } - attr_writer :prompt - - # Configuration for input audio transcription. The client can optionally set the - # language and prompt for transcription, these offer additional guidance to the - # transcription service. - sig do - params( - language: String, - model: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol, - prompt: String - ).returns(T.attached_class) - end - def self.new( - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - language: nil, - # The model to use for transcription, current options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1`. - model: nil, - # An optional text to guide the model's style or continue a previous audio - # segment. For `whisper-1`, the - # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - # For `gpt-4o-transcribe` models, the prompt is a free text string, for example - # "expect words related to technology". - prompt: nil - ) - end - - sig do - override.returns( - { - language: String, - model: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol, - prompt: String - } - ) - end - def to_hash - end - - # The model to use for transcription, current options are `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, and `whisper-1`. - module Model - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - GPT_4O_TRANSCRIBE = - T.let( - :"gpt-4o-transcribe", - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol - ) - GPT_4O_MINI_TRANSCRIBE = - T.let( - :"gpt-4o-mini-transcribe", - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol - ) - WHISPER_1 = - T.let( - :"whisper-1", - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class TurnDetection < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection, - OpenAI::Internal::AnyHash - ) - end - - # Amount of audio to include before the VAD detected speech (in milliseconds). - # Defaults to 300ms. - sig { returns(T.nilable(Integer)) } - attr_reader :prefix_padding_ms - - sig { params(prefix_padding_ms: Integer).void } - attr_writer :prefix_padding_ms - - # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. - # With shorter values the model will respond more quickly, but may jump in on - # short pauses from the user. 
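As a hedged aside on these VAD knobs (the same fields reappear in the new `RealtimeTranscriptionSessionTurnDetection` file later in this diff): a minimal sketch of how they might combine, using the hash form that any `OrHash` parameter accepts; the numbers are illustrative, not recommended defaults.

```ruby
# Example-only values; field names follow the turn-detection signatures here.
turn_detection = {
  type: :server_vad,
  threshold: 0.6,          # higher => louder audio needed to trigger (noisy rooms)
  prefix_padding_ms: 300,  # audio retained from before detected speech
  silence_duration_ms: 400 # shorter => faster replies, but more interruptions
}
```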
- sig { returns(T.nilable(Integer)) } - attr_reader :silence_duration_ms - - sig { params(silence_duration_ms: Integer).void } - attr_writer :silence_duration_ms - - # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher - # threshold will require louder audio to activate the model, and thus might - # perform better in noisy environments. - sig { returns(T.nilable(Float)) } - attr_reader :threshold - - sig { params(threshold: Float).void } - attr_writer :threshold - - # Type of turn detection. Only `server_vad` is currently supported for - # transcription sessions. - sig do - returns( - T.nilable( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol - ) - ) - end - attr_reader :type - - sig do - params( - type: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol - ).void - end - attr_writer :type - - # Configuration for turn detection. Can be set to `null` to turn off. Server VAD - # means that the model will detect the start and end of speech based on audio - # volume and respond at the end of user speech. - sig do - params( - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new( - # Amount of audio to include before the VAD detected speech (in milliseconds). - # Defaults to 300ms. - prefix_padding_ms: nil, - # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. - # With shorter values the model will respond more quickly, but may jump in on - # short pauses from the user. - silence_duration_ms: nil, - # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher - # threshold will require louder audio to activate the model, and thus might - # perform better in noisy environments. - threshold: nil, - # Type of turn detection. Only `server_vad` is currently supported for - # transcription sessions. - type: nil - ) - end - - sig do - override.returns( - { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol - } - ) - end - def to_hash - end - - # Type of turn detection. Only `server_vad` is currently supported for - # transcription sessions. 
- module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - SERVER_VAD = - T.let( - :server_vad, - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end end end end diff --git a/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi b/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi new file mode 100644 index 00000000..ac8e98ae --- /dev/null +++ b/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi @@ -0,0 +1,195 @@ +# typed: strong + +module OpenAI + module Models + RealtimeTranscriptionSessionCreateResponse = + Realtime::RealtimeTranscriptionSessionCreateResponse + + module Realtime + class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse, + OpenAI::Internal::AnyHash + ) + end + + # Ephemeral key returned by the API. Only present when the session is created on + # the server via REST API. + sig do + returns(OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret) + end + attr_reader :client_secret + + sig do + params( + client_secret: + OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret::OrHash + ).void + end + attr_writer :client_secret + + # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + sig { returns(T.nilable(String)) } + attr_reader :input_audio_format + + sig { params(input_audio_format: String).void } + attr_writer :input_audio_format + + # Configuration of the transcription model. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription + ) + ) + end + attr_reader :input_audio_transcription + + sig do + params( + input_audio_transcription: + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::OrHash + ).void + end + attr_writer :input_audio_transcription + + # The set of modalities the model can respond with. To disable audio, set this to + # ["text"]. + sig do + returns( + T.nilable( + T::Array[ + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol + ] + ) + ) + end + attr_reader :modalities + + sig do + params( + modalities: + T::Array[ + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::OrSymbol + ] + ).void + end + attr_writer :modalities + + # Configuration for turn detection. Can be set to `null` to turn off. Server VAD + # means that the model will detect the start and end of speech based on audio + # volume and respond at the end of user speech. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection + ) + ) + end + attr_reader :turn_detection + + sig do + params( + turn_detection: + OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash + ).void + end + attr_writer :turn_detection + + # A new Realtime transcription session configuration. + # + # When a session is created on the server via REST API, the session object also + # contains an ephemeral key. Default TTL for keys is 10 minutes. 
This property is + # not present when a session is updated via the WebSocket API. + sig do + params( + client_secret: + OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret::OrHash, + input_audio_format: String, + input_audio_transcription: + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::OrHash, + modalities: + T::Array[ + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::OrSymbol + ], + turn_detection: + OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash + ).returns(T.attached_class) + end + def self.new( + # Ephemeral key returned by the API. Only present when the session is created on + # the server via REST API. + client_secret:, + # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + input_audio_format: nil, + # Configuration of the transcription model. + input_audio_transcription: nil, + # The set of modalities the model can respond with. To disable audio, set this to + # ["text"]. + modalities: nil, + # Configuration for turn detection. Can be set to `null` to turn off. Server VAD + # means that the model will detect the start and end of speech based on audio + # volume and respond at the end of user speech. + turn_detection: nil + ) + end + + sig do + override.returns( + { + client_secret: + OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret, + input_audio_format: String, + input_audio_transcription: + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription, + modalities: + T::Array[ + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol + ], + turn_detection: + OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection + } + ) + end + def to_hash + end + + module Modality + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + TEXT = + T.let( + :text, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol + ) + AUDIO = + T.let( + :audio, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol + ] + ) + end + def self.values + end + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi b/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi new file mode 100644 index 00000000..2aee3f4a --- /dev/null +++ b/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi @@ -0,0 +1,144 @@ +# typed: strong + +module OpenAI + module Models + RealtimeTranscriptionSessionInputAudioTranscription = + Realtime::RealtimeTranscriptionSessionInputAudioTranscription + + module Realtime + class RealtimeTranscriptionSessionInputAudioTranscription < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription, + OpenAI::Internal::AnyHash + ) + end + + # The language of the input audio. Supplying the input language in + # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + # format will improve accuracy and latency. 
+ sig { returns(T.nilable(String)) } + attr_reader :language + + sig { params(language: String).void } + attr_writer :language + + # The model to use for transcription. Current options are `whisper-1`, + # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. + sig do + returns( + T.nilable( + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol + ) + ) + end + attr_reader :model + + sig do + params( + model: + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::OrSymbol + ).void + end + attr_writer :model + + # An optional text to guide the model's style or continue a previous audio + # segment. For `whisper-1`, the + # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + # For `gpt-4o-transcribe` models, the prompt is a free text string, for example + # "expect words related to technology". + sig { returns(T.nilable(String)) } + attr_reader :prompt + + sig { params(prompt: String).void } + attr_writer :prompt + + # Configuration of the transcription model. + sig do + params( + language: String, + model: + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::OrSymbol, + prompt: String + ).returns(T.attached_class) + end + def self.new( + # The language of the input audio. Supplying the input language in + # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + # format will improve accuracy and latency. + language: nil, + # The model to use for transcription. Current options are `whisper-1`, + # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. + model: nil, + # An optional text to guide the model's style or continue a previous audio + # segment. For `whisper-1`, the + # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + # For `gpt-4o-transcribe` models, the prompt is a free text string, for example + # "expect words related to technology". + prompt: nil + ) + end + + sig do + override.returns( + { + language: String, + model: + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol, + prompt: String + } + ) + end + def to_hash + end + + # The model to use for transcription. Current options are `whisper-1`, + # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. 
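As a usage sketch, hedged: construction goes through `self.new` exactly as declared in this file, and the model, language, and prompt values below are arbitrary examples.

```ruby
# Hypothetical transcription configuration using the three optional fields.
transcription =
  OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription.new(
    model: :"gpt-4o-transcribe",                 # one of the four documented options
    language: "en",                              # ISO-639-1 hint improves accuracy and latency
    prompt: "expect words related to technology" # free text for gpt-4o-transcribe models
  )
```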
+ module Model + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + WHISPER_1 = + T.let( + :"whisper-1", + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol + ) + GPT_4O_TRANSCRIBE_LATEST = + T.let( + :"gpt-4o-transcribe-latest", + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol + ) + GPT_4O_MINI_TRANSCRIBE = + T.let( + :"gpt-4o-mini-transcribe", + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol + ) + GPT_4O_TRANSCRIBE = + T.let( + :"gpt-4o-transcribe", + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol + ] + ) + end + def self.values + end + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi b/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi new file mode 100644 index 00000000..ee74cc23 --- /dev/null +++ b/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi @@ -0,0 +1,94 @@ +# typed: strong + +module OpenAI + module Models + RealtimeTranscriptionSessionTurnDetection = + Realtime::RealtimeTranscriptionSessionTurnDetection + + module Realtime + class RealtimeTranscriptionSessionTurnDetection < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection, + OpenAI::Internal::AnyHash + ) + end + + # Amount of audio to include before the VAD detected speech (in milliseconds). + # Defaults to 300ms. + sig { returns(T.nilable(Integer)) } + attr_reader :prefix_padding_ms + + sig { params(prefix_padding_ms: Integer).void } + attr_writer :prefix_padding_ms + + # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + # With shorter values the model will respond more quickly, but may jump in on + # short pauses from the user. + sig { returns(T.nilable(Integer)) } + attr_reader :silence_duration_ms + + sig { params(silence_duration_ms: Integer).void } + attr_writer :silence_duration_ms + + # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + # threshold will require louder audio to activate the model, and thus might + # perform better in noisy environments. + sig { returns(T.nilable(Float)) } + attr_reader :threshold + + sig { params(threshold: Float).void } + attr_writer :threshold + + # Type of turn detection, only `server_vad` is currently supported. + sig { returns(T.nilable(String)) } + attr_reader :type + + sig { params(type: String).void } + attr_writer :type + + # Configuration for turn detection. Can be set to `null` to turn off. Server VAD + # means that the model will detect the start and end of speech based on audio + # volume and respond at the end of user speech. + sig do + params( + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: String + ).returns(T.attached_class) + end + def self.new( + # Amount of audio to include before the VAD detected speech (in milliseconds). + # Defaults to 300ms. + prefix_padding_ms: nil, + # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. 
+ # With shorter values the model will respond more quickly, but may jump in on + # short pauses from the user. + silence_duration_ms: nil, + # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + # threshold will require louder audio to activate the model, and thus might + # perform better in noisy environments. + threshold: nil, + # Type of turn detection, only `server_vad` is currently supported. + type: nil + ) + end + + sig do + override.returns( + { + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: String + } + ) + end + def to_hash + end + end + end + end +end diff --git a/rbi/openai/models/realtime/realtime_truncation.rbi b/rbi/openai/models/realtime/realtime_truncation.rbi index 689ad7a5..a86ecf53 100644 --- a/rbi/openai/models/realtime/realtime_truncation.rbi +++ b/rbi/openai/models/realtime/realtime_truncation.rbi @@ -4,8 +4,7 @@ module OpenAI module Models module Realtime # Controls how the realtime conversation is truncated prior to model inference. - # The default is `auto`. When set to `retention_ratio`, the server retains a - # fraction of the conversation tokens prior to the instructions. + # The default is `auto`. module RealtimeTruncation extend OpenAI::Internal::Type::Union @@ -13,11 +12,13 @@ module OpenAI T.type_alias do T.any( OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::TaggedSymbol, - OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation + OpenAI::Realtime::RealtimeTruncationRetentionRatio ) end - # The truncation strategy to use for the session. + # The truncation strategy to use for the session. `auto` is the default truncation + # strategy. `disabled` will disable truncation and emit errors when the + # conversation exceeds the input token limit. module RealtimeTruncationStrategy extend OpenAI::Internal::Type::Enum @@ -52,58 +53,6 @@ module OpenAI end end - class RetentionRatioTruncation < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation, - OpenAI::Internal::AnyHash - ) - end - - # Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0). - sig { returns(Float) } - attr_accessor :retention_ratio - - # Use retention ratio truncation. - sig { returns(Symbol) } - attr_accessor :type - - # Optional cap on tokens allowed after the instructions. - sig { returns(T.nilable(Integer)) } - attr_accessor :post_instructions_token_limit - - # Retain a fraction of the conversation tokens. - sig do - params( - retention_ratio: Float, - post_instructions_token_limit: T.nilable(Integer), - type: Symbol - ).returns(T.attached_class) - end - def self.new( - # Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0). - retention_ratio:, - # Optional cap on tokens allowed after the instructions. - post_instructions_token_limit: nil, - # Use retention ratio truncation. 
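The standalone replacement for this removed class, `RealtimeTruncationRetentionRatio`, is added as its own file just below; a minimal usage sketch, assuming only the `self.new` signature shown there:

```ruby
# Hypothetical: retain 80% of conversation tokens once the input limit is hit.
truncation = OpenAI::Realtime::RealtimeTruncationRetentionRatio.new(
  retention_ratio: 0.8 # `type` defaults to :retention_ratio
)
```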
- type: :retention_ratio - ) - end - - sig do - override.returns( - { - retention_ratio: Float, - type: Symbol, - post_instructions_token_limit: T.nilable(Integer) - } - ) - end - def to_hash - end - end - sig do override.returns( T::Array[OpenAI::Realtime::RealtimeTruncation::Variants] diff --git a/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi b/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi new file mode 100644 index 00000000..d7929b09 --- /dev/null +++ b/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi @@ -0,0 +1,45 @@ +# typed: strong + +module OpenAI + module Models + module Realtime + class RealtimeTruncationRetentionRatio < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeTruncationRetentionRatio, + OpenAI::Internal::AnyHash + ) + end + + # Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the + # conversation exceeds the input token limit. + sig { returns(Float) } + attr_accessor :retention_ratio + + # Use retention ratio truncation. + sig { returns(Symbol) } + attr_accessor :type + + # Retain a fraction of the conversation tokens when the conversation exceeds the + # input token limit. This allows you to amortize truncations across multiple + # turns, which can help improve cached token usage. + sig do + params(retention_ratio: Float, type: Symbol).returns(T.attached_class) + end + def self.new( + # Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the + # conversation exceeds the input token limit. + retention_ratio:, + # Use retention ratio truncation. + type: :retention_ratio + ) + end + + sig { override.returns({ retention_ratio: Float, type: Symbol }) } + def to_hash + end + end + end + end +end diff --git a/rbi/openai/models/realtime/response_cancel_event.rbi b/rbi/openai/models/realtime/response_cancel_event.rbi index a077257f..918bfb68 100644 --- a/rbi/openai/models/realtime/response_cancel_event.rbi +++ b/rbi/openai/models/realtime/response_cancel_event.rbi @@ -33,7 +33,9 @@ module OpenAI # Send this event to cancel an in-progress response. The server will respond with # a `response.done` event with a status of `response.status=cancelled`. If there - # is no response to cancel, the server will respond with an error. + # is no response to cancel, the server will respond with an error. It's safe to + # call `response.cancel` even if no response is in progress; an error will be + # returned and the session will remain unaffected. sig do params(event_id: String, response_id: String, type: Symbol).returns( T.attached_class diff --git a/rbi/openai/models/realtime/response_create_event.rbi b/rbi/openai/models/realtime/response_create_event.rbi index c12d987a..19406b09 100644 --- a/rbi/openai/models/realtime/response_create_event.rbi +++ b/rbi/openai/models/realtime/response_create_event.rbi @@ -25,13 +25,13 @@ module OpenAI # Create a new Realtime response with these parameters sig do - returns(T.nilable(OpenAI::Realtime::ResponseCreateEvent::Response)) + returns(T.nilable(OpenAI::Realtime::RealtimeResponseCreateParams)) end attr_reader :response sig do params( - response: OpenAI::Realtime::ResponseCreateEvent::Response::OrHash + response: OpenAI::Realtime::RealtimeResponseCreateParams::OrHash ).void end attr_writer :response @@ -42,19 +42,30 @@ module OpenAI # # A Response will include at least one Item, and may have two, in which case the # second will be a function call.
These Items will be appended to the conversation - # history. + # history by default. # # The server will respond with a `response.created` event, events for Items and # content created, and finally a `response.done` event to indicate the Response is # complete. # - # The `response.create` event includes inference configuration like - # `instructions`, and `temperature`. These fields will override the Session's - # configuration for this Response only. + # The `response.create` event includes inference configuration like `instructions` + # and `tools`. If these are set, they will override the Session's configuration + # for this Response only. + # + # Responses can be created out-of-band of the default Conversation, meaning that + # they can have arbitrary input, and it's possible to disable writing the output + # to the Conversation. Only one Response can write to the default Conversation at + # a time, but otherwise multiple Responses can be created in parallel. The + # `metadata` field is a good way to disambiguate multiple simultaneous Responses. + # + # Clients can set `conversation` to `none` to create a Response that does not + # write to the default Conversation. Arbitrary input can be provided with the + # `input` field, which is an array accepting raw Items and references to existing + # Items. sig do params( event_id: String, - response: OpenAI::Realtime::ResponseCreateEvent::Response::OrHash, + response: OpenAI::Realtime::RealtimeResponseCreateParams::OrHash, type: Symbol ).returns(T.attached_class) end @@ -73,790 +84,12 @@ module OpenAI { type: Symbol, event_id: String, - response: OpenAI::Realtime::ResponseCreateEvent::Response + response: OpenAI::Realtime::RealtimeResponseCreateParams } ) end def to_hash end - - class Response < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::ResponseCreateEvent::Response, - OpenAI::Internal::AnyHash - ) - end - - # Controls which conversation the response is added to. Currently supports `auto` - # and `none`, with `auto` as the default value. The `auto` value means that the - # contents of the response will be added to the default conversation. Set this to - # `none` to create an out-of-band response which will not add items to default - # conversation. - sig do - returns( - T.nilable( - T.any( - String, - OpenAI::Realtime::ResponseCreateEvent::Response::Conversation::OrSymbol - ) - ) - ) - end - attr_reader :conversation - - sig do - params( - conversation: - T.any( - String, - OpenAI::Realtime::ResponseCreateEvent::Response::Conversation::OrSymbol - ) - ).void - end - attr_writer :conversation - - # Input items to include in the prompt for the model. Using this field creates a - # new context for this Response instead of using the default conversation. An - # empty array `[]` will clear the context for this Response. Note that this can - # include references to items from the default conversation. 
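A hedged sketch of the out-of-band flow described above: `RealtimeResponseCreateParams` is defined in a separate file not shown in this hunk, so the field names below (`conversation`, `instructions`, `metadata`) are taken from this documentation and passed in the hash form the `OrHash` parameter accepts.

```ruby
# Out-of-band response: nothing is written to the default conversation.
event = OpenAI::Realtime::ResponseCreateEvent.new(
  response: {
    conversation: :none, # keep output out of the default conversation
    instructions: "Summarize the conversation so far in one sentence.",
    metadata: {purpose: "summary"} # disambiguates simultaneous responses
  }
)
```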
- sig do - returns( - T.nilable( - T::Array[ - T.any( - OpenAI::Realtime::RealtimeConversationItemSystemMessage, - OpenAI::Realtime::RealtimeConversationItemUserMessage, - OpenAI::Realtime::RealtimeConversationItemAssistantMessage, - OpenAI::Realtime::RealtimeConversationItemFunctionCall, - OpenAI::Realtime::RealtimeConversationItemFunctionCallOutput, - OpenAI::Realtime::RealtimeMcpApprovalResponse, - OpenAI::Realtime::RealtimeMcpListTools, - OpenAI::Realtime::RealtimeMcpToolCall, - OpenAI::Realtime::RealtimeMcpApprovalRequest - ) - ] - ) - ) - end - attr_reader :input - - sig do - params( - input: - T::Array[ - T.any( - OpenAI::Realtime::RealtimeConversationItemSystemMessage::OrHash, - OpenAI::Realtime::RealtimeConversationItemUserMessage::OrHash, - OpenAI::Realtime::RealtimeConversationItemAssistantMessage::OrHash, - OpenAI::Realtime::RealtimeConversationItemFunctionCall::OrHash, - OpenAI::Realtime::RealtimeConversationItemFunctionCallOutput::OrHash, - OpenAI::Realtime::RealtimeMcpApprovalResponse::OrHash, - OpenAI::Realtime::RealtimeMcpListTools::OrHash, - OpenAI::Realtime::RealtimeMcpToolCall::OrHash, - OpenAI::Realtime::RealtimeMcpApprovalRequest::OrHash - ) - ] - ).void - end - attr_writer :input - - # The default system instructions (i.e. system message) prepended to model calls. - # This field allows the client to guide the model on desired responses. The model - # can be instructed on response content and format, (e.g. "be extremely succinct", - # "act friendly", "here are examples of good responses") and on audio behavior - # (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The - # instructions are not guaranteed to be followed by the model, but they provide - # guidance to the model on the desired behavior. - # - # Note that the server sets default instructions which will be used if this field - # is not set and are visible in the `session.created` event at the start of the - # session. - sig { returns(T.nilable(String)) } - attr_reader :instructions - - sig { params(instructions: String).void } - attr_writer :instructions - - # Maximum number of output tokens for a single assistant response, inclusive of - # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or - # `inf` for the maximum available tokens for a given model. Defaults to `inf`. - sig { returns(T.nilable(T.any(Integer, Symbol))) } - attr_reader :max_output_tokens - - sig { params(max_output_tokens: T.any(Integer, Symbol)).void } - attr_writer :max_output_tokens - - # Set of 16 key-value pairs that can be attached to an object. This can be useful - # for storing additional information about the object in a structured format, and - # querying for objects via API or the dashboard. - # - # Keys are strings with a maximum length of 64 characters. Values are strings with - # a maximum length of 512 characters. - sig { returns(T.nilable(T::Hash[Symbol, String])) } - attr_accessor :metadata - - # The set of modalities the model can respond with. To disable audio, set this to - # ["text"]. - sig do - returns( - T.nilable( - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Modality::OrSymbol - ] - ) - ) - end - attr_reader :modalities - - sig do - params( - modalities: - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Modality::OrSymbol - ] - ).void - end - attr_writer :modalities - - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. 
- sig do - returns( - T.nilable( - OpenAI::Realtime::ResponseCreateEvent::Response::OutputAudioFormat::OrSymbol - ) - ) - end - attr_reader :output_audio_format - - sig do - params( - output_audio_format: - OpenAI::Realtime::ResponseCreateEvent::Response::OutputAudioFormat::OrSymbol - ).void - end - attr_writer :output_audio_format - - # Reference to a prompt template and its variables. - # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). - sig { returns(T.nilable(OpenAI::Responses::ResponsePrompt)) } - attr_reader :prompt - - sig do - params( - prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash) - ).void - end - attr_writer :prompt - - # Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. - sig { returns(T.nilable(Float)) } - attr_reader :temperature - - sig { params(temperature: Float).void } - attr_writer :temperature - - # How the model chooses tools. Provide one of the string modes or force a specific - # function/MCP tool. - sig do - returns( - T.nilable( - T.any( - OpenAI::Responses::ToolChoiceOptions::OrSymbol, - OpenAI::Responses::ToolChoiceFunction, - OpenAI::Responses::ToolChoiceMcp - ) - ) - ) - end - attr_reader :tool_choice - - sig do - params( - tool_choice: - T.any( - OpenAI::Responses::ToolChoiceOptions::OrSymbol, - OpenAI::Responses::ToolChoiceFunction::OrHash, - OpenAI::Responses::ToolChoiceMcp::OrHash - ) - ).void - end - attr_writer :tool_choice - - # Tools (functions) available to the model. - sig do - returns( - T.nilable( - T::Array[OpenAI::Realtime::ResponseCreateEvent::Response::Tool] - ) - ) - end - attr_reader :tools - - sig do - params( - tools: - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Tool::OrHash - ] - ).void - end - attr_writer :tools - - # The voice the model uses to respond. Voice cannot be changed during the session - # once the model has responded with audio at least once. Current voice options are - # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. 
- sig do - returns( - T.nilable( - T.any( - String, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::OrSymbol - ) - ) - ) - end - attr_reader :voice - - sig do - params( - voice: - T.any( - String, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::OrSymbol - ) - ).void - end - attr_writer :voice - - # Create a new Realtime response with these parameters - sig do - params( - conversation: - T.any( - String, - OpenAI::Realtime::ResponseCreateEvent::Response::Conversation::OrSymbol - ), - input: - T::Array[ - T.any( - OpenAI::Realtime::RealtimeConversationItemSystemMessage::OrHash, - OpenAI::Realtime::RealtimeConversationItemUserMessage::OrHash, - OpenAI::Realtime::RealtimeConversationItemAssistantMessage::OrHash, - OpenAI::Realtime::RealtimeConversationItemFunctionCall::OrHash, - OpenAI::Realtime::RealtimeConversationItemFunctionCallOutput::OrHash, - OpenAI::Realtime::RealtimeMcpApprovalResponse::OrHash, - OpenAI::Realtime::RealtimeMcpListTools::OrHash, - OpenAI::Realtime::RealtimeMcpToolCall::OrHash, - OpenAI::Realtime::RealtimeMcpApprovalRequest::OrHash - ) - ], - instructions: String, - max_output_tokens: T.any(Integer, Symbol), - metadata: T.nilable(T::Hash[Symbol, String]), - modalities: - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Modality::OrSymbol - ], - output_audio_format: - OpenAI::Realtime::ResponseCreateEvent::Response::OutputAudioFormat::OrSymbol, - prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash), - temperature: Float, - tool_choice: - T.any( - OpenAI::Responses::ToolChoiceOptions::OrSymbol, - OpenAI::Responses::ToolChoiceFunction::OrHash, - OpenAI::Responses::ToolChoiceMcp::OrHash - ), - tools: - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Tool::OrHash - ], - voice: - T.any( - String, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::OrSymbol - ) - ).returns(T.attached_class) - end - def self.new( - # Controls which conversation the response is added to. Currently supports `auto` - # and `none`, with `auto` as the default value. The `auto` value means that the - # contents of the response will be added to the default conversation. Set this to - # `none` to create an out-of-band response which will not add items to default - # conversation. - conversation: nil, - # Input items to include in the prompt for the model. Using this field creates a - # new context for this Response instead of using the default conversation. An - # empty array `[]` will clear the context for this Response. Note that this can - # include references to items from the default conversation. - input: nil, - # The default system instructions (i.e. system message) prepended to model calls. - # This field allows the client to guide the model on desired responses. The model - # can be instructed on response content and format, (e.g. "be extremely succinct", - # "act friendly", "here are examples of good responses") and on audio behavior - # (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The - # instructions are not guaranteed to be followed by the model, but they provide - # guidance to the model on the desired behavior. - # - # Note that the server sets default instructions which will be used if this field - # is not set and are visible in the `session.created` event at the start of the - # session. - instructions: nil, - # Maximum number of output tokens for a single assistant response, inclusive of - # tool calls. 
Provide an integer between 1 and 4096 to limit output tokens, or - # `inf` for the maximum available tokens for a given model. Defaults to `inf`. - max_output_tokens: nil, - # Set of 16 key-value pairs that can be attached to an object. This can be useful - # for storing additional information about the object in a structured format, and - # querying for objects via API or the dashboard. - # - # Keys are strings with a maximum length of 64 characters. Values are strings with - # a maximum length of 512 characters. - metadata: nil, - # The set of modalities the model can respond with. To disable audio, set this to - # ["text"]. - modalities: nil, - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - output_audio_format: nil, - # Reference to a prompt template and its variables. - # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). - prompt: nil, - # Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. - temperature: nil, - # How the model chooses tools. Provide one of the string modes or force a specific - # function/MCP tool. - tool_choice: nil, - # Tools (functions) available to the model. - tools: nil, - # The voice the model uses to respond. Voice cannot be changed during the session - # once the model has responded with audio at least once. Current voice options are - # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. - voice: nil - ) - end - - sig do - override.returns( - { - conversation: - T.any( - String, - OpenAI::Realtime::ResponseCreateEvent::Response::Conversation::OrSymbol - ), - input: - T::Array[ - T.any( - OpenAI::Realtime::RealtimeConversationItemSystemMessage, - OpenAI::Realtime::RealtimeConversationItemUserMessage, - OpenAI::Realtime::RealtimeConversationItemAssistantMessage, - OpenAI::Realtime::RealtimeConversationItemFunctionCall, - OpenAI::Realtime::RealtimeConversationItemFunctionCallOutput, - OpenAI::Realtime::RealtimeMcpApprovalResponse, - OpenAI::Realtime::RealtimeMcpListTools, - OpenAI::Realtime::RealtimeMcpToolCall, - OpenAI::Realtime::RealtimeMcpApprovalRequest - ) - ], - instructions: String, - max_output_tokens: T.any(Integer, Symbol), - metadata: T.nilable(T::Hash[Symbol, String]), - modalities: - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Modality::OrSymbol - ], - output_audio_format: - OpenAI::Realtime::ResponseCreateEvent::Response::OutputAudioFormat::OrSymbol, - prompt: T.nilable(OpenAI::Responses::ResponsePrompt), - temperature: Float, - tool_choice: - T.any( - OpenAI::Responses::ToolChoiceOptions::OrSymbol, - OpenAI::Responses::ToolChoiceFunction, - OpenAI::Responses::ToolChoiceMcp - ), - tools: - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Tool - ], - voice: - T.any( - String, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::OrSymbol - ) - } - ) - end - def to_hash - end - - # Controls which conversation the response is added to. Currently supports `auto` - # and `none`, with `auto` as the default value. The `auto` value means that the - # contents of the response will be added to the default conversation. Set this to - # `none` to create an out-of-band response which will not add items to default - # conversation. 
- module Conversation - extend OpenAI::Internal::Type::Union - - Variants = - T.type_alias do - T.any( - String, - OpenAI::Realtime::ResponseCreateEvent::Response::Conversation::TaggedSymbol - ) - end - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Conversation::Variants - ] - ) - end - def self.variants - end - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::ResponseCreateEvent::Response::Conversation - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - AUTO = - T.let( - :auto, - OpenAI::Realtime::ResponseCreateEvent::Response::Conversation::TaggedSymbol - ) - NONE = - T.let( - :none, - OpenAI::Realtime::ResponseCreateEvent::Response::Conversation::TaggedSymbol - ) - end - - # Maximum number of output tokens for a single assistant response, inclusive of - # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or - # `inf` for the maximum available tokens for a given model. Defaults to `inf`. - module MaxOutputTokens - extend OpenAI::Internal::Type::Union - - Variants = T.type_alias { T.any(Integer, Symbol) } - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::MaxOutputTokens::Variants - ] - ) - end - def self.variants - end - end - - module Modality - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::ResponseCreateEvent::Response::Modality - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - TEXT = - T.let( - :text, - OpenAI::Realtime::ResponseCreateEvent::Response::Modality::TaggedSymbol - ) - AUDIO = - T.let( - :audio, - OpenAI::Realtime::ResponseCreateEvent::Response::Modality::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Modality::TaggedSymbol - ] - ) - end - def self.values - end - end - - # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - module OutputAudioFormat - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::ResponseCreateEvent::Response::OutputAudioFormat - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - PCM16 = - T.let( - :pcm16, - OpenAI::Realtime::ResponseCreateEvent::Response::OutputAudioFormat::TaggedSymbol - ) - G711_ULAW = - T.let( - :g711_ulaw, - OpenAI::Realtime::ResponseCreateEvent::Response::OutputAudioFormat::TaggedSymbol - ) - G711_ALAW = - T.let( - :g711_alaw, - OpenAI::Realtime::ResponseCreateEvent::Response::OutputAudioFormat::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::OutputAudioFormat::TaggedSymbol - ] - ) - end - def self.values - end - end - - # How the model chooses tools. Provide one of the string modes or force a specific - # function/MCP tool. 
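For reference, a hedged sketch of the two forms this tool-choice union accepts; the function name is hypothetical:

```ruby
# Force one specific function tool for a single response...
tool_choice = OpenAI::Responses::ToolChoiceFunction.new(name: "get_weather")
# ...or pass a plain mode instead, e.g. :auto, :none, or :required.
tool_choice = :auto
```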
- module ToolChoice - extend OpenAI::Internal::Type::Union - - Variants = - T.type_alias do - T.any( - OpenAI::Responses::ToolChoiceOptions::TaggedSymbol, - OpenAI::Responses::ToolChoiceFunction, - OpenAI::Responses::ToolChoiceMcp - ) - end - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::ToolChoice::Variants - ] - ) - end - def self.variants - end - end - - class Tool < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::ResponseCreateEvent::Response::Tool, - OpenAI::Internal::AnyHash - ) - end - - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - sig { returns(T.nilable(String)) } - attr_reader :description - - sig { params(description: String).void } - attr_writer :description - - # The name of the function. - sig { returns(T.nilable(String)) } - attr_reader :name - - sig { params(name: String).void } - attr_writer :name - - # Parameters of the function in JSON Schema. - sig { returns(T.nilable(T.anything)) } - attr_reader :parameters - - sig { params(parameters: T.anything).void } - attr_writer :parameters - - # The type of the tool, i.e. `function`. - sig do - returns( - T.nilable( - OpenAI::Realtime::ResponseCreateEvent::Response::Tool::Type::OrSymbol - ) - ) - end - attr_reader :type - - sig do - params( - type: - OpenAI::Realtime::ResponseCreateEvent::Response::Tool::Type::OrSymbol - ).void - end - attr_writer :type - - sig do - params( - description: String, - name: String, - parameters: T.anything, - type: - OpenAI::Realtime::ResponseCreateEvent::Response::Tool::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new( - # The description of the function, including guidance on when and how to call it, - # and guidance about what to tell the user when calling (if anything). - description: nil, - # The name of the function. - name: nil, - # Parameters of the function in JSON Schema. - parameters: nil, - # The type of the tool, i.e. `function`. - type: nil - ) - end - - sig do - override.returns( - { - description: String, - name: String, - parameters: T.anything, - type: - OpenAI::Realtime::ResponseCreateEvent::Response::Tool::Type::OrSymbol - } - ) - end - def to_hash - end - - # The type of the tool, i.e. `function`. - module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::ResponseCreateEvent::Response::Tool::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - FUNCTION = - T.let( - :function, - OpenAI::Realtime::ResponseCreateEvent::Response::Tool::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Tool::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - # The voice the model uses to respond. Voice cannot be changed during the session - # once the model has responded with audio at least once. Current voice options are - # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`. 
- module Voice - extend OpenAI::Internal::Type::Union - - Variants = - T.type_alias do - T.any( - String, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - end - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::Variants - ] - ) - end - def self.variants - end - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - ALLOY = - T.let( - :alloy, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - ASH = - T.let( - :ash, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - BALLAD = - T.let( - :ballad, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - CORAL = - T.let( - :coral, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - ECHO = - T.let( - :echo, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - SAGE = - T.let( - :sage, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - SHIMMER = - T.let( - :shimmer, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - VERSE = - T.let( - :verse, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - MARIN = - T.let( - :marin, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - CEDAR = - T.let( - :cedar, - OpenAI::Realtime::ResponseCreateEvent::Response::Voice::TaggedSymbol - ) - end - end end end end diff --git a/rbi/openai/models/realtime/response_done_event.rbi b/rbi/openai/models/realtime/response_done_event.rbi index 61f892c5..716c5235 100644 --- a/rbi/openai/models/realtime/response_done_event.rbi +++ b/rbi/openai/models/realtime/response_done_event.rbi @@ -32,6 +32,13 @@ module OpenAI # Returned when a Response is done streaming. Always emitted, no matter the final # state. The Response object included in the `response.done` event will include # all output Items in the Response but will omit the raw audio data. + # + # Clients should check the `status` field of the Response to determine if it was + # successful (`completed`) or if there was another outcome: `cancelled`, `failed`, + # or `incomplete`. + # + # A response will contain all output items that were generated during the + # response, excluding any audio content. sig do params( event_id: String, diff --git a/rbi/openai/models/realtime/session_created_event.rbi b/rbi/openai/models/realtime/session_created_event.rbi index 6c90c87f..8193b5ac 100644 --- a/rbi/openai/models/realtime/session_created_event.rbi +++ b/rbi/openai/models/realtime/session_created_event.rbi @@ -16,12 +16,16 @@ module OpenAI sig { returns(String) } attr_accessor :event_id - # Realtime session object. - sig { returns(OpenAI::Realtime::RealtimeSession) } - attr_reader :session - - sig { params(session: OpenAI::Realtime::RealtimeSession::OrHash).void } - attr_writer :session + # The session configuration. + sig do + returns( + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + ) + ) + end + attr_accessor :session # The event type, must be `session.created`. 
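Since `session` on `session.created` is now a union of the two request shapes, a consumer can branch on the concrete class; a minimal sketch, assuming `event` is an already-parsed server event:

```ruby
case event.session
when OpenAI::Realtime::RealtimeSessionCreateRequest
  # full realtime session configuration
when OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest
  # transcription-only session configuration
end
```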
sig { returns(Symbol) } @@ -33,14 +37,18 @@ module OpenAI sig do params( event_id: String, - session: OpenAI::Realtime::RealtimeSession::OrHash, + session: + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest::OrHash, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::OrHash + ), type: Symbol ).returns(T.attached_class) end def self.new( # The unique ID of the server event. event_id:, - # Realtime session object. + # The session configuration. session:, # The event type, must be `session.created`. type: :"session.created" @@ -51,13 +59,38 @@ module OpenAI override.returns( { event_id: String, - session: OpenAI::Realtime::RealtimeSession, + session: + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + ), type: Symbol } ) end def to_hash end + + # The session configuration. + module Session + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + ) + end + + sig do + override.returns( + T::Array[OpenAI::Realtime::SessionCreatedEvent::Session::Variants] + ) + end + def self.variants + end + end end end end diff --git a/rbi/openai/models/realtime/session_update_event.rbi b/rbi/openai/models/realtime/session_update_event.rbi index 0f155e1f..3bd47a99 100644 --- a/rbi/openai/models/realtime/session_update_event.rbi +++ b/rbi/openai/models/realtime/session_update_event.rbi @@ -12,48 +12,60 @@ module OpenAI ) end - # Realtime session object configuration. - sig { returns(OpenAI::Realtime::RealtimeSessionCreateRequest) } - attr_reader :session - + # Update the Realtime session. Choose either a realtime session or a transcription + # session. sig do - params( - session: OpenAI::Realtime::RealtimeSessionCreateRequest::OrHash - ).void + returns( + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + ) + ) end - attr_writer :session + attr_accessor :session # The event type, must be `session.update`. sig { returns(Symbol) } attr_accessor :type - # Optional client-generated ID used to identify this event. + # Optional client-generated ID used to identify this event. This is an arbitrary + # string that a client may assign. It will be passed back if there is an error + # with the event, but the corresponding `session.updated` event will not include + # it. sig { returns(T.nilable(String)) } attr_reader :event_id sig { params(event_id: String).void } attr_writer :event_id - # Send this event to update the session’s default configuration. The client may - # send this event at any time to update any field, except for `voice`. However, - # note that once a session has been initialized with a particular `model`, it - # can’t be changed to another model using `session.update`. + # Send this event to update the session’s configuration. The client may send this + # event at any time to update any field except for `voice` and `model`. `voice` + # can be updated only if there have been no other audio outputs yet. # # When the server receives a `session.update`, it will respond with a # `session.updated` event showing the full, effective configuration. Only the - # fields that are present are updated. To clear a field like `instructions`, pass - # an empty string. + # fields that are present in the `session.update` are updated. To clear a field + # like `instructions`, pass an empty string. 
To clear a field like `tools`, pass + # an empty array. To clear a field like `turn_detection`, pass `null`. sig do params( - session: OpenAI::Realtime::RealtimeSessionCreateRequest::OrHash, + session: + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest::OrHash, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::OrHash + ), event_id: String, type: Symbol ).returns(T.attached_class) end def self.new( - # Realtime session object configuration. + # Update the Realtime session. Choose either a realtime session or a transcription + # session. session:, - # Optional client-generated ID used to identify this event. + # Optional client-generated ID used to identify this event. This is an arbitrary + # string that a client may assign. It will be passed back if there is an error + # with the event, but the corresponding `session.updated` event will not include + # it. event_id: nil, # The event type, must be `session.update`. type: :"session.update" @@ -63,7 +75,11 @@ module OpenAI sig do override.returns( { - session: OpenAI::Realtime::RealtimeSessionCreateRequest, + session: + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + ), type: Symbol, event_id: String } @@ -71,6 +87,28 @@ module OpenAI end def to_hash end + + # Update the Realtime session. Choose either a realtime session or a transcription + # session. + module Session + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + ) + end + + sig do + override.returns( + T::Array[OpenAI::Realtime::SessionUpdateEvent::Session::Variants] + ) + end + def self.variants + end + end end end end diff --git a/rbi/openai/models/realtime/session_updated_event.rbi b/rbi/openai/models/realtime/session_updated_event.rbi index be675d6d..55e84661 100644 --- a/rbi/openai/models/realtime/session_updated_event.rbi +++ b/rbi/openai/models/realtime/session_updated_event.rbi @@ -16,12 +16,16 @@ module OpenAI sig { returns(String) } attr_accessor :event_id - # Realtime session object. - sig { returns(OpenAI::Realtime::RealtimeSession) } - attr_reader :session - - sig { params(session: OpenAI::Realtime::RealtimeSession::OrHash).void } - attr_writer :session + # The session configuration. + sig do + returns( + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + ) + ) + end + attr_accessor :session # The event type, must be `session.updated`. sig { returns(Symbol) } @@ -32,14 +36,18 @@ module OpenAI sig do params( event_id: String, - session: OpenAI::Realtime::RealtimeSession::OrHash, + session: + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest::OrHash, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::OrHash + ), type: Symbol ).returns(T.attached_class) end def self.new( # The unique ID of the server event. event_id:, - # Realtime session object. + # The session configuration. session:, # The event type, must be `session.updated`. type: :"session.updated" @@ -50,13 +58,38 @@ module OpenAI override.returns( { event_id: String, - session: OpenAI::Realtime::RealtimeSession, + session: + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + ), type: Symbol } ) end def to_hash end + + # The session configuration. 
+ module Session + extend OpenAI::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + OpenAI::Realtime::RealtimeSessionCreateRequest, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + ) + end + + sig do + override.returns( + T::Array[OpenAI::Realtime::SessionUpdatedEvent::Session::Variants] + ) + end + def self.variants + end + end end end end diff --git a/rbi/openai/models/realtime/transcription_session_created.rbi b/rbi/openai/models/realtime/transcription_session_created.rbi index 9cabe10d..9ca459db 100644 --- a/rbi/openai/models/realtime/transcription_session_created.rbi +++ b/rbi/openai/models/realtime/transcription_session_created.rbi @@ -16,14 +16,20 @@ module OpenAI sig { returns(String) } attr_accessor :event_id - # A Realtime transcription session configuration object. - sig { returns(OpenAI::Realtime::TranscriptionSessionCreated::Session) } + # A new Realtime transcription session configuration. + # + # When a session is created on the server via REST API, the session object also + # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is + # not present when a session is updated via the WebSocket API. + sig do + returns(OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse) + end attr_reader :session sig do params( session: - OpenAI::Realtime::TranscriptionSessionCreated::Session::OrHash + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::OrHash ).void end attr_writer :session @@ -37,14 +43,18 @@ module OpenAI params( event_id: String, session: - OpenAI::Realtime::TranscriptionSessionCreated::Session::OrHash, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::OrHash, type: Symbol ).returns(T.attached_class) end def self.new( # The unique ID of the server event. event_id:, - # A Realtime transcription session configuration object. + # A new Realtime transcription session configuration. + # + # When a session is created on the server via REST API, the session object also + # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is + # not present when a session is updated via the WebSocket API. session:, # The event type, must be `transcription_session.created`. type: :"transcription_session.created" @@ -55,598 +65,14 @@ module OpenAI override.returns( { event_id: String, - session: OpenAI::Realtime::TranscriptionSessionCreated::Session, + session: + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse, type: Symbol } ) end def to_hash end - - class Session < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionCreated::Session, - OpenAI::Internal::AnyHash - ) - end - - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - sig { returns(T.nilable(String)) } - attr_reader :id - - sig { params(id: String).void } - attr_writer :id - - # Configuration for input audio for the session. - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio - ) - ) - end - attr_reader :audio - - sig do - params( - audio: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::OrHash - ).void - end - attr_writer :audio - - # Expiration timestamp for the session, in seconds since epoch. - sig { returns(T.nilable(Integer)) } - attr_reader :expires_at - - sig { params(expires_at: Integer).void } - attr_writer :expires_at - - # Additional fields to include in server outputs. 
- # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. - sig do - returns( - T.nilable( - T::Array[ - OpenAI::Realtime::TranscriptionSessionCreated::Session::Include::OrSymbol - ] - ) - ) - end - attr_reader :include - - sig do - params( - include: - T::Array[ - OpenAI::Realtime::TranscriptionSessionCreated::Session::Include::OrSymbol - ] - ).void - end - attr_writer :include - - # The object type. Always `realtime.transcription_session`. - sig { returns(T.nilable(String)) } - attr_reader :object - - sig { params(object: String).void } - attr_writer :object - - # A Realtime transcription session configuration object. - sig do - params( - id: String, - audio: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::OrHash, - expires_at: Integer, - include: - T::Array[ - OpenAI::Realtime::TranscriptionSessionCreated::Session::Include::OrSymbol - ], - object: String - ).returns(T.attached_class) - end - def self.new( - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - id: nil, - # Configuration for input audio for the session. - audio: nil, - # Expiration timestamp for the session, in seconds since epoch. - expires_at: nil, - # Additional fields to include in server outputs. - # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. - include: nil, - # The object type. Always `realtime.transcription_session`. - object: nil - ) - end - - sig do - override.returns( - { - id: String, - audio: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio, - expires_at: Integer, - include: - T::Array[ - OpenAI::Realtime::TranscriptionSessionCreated::Session::Include::OrSymbol - ], - object: String - } - ) - end - def to_hash - end - - class Audio < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio, - OpenAI::Internal::AnyHash - ) - end - - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input - ) - ) - end - attr_reader :input - - sig do - params( - input: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::OrHash - ).void - end - attr_writer :input - - # Configuration for input audio for the session. - sig do - params( - input: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::OrHash - ).returns(T.attached_class) - end - def self.new(input: nil) - end - - sig do - override.returns( - { - input: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input - } - ) - end - def to_hash - end - - class Input < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input, - OpenAI::Internal::AnyHash - ) - end - - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - sig { returns(T.nilable(String)) } - attr_reader :format_ - - sig { params(format_: String).void } - attr_writer :format_ - - # Configuration for input audio noise reduction. - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction - ) - ) - end - attr_reader :noise_reduction - - sig do - params( - noise_reduction: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::OrHash - ).void - end - attr_writer :noise_reduction - - # Configuration of the transcription model. 
- sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription - ) - ) - end - attr_reader :transcription - - sig do - params( - transcription: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::OrHash - ).void - end - attr_writer :transcription - - # Configuration for turn detection. - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection - ) - ) - end - attr_reader :turn_detection - - sig do - params( - turn_detection: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection::OrHash - ).void - end - attr_writer :turn_detection - - sig do - params( - format_: String, - noise_reduction: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::OrHash, - transcription: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::OrHash, - turn_detection: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection::OrHash - ).returns(T.attached_class) - end - def self.new( - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - format_: nil, - # Configuration for input audio noise reduction. - noise_reduction: nil, - # Configuration of the transcription model. - transcription: nil, - # Configuration for turn detection. - turn_detection: nil - ) - end - - sig do - override.returns( - { - format_: String, - noise_reduction: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction, - transcription: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription, - turn_detection: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection - } - ) - end - def to_hash - end - - class NoiseReduction < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction, - OpenAI::Internal::AnyHash - ) - end - - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type::OrSymbol - ) - ) - end - attr_reader :type - - sig do - params( - type: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type::OrSymbol - ).void - end - attr_writer :type - - # Configuration for input audio noise reduction. 
- sig do - params( - type: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new(type: nil) - end - - sig do - override.returns( - { - type: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type::OrSymbol - } - ) - end - def to_hash - end - - module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - NEAR_FIELD = - T.let( - :near_field, - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type::TaggedSymbol - ) - FAR_FIELD = - T.let( - :far_field, - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class Transcription < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription, - OpenAI::Internal::AnyHash - ) - end - - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - sig { returns(T.nilable(String)) } - attr_reader :language - - sig { params(language: String).void } - attr_writer :language - - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model::OrSymbol - ) - ) - end - attr_reader :model - - sig do - params( - model: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model::OrSymbol - ).void - end - attr_writer :model - - # An optional text to guide the model's style or continue a previous audio - # segment. The - # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) - # should match the audio language. - sig { returns(T.nilable(String)) } - attr_reader :prompt - - sig { params(prompt: String).void } - attr_writer :prompt - - # Configuration of the transcription model. - sig do - params( - language: String, - model: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model::OrSymbol, - prompt: String - ).returns(T.attached_class) - end - def self.new( - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - language: nil, - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - model: nil, - # An optional text to guide the model's style or continue a previous audio - # segment. The - # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) - # should match the audio language. 
- prompt: nil - ) - end - - sig do - override.returns( - { - language: String, - model: - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model::OrSymbol, - prompt: String - } - ) - end - def to_hash - end - - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - module Model - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - GPT_4O_TRANSCRIBE = - T.let( - :"gpt-4o-transcribe", - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model::TaggedSymbol - ) - GPT_4O_MINI_TRANSCRIBE = - T.let( - :"gpt-4o-mini-transcribe", - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model::TaggedSymbol - ) - WHISPER_1 = - T.let( - :"whisper-1", - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class TurnDetection < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection, - OpenAI::Internal::AnyHash - ) - end - - sig { returns(T.nilable(Integer)) } - attr_reader :prefix_padding_ms - - sig { params(prefix_padding_ms: Integer).void } - attr_writer :prefix_padding_ms - - sig { returns(T.nilable(Integer)) } - attr_reader :silence_duration_ms - - sig { params(silence_duration_ms: Integer).void } - attr_writer :silence_duration_ms - - sig { returns(T.nilable(Float)) } - attr_reader :threshold - - sig { params(threshold: Float).void } - attr_writer :threshold - - # Type of turn detection, only `server_vad` is currently supported. - sig { returns(T.nilable(String)) } - attr_reader :type - - sig { params(type: String).void } - attr_writer :type - - # Configuration for turn detection. - sig do - params( - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - ).returns(T.attached_class) - end - def self.new( - prefix_padding_ms: nil, - silence_duration_ms: nil, - threshold: nil, - # Type of turn detection, only `server_vad` is currently supported. 
- type: nil - ) - end - - sig do - override.returns( - { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } - ) - end - def to_hash - end - end - end - end - - module Include - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::TranscriptionSessionCreated::Session::Include - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = - T.let( - :"item.input_audio_transcription.logprobs", - OpenAI::Realtime::TranscriptionSessionCreated::Session::Include::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::TranscriptionSessionCreated::Session::Include::TaggedSymbol - ] - ) - end - def self.values - end - end - end end end end diff --git a/rbi/openai/models/realtime/transcription_session_update.rbi b/rbi/openai/models/realtime/transcription_session_update.rbi index fd1352ec..5d9af0f3 100644 --- a/rbi/openai/models/realtime/transcription_session_update.rbi +++ b/rbi/openai/models/realtime/transcription_session_update.rbi @@ -13,15 +13,13 @@ module OpenAI end # Realtime transcription session object configuration. - sig do - returns(OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest) - end + sig { returns(OpenAI::Realtime::TranscriptionSessionUpdate::Session) } attr_reader :session sig do params( session: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::OrHash + OpenAI::Realtime::TranscriptionSessionUpdate::Session::OrHash ).void end attr_writer :session @@ -41,7 +39,7 @@ module OpenAI sig do params( session: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::OrHash, + OpenAI::Realtime::TranscriptionSessionUpdate::Session::OrHash, event_id: String, type: Symbol ).returns(T.attached_class) @@ -59,8 +57,7 @@ module OpenAI sig do override.returns( { - session: - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest, + session: OpenAI::Realtime::TranscriptionSessionUpdate::Session, type: Symbol, event_id: String } @@ -68,6 +65,427 @@ module OpenAI end def to_hash end + + class Session < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::TranscriptionSessionUpdate::Session, + OpenAI::Internal::AnyHash + ) + end + + # The set of items to include in the transcription. Current available items are: + # `item.input_audio_transcription.logprobs` + sig do + returns( + T.nilable( + T::Array[ + OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::OrSymbol + ] + ) + ) + end + attr_reader :include + + sig do + params( + include: + T::Array[ + OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::OrSymbol + ] + ).void + end + attr_writer :include + + # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + # (mono), and little-endian byte order. + sig do + returns( + T.nilable( + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::OrSymbol + ) + ) + end + attr_reader :input_audio_format + + sig do + params( + input_audio_format: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::OrSymbol + ).void + end + attr_writer :input_audio_format + + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. 
Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + sig do + returns( + T.nilable( + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction + ) + ) + end + attr_reader :input_audio_noise_reduction + + sig do + params( + input_audio_noise_reduction: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction::OrHash + ).void + end + attr_writer :input_audio_noise_reduction + + # Configuration for input audio transcription. The client can optionally set the + # language and prompt for transcription; these offer additional guidance to the + # transcription service. + sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) } + attr_reader :input_audio_transcription + + sig do + params( + input_audio_transcription: + OpenAI::Realtime::AudioTranscription::OrHash + ).void + end + attr_writer :input_audio_transcription + + # Configuration for turn detection. Can be set to `null` to turn off. Server VAD + # means that the model will detect the start and end of speech based on audio + # volume and respond at the end of user speech. + sig do + returns( + T.nilable( + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection + ) + ) + end + attr_reader :turn_detection + + sig do + params( + turn_detection: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::OrHash + ).void + end + attr_writer :turn_detection + + # Realtime transcription session object configuration. + sig do + params( + include: + T::Array[ + OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::OrSymbol + ], + input_audio_format: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::OrSymbol, + input_audio_noise_reduction: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction::OrHash, + input_audio_transcription: + OpenAI::Realtime::AudioTranscription::OrHash, + turn_detection: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::OrHash + ).returns(T.attached_class) + end + def self.new( + # The set of items to include in the transcription. Current available items are: + # `item.input_audio_transcription.logprobs` + include: nil, + # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + # (mono), and little-endian byte order. + input_audio_format: nil, + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + input_audio_noise_reduction: nil, + # Configuration for input audio transcription. The client can optionally set the + # language and prompt for transcription; these offer additional guidance to the + # transcription service. + input_audio_transcription: nil, + # Configuration for turn detection. Can be set to `null` to turn off. Server VAD + # means that the model will detect the start and end of speech based on audio + # volume and respond at the end of user speech.
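+          #
+          # For example (a hedged sketch; the values shown are the documented
+          # defaults, and the hash mirrors the TurnDetection class below):
+          #
+          #   turn_detection: {
+          #     type: :server_vad,
+          #     threshold: 0.5,
+          #     prefix_padding_ms: 300,
+          #     silence_duration_ms: 500
+          #   }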
+ turn_detection: nil + ) + end + + sig do + override.returns( + { + include: + T::Array[ + OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::OrSymbol + ], + input_audio_format: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::OrSymbol, + input_audio_noise_reduction: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction, + input_audio_transcription: OpenAI::Realtime::AudioTranscription, + turn_detection: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection + } + ) + end + def to_hash + end + + module Include + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = + T.let( + :"item.input_audio_transcription.logprobs", + OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::TaggedSymbol + ] + ) + end + def self.values + end + end + + # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + # (mono), and little-endian byte order. + module InputAudioFormat + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + PCM16 = + T.let( + :pcm16, + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::TaggedSymbol + ) + G711_ULAW = + T.let( + :g711_ulaw, + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::TaggedSymbol + ) + G711_ALAW = + T.let( + :g711_alaw, + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::TaggedSymbol + ] + ) + end + def self.values + end + end + + class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction, + OpenAI::Internal::AnyHash + ) + end + + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. + sig do + returns(T.nilable(OpenAI::Realtime::NoiseReductionType::OrSymbol)) + end + attr_reader :type + + sig do + params(type: OpenAI::Realtime::NoiseReductionType::OrSymbol).void + end + attr_writer :type + + # Configuration for input audio noise reduction. This can be set to `null` to turn + # off. Noise reduction filters audio added to the input audio buffer before it is + # sent to VAD and the model. Filtering the audio can improve VAD and turn + # detection accuracy (reducing false positives) and model performance by improving + # perception of the input audio. + sig do + params( + type: OpenAI::Realtime::NoiseReductionType::OrSymbol + ).returns(T.attached_class) + end + def self.new( + # Type of noise reduction. `near_field` is for close-talking microphones such as + # headphones, `far_field` is for far-field microphones such as laptop or + # conference room microphones. 
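+            #
+            # A minimal sketch (assumed usage): for a laptop or conference-room
+            # microphone you would pass the shared enum value, e.g.
+            #
+            #   OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction.new(
+            #     type: OpenAI::Realtime::NoiseReductionType::FAR_FIELD
+            #   )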
+ type: nil + ) + end + + sig do + override.returns( + { type: OpenAI::Realtime::NoiseReductionType::OrSymbol } + ) + end + def to_hash + end + end + + class TurnDetection < OpenAI::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection, + OpenAI::Internal::AnyHash + ) + end + + # Amount of audio to include before the VAD detected speech (in milliseconds). + # Defaults to 300ms. + sig { returns(T.nilable(Integer)) } + attr_reader :prefix_padding_ms + + sig { params(prefix_padding_ms: Integer).void } + attr_writer :prefix_padding_ms + + # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + # With shorter values the model will respond more quickly, but may jump in on + # short pauses from the user. + sig { returns(T.nilable(Integer)) } + attr_reader :silence_duration_ms + + sig { params(silence_duration_ms: Integer).void } + attr_writer :silence_duration_ms + + # Activation threshold for VAD (0.0 to 1.0); this defaults to 0.5. A higher + # threshold will require louder audio to activate the model, and thus might + # perform better in noisy environments. + sig { returns(T.nilable(Float)) } + attr_reader :threshold + + sig { params(threshold: Float).void } + attr_writer :threshold + + # Type of turn detection. Only `server_vad` is currently supported for + # transcription sessions. + sig do + returns( + T.nilable( + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::OrSymbol + ) + ) + end + attr_reader :type + + sig do + params( + type: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::OrSymbol + ).void + end + attr_writer :type + + # Configuration for turn detection. Can be set to `null` to turn off. Server VAD + # means that the model will detect the start and end of speech based on audio + # volume and respond at the end of user speech. + sig do + params( + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::OrSymbol + ).returns(T.attached_class) + end + def self.new( + # Amount of audio to include before the VAD detected speech (in milliseconds). + # Defaults to 300ms. + prefix_padding_ms: nil, + # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + # With shorter values the model will respond more quickly, but may jump in on + # short pauses from the user. + silence_duration_ms: nil, + # Activation threshold for VAD (0.0 to 1.0); this defaults to 0.5. A higher + # threshold will require louder audio to activate the model, and thus might + # perform better in noisy environments. + threshold: nil, + # Type of turn detection. Only `server_vad` is currently supported for + # transcription sessions. + type: nil + ) + end + + sig do + override.returns( + { + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::OrSymbol + } + ) + end + def to_hash + end + + # Type of turn detection. Only `server_vad` is currently supported for + # transcription sessions.
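+            #
+            # A minimal sketch (assuming `detection` is an instance of the
+            # TurnDetection class above):
+            #
+            #   if detection.type == OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::SERVER_VAD
+            #     # speech boundaries are detected server-side
+            #   end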
+ module Type + extend OpenAI::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + SERVER_VAD = + T.let( + :server_vad, + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::TaggedSymbol + ] + ) + end + def self.values + end + end + end + end end end end diff --git a/rbi/openai/models/realtime/transcription_session_updated_event.rbi b/rbi/openai/models/realtime/transcription_session_updated_event.rbi index e4ceed66..a930600a 100644 --- a/rbi/openai/models/realtime/transcription_session_updated_event.rbi +++ b/rbi/openai/models/realtime/transcription_session_updated_event.rbi @@ -16,16 +16,20 @@ module OpenAI sig { returns(String) } attr_accessor :event_id - # A Realtime transcription session configuration object. + # A new Realtime transcription session configuration. + # + # When a session is created on the server via REST API, the session object also + # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is + # not present when a session is updated via the WebSocket API. sig do - returns(OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session) + returns(OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse) end attr_reader :session sig do params( session: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::OrHash + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::OrHash ).void end attr_writer :session @@ -40,14 +44,18 @@ module OpenAI params( event_id: String, session: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::OrHash, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::OrHash, type: Symbol ).returns(T.attached_class) end def self.new( # The unique ID of the server event. event_id:, - # A Realtime transcription session configuration object. + # A new Realtime transcription session configuration. + # + # When a session is created on the server via REST API, the session object also + # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is + # not present when a session is updated via the WebSocket API. session:, # The event type, must be `transcription_session.updated`. type: :"transcription_session.updated" @@ -59,598 +67,13 @@ module OpenAI { event_id: String, session: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session, + OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse, type: Symbol } ) end def to_hash end - - class Session < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session, - OpenAI::Internal::AnyHash - ) - end - - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - sig { returns(T.nilable(String)) } - attr_reader :id - - sig { params(id: String).void } - attr_writer :id - - # Configuration for input audio for the session. - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio - ) - ) - end - attr_reader :audio - - sig do - params( - audio: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::OrHash - ).void - end - attr_writer :audio - - # Expiration timestamp for the session, in seconds since epoch. 
- sig { returns(T.nilable(Integer)) } - attr_reader :expires_at - - sig { params(expires_at: Integer).void } - attr_writer :expires_at - - # Additional fields to include in server outputs. - # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. - sig do - returns( - T.nilable( - T::Array[ - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Include::OrSymbol - ] - ) - ) - end - attr_reader :include - - sig do - params( - include: - T::Array[ - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Include::OrSymbol - ] - ).void - end - attr_writer :include - - # The object type. Always `realtime.transcription_session`. - sig { returns(T.nilable(String)) } - attr_reader :object - - sig { params(object: String).void } - attr_writer :object - - # A Realtime transcription session configuration object. - sig do - params( - id: String, - audio: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::OrHash, - expires_at: Integer, - include: - T::Array[ - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Include::OrSymbol - ], - object: String - ).returns(T.attached_class) - end - def self.new( - # Unique identifier for the session that looks like `sess_1234567890abcdef`. - id: nil, - # Configuration for input audio for the session. - audio: nil, - # Expiration timestamp for the session, in seconds since epoch. - expires_at: nil, - # Additional fields to include in server outputs. - # - # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio - # transcription. - include: nil, - # The object type. Always `realtime.transcription_session`. - object: nil - ) - end - - sig do - override.returns( - { - id: String, - audio: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio, - expires_at: Integer, - include: - T::Array[ - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Include::OrSymbol - ], - object: String - } - ) - end - def to_hash - end - - class Audio < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio, - OpenAI::Internal::AnyHash - ) - end - - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input - ) - ) - end - attr_reader :input - - sig do - params( - input: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::OrHash - ).void - end - attr_writer :input - - # Configuration for input audio for the session. - sig do - params( - input: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::OrHash - ).returns(T.attached_class) - end - def self.new(input: nil) - end - - sig do - override.returns( - { - input: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input - } - ) - end - def to_hash - end - - class Input < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input, - OpenAI::Internal::AnyHash - ) - end - - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - sig { returns(T.nilable(String)) } - attr_reader :format_ - - sig { params(format_: String).void } - attr_writer :format_ - - # Configuration for input audio noise reduction. 
- sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction - ) - ) - end - attr_reader :noise_reduction - - sig do - params( - noise_reduction: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::OrHash - ).void - end - attr_writer :noise_reduction - - # Configuration of the transcription model. - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription - ) - ) - end - attr_reader :transcription - - sig do - params( - transcription: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::OrHash - ).void - end - attr_writer :transcription - - # Configuration for turn detection. - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection - ) - ) - end - attr_reader :turn_detection - - sig do - params( - turn_detection: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection::OrHash - ).void - end - attr_writer :turn_detection - - sig do - params( - format_: String, - noise_reduction: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::OrHash, - transcription: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::OrHash, - turn_detection: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection::OrHash - ).returns(T.attached_class) - end - def self.new( - # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. - format_: nil, - # Configuration for input audio noise reduction. - noise_reduction: nil, - # Configuration of the transcription model. - transcription: nil, - # Configuration for turn detection. - turn_detection: nil - ) - end - - sig do - override.returns( - { - format_: String, - noise_reduction: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction, - transcription: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription, - turn_detection: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection - } - ) - end - def to_hash - end - - class NoiseReduction < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction, - OpenAI::Internal::AnyHash - ) - end - - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type::OrSymbol - ) - ) - end - attr_reader :type - - sig do - params( - type: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type::OrSymbol - ).void - end - attr_writer :type - - # Configuration for input audio noise reduction. 
- sig do - params( - type: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type::OrSymbol - ).returns(T.attached_class) - end - def self.new(type: nil) - end - - sig do - override.returns( - { - type: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type::OrSymbol - } - ) - end - def to_hash - end - - module Type - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - NEAR_FIELD = - T.let( - :near_field, - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type::TaggedSymbol - ) - FAR_FIELD = - T.let( - :far_field, - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::Type::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class Transcription < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription, - OpenAI::Internal::AnyHash - ) - end - - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - sig { returns(T.nilable(String)) } - attr_reader :language - - sig { params(language: String).void } - attr_writer :language - - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - sig do - returns( - T.nilable( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model::OrSymbol - ) - ) - end - attr_reader :model - - sig do - params( - model: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model::OrSymbol - ).void - end - attr_writer :model - - # An optional text to guide the model's style or continue a previous audio - # segment. The - # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) - # should match the audio language. - sig { returns(T.nilable(String)) } - attr_reader :prompt - - sig { params(prompt: String).void } - attr_writer :prompt - - # Configuration of the transcription model. - sig do - params( - language: String, - model: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model::OrSymbol, - prompt: String - ).returns(T.attached_class) - end - def self.new( - # The language of the input audio. Supplying the input language in - # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - # format will improve accuracy and latency. - language: nil, - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - model: nil, - # An optional text to guide the model's style or continue a previous audio - # segment. The - # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) - # should match the audio language. 
- prompt: nil - ) - end - - sig do - override.returns( - { - language: String, - model: - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model::OrSymbol, - prompt: String - } - ) - end - def to_hash - end - - # The model to use for transcription. Can be `gpt-4o-transcribe`, - # `gpt-4o-mini-transcribe`, or `whisper-1`. - module Model - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - GPT_4O_TRANSCRIBE = - T.let( - :"gpt-4o-transcribe", - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model::TaggedSymbol - ) - GPT_4O_MINI_TRANSCRIBE = - T.let( - :"gpt-4o-mini-transcribe", - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model::TaggedSymbol - ) - WHISPER_1 = - T.let( - :"whisper-1", - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::Model::TaggedSymbol - ] - ) - end - def self.values - end - end - end - - class TurnDetection < OpenAI::Internal::Type::BaseModel - OrHash = - T.type_alias do - T.any( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection, - OpenAI::Internal::AnyHash - ) - end - - sig { returns(T.nilable(Integer)) } - attr_reader :prefix_padding_ms - - sig { params(prefix_padding_ms: Integer).void } - attr_writer :prefix_padding_ms - - sig { returns(T.nilable(Integer)) } - attr_reader :silence_duration_ms - - sig { params(silence_duration_ms: Integer).void } - attr_writer :silence_duration_ms - - sig { returns(T.nilable(Float)) } - attr_reader :threshold - - sig { params(threshold: Float).void } - attr_writer :threshold - - # Type of turn detection, only `server_vad` is currently supported. - sig { returns(T.nilable(String)) } - attr_reader :type - - sig { params(type: String).void } - attr_writer :type - - # Configuration for turn detection. - sig do - params( - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - ).returns(T.attached_class) - end - def self.new( - prefix_padding_ms: nil, - silence_duration_ms: nil, - threshold: nil, - # Type of turn detection, only `server_vad` is currently supported. 
- type: nil - ) - end - - sig do - override.returns( - { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } - ) - end - def to_hash - end - end - end - end - - module Include - extend OpenAI::Internal::Type::Enum - - TaggedSymbol = - T.type_alias do - T.all( - Symbol, - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Include - ) - end - OrSymbol = T.type_alias { T.any(Symbol, String) } - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = - T.let( - :"item.input_audio_transcription.logprobs", - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Include::TaggedSymbol - ) - - sig do - override.returns( - T::Array[ - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Include::TaggedSymbol - ] - ) - end - def self.values - end - end - end end end end diff --git a/rbi/openai/resources/realtime/client_secrets.rbi b/rbi/openai/resources/realtime/client_secrets.rbi index 6e7304fc..d055f18f 100644 --- a/rbi/openai/resources/realtime/client_secrets.rbi +++ b/rbi/openai/resources/realtime/client_secrets.rbi @@ -4,8 +4,7 @@ module OpenAI module Resources class Realtime class ClientSecrets - # Create a Realtime session and client secret for either realtime or - # transcription. + # Create a Realtime client secret with an associated session configuration. sig do params( expires_after: @@ -19,7 +18,10 @@ module OpenAI ).returns(OpenAI::Models::Realtime::ClientSecretCreateResponse) end def create( - # Configuration for the ephemeral token expiration. + # Configuration for the client secret expiration. Expiration refers to the time + # after which a client secret will no longer be valid for creating sessions. The + # session itself may continue after that time once started. A secret can be used + # to create multiple sessions until it expires. expires_after: nil, # Session configuration to use for the client secret. Choose either a realtime # session or a transcription session. diff --git a/sig/openai/models/realtime/audio_transcription.rbs b/sig/openai/models/realtime/audio_transcription.rbs new file mode 100644 index 00000000..4b79dfb9 --- /dev/null +++ b/sig/openai/models/realtime/audio_transcription.rbs @@ -0,0 +1,57 @@ +module OpenAI + module Models + module Realtime + type audio_transcription = + { + language: String, + model: OpenAI::Models::Realtime::AudioTranscription::model, + prompt: String + } + + class AudioTranscription < OpenAI::Internal::Type::BaseModel + attr_reader language: String? + + def language=: (String) -> String + + attr_reader model: OpenAI::Models::Realtime::AudioTranscription::model? + + def model=: ( + OpenAI::Models::Realtime::AudioTranscription::model + ) -> OpenAI::Models::Realtime::AudioTranscription::model + + attr_reader prompt: String? 
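+
+        # Illustrative usage of the Ruby model this signature describes (a hedged
+        # sketch; the prompt text is invented):
+        #
+        #   OpenAI::Realtime::AudioTranscription.new(
+        #     model: :"gpt-4o-transcribe",
+        #     language: "en",
+        #     prompt: "Expect audio-engineering vocabulary."
+        #   )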
+ + def prompt=: (String) -> String + + def initialize: ( + ?language: String, + ?model: OpenAI::Models::Realtime::AudioTranscription::model, + ?prompt: String + ) -> void + + def to_hash: -> { + language: String, + model: OpenAI::Models::Realtime::AudioTranscription::model, + prompt: String + } + + type model = + :"whisper-1" + | :"gpt-4o-transcribe-latest" + | :"gpt-4o-mini-transcribe" + | :"gpt-4o-transcribe" + + module Model + extend OpenAI::Internal::Type::Enum + + WHISPER_1: :"whisper-1" + GPT_4O_TRANSCRIBE_LATEST: :"gpt-4o-transcribe-latest" + GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe" + GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe" + + def self?.values: -> ::Array[OpenAI::Models::Realtime::AudioTranscription::model] + end + end + end + end +end diff --git a/sig/openai/models/realtime/client_secret_create_response.rbs b/sig/openai/models/realtime/client_secret_create_response.rbs index 0d1f2609..106045f7 100644 --- a/sig/openai/models/realtime/client_secret_create_response.rbs +++ b/sig/openai/models/realtime/client_secret_create_response.rbs @@ -29,261 +29,11 @@ module OpenAI type session = OpenAI::Realtime::RealtimeSessionCreateResponse - | OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse + | OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse module Session extend OpenAI::Internal::Type::Union - type realtime_transcription_session_create_response = - { - id: String, - audio: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio, - expires_at: Integer, - include: ::Array[OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::include_], - object: String - } - - class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel - attr_reader id: String? - - def id=: (String) -> String - - attr_reader audio: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio? - - def audio=: ( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio - ) -> OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio - - attr_reader expires_at: Integer? - - def expires_at=: (Integer) -> Integer - - attr_reader include: ::Array[OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::include_]? - - def include=: ( - ::Array[OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::include_] - ) -> ::Array[OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::include_] - - attr_reader object: String? 
- - def object=: (String) -> String - - def initialize: ( - ?id: String, - ?audio: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio, - ?expires_at: Integer, - ?include: ::Array[OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::include_], - ?object: String - ) -> void - - def to_hash: -> { - id: String, - audio: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio, - expires_at: Integer, - include: ::Array[OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::include_], - object: String - } - - type audio = - { - input: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input - } - - class Audio < OpenAI::Internal::Type::BaseModel - attr_reader input: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input? - - def input=: ( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input - ) -> OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input - - def initialize: ( - ?input: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input - ) -> void - - def to_hash: -> { - input: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input - } - - type input = - { - format_: String, - noise_reduction: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction, - transcription: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription, - turn_detection: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection - } - - class Input < OpenAI::Internal::Type::BaseModel - attr_reader format_: String? - - def format_=: (String) -> String - - attr_reader noise_reduction: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction? - - def noise_reduction=: ( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction - ) -> OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction - - attr_reader transcription: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription? - - def transcription=: ( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription - ) -> OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription - - attr_reader turn_detection: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection? 
- - def turn_detection=: ( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection - ) -> OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection - - def initialize: ( - ?format_: String, - ?noise_reduction: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction, - ?transcription: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription, - ?turn_detection: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection - ) -> void - - def to_hash: -> { - format_: String, - noise_reduction: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction, - transcription: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription, - turn_detection: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection - } - - type noise_reduction = - { - type: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::type_ - } - - class NoiseReduction < OpenAI::Internal::Type::BaseModel - attr_reader type: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::type_? - - def type=: ( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::type_ - ) -> OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::type_ - - def initialize: ( - ?type: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::type_ - ) -> void - - def to_hash: -> { - type: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::type_ - } - - type type_ = :near_field | :far_field - - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD: :near_field - FAR_FIELD: :far_field - - def self?.values: -> ::Array[OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::type_] - end - end - - type transcription = - { - language: String, - model: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::model, - prompt: String - } - - class Transcription < OpenAI::Internal::Type::BaseModel - attr_reader language: String? - - def language=: (String) -> String - - attr_reader model: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::model? 
- - def model=: ( - OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::model - ) -> OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::model - - attr_reader prompt: String? - - def prompt=: (String) -> String - - def initialize: ( - ?language: String, - ?model: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::model, - ?prompt: String - ) -> void - - def to_hash: -> { - language: String, - model: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::model, - prompt: String - } - - type model = - :"gpt-4o-transcribe" - | :"gpt-4o-mini-transcribe" - | :"whisper-1" - - module Model - extend OpenAI::Internal::Type::Enum - - GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe" - GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe" - WHISPER_1: :"whisper-1" - - def self?.values: -> ::Array[OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::model] - end - end - - type turn_detection = - { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } - - class TurnDetection < OpenAI::Internal::Type::BaseModel - attr_reader prefix_padding_ms: Integer? - - def prefix_padding_ms=: (Integer) -> Integer - - attr_reader silence_duration_ms: Integer? - - def silence_duration_ms=: (Integer) -> Integer - - attr_reader threshold: Float? - - def threshold=: (Float) -> Float - - attr_reader type: String? - - def type=: (String) -> String - - def initialize: ( - ?prefix_padding_ms: Integer, - ?silence_duration_ms: Integer, - ?threshold: Float, - ?type: String - ) -> void - - def to_hash: -> { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } - end - end - end - - type include_ = :"item.input_audio_transcription.logprobs" - - module Include - extend OpenAI::Internal::Type::Enum - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS: :"item.input_audio_transcription.logprobs" - - def self?.values: -> ::Array[OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::include_] - end - end - def self?.variants: -> ::Array[OpenAI::Models::Realtime::ClientSecretCreateResponse::session] end end diff --git a/sig/openai/models/realtime/models.rbs b/sig/openai/models/realtime/models.rbs new file mode 100644 index 00000000..79d6fe86 --- /dev/null +++ b/sig/openai/models/realtime/models.rbs @@ -0,0 +1,57 @@ +module OpenAI + module Models + module Realtime + type models = + { + description: String, + name: String, + parameters: top, + type: OpenAI::Models::Realtime::Models::type_ + } + + class Models < OpenAI::Internal::Type::BaseModel + attr_reader description: String? + + def description=: (String) -> String + + attr_reader name: String? + + def name=: (String) -> String + + attr_reader parameters: top? + + def parameters=: (top) -> top + + attr_reader type: OpenAI::Models::Realtime::Models::type_? 
+ + def type=: ( + OpenAI::Models::Realtime::Models::type_ + ) -> OpenAI::Models::Realtime::Models::type_ + + def initialize: ( + ?description: String, + ?name: String, + ?parameters: top, + ?type: OpenAI::Models::Realtime::Models::type_ + ) -> void + + def to_hash: -> { + description: String, + name: String, + parameters: top, + type: OpenAI::Models::Realtime::Models::type_ + } + + type type_ = :function + + module Type + extend OpenAI::Internal::Type::Enum + + FUNCTION: :function + + def self?.values: -> ::Array[OpenAI::Models::Realtime::Models::type_] + end + end + end + end +end diff --git a/sig/openai/models/realtime/noise_reduction_type.rbs b/sig/openai/models/realtime/noise_reduction_type.rbs new file mode 100644 index 00000000..c4d653f7 --- /dev/null +++ b/sig/openai/models/realtime/noise_reduction_type.rbs @@ -0,0 +1,16 @@ +module OpenAI + module Models + module Realtime + type noise_reduction_type = :near_field | :far_field + + module NoiseReductionType + extend OpenAI::Internal::Type::Enum + + NEAR_FIELD: :near_field + FAR_FIELD: :far_field + + def self?.values: -> ::Array[OpenAI::Models::Realtime::noise_reduction_type] + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_audio_config.rbs b/sig/openai/models/realtime/realtime_audio_config.rbs index 588789d6..957dddcf 100644 --- a/sig/openai/models/realtime/realtime_audio_config.rbs +++ b/sig/openai/models/realtime/realtime_audio_config.rbs @@ -3,351 +3,32 @@ module OpenAI module Realtime type realtime_audio_config = { - input: OpenAI::Realtime::RealtimeAudioConfig::Input, - output: OpenAI::Realtime::RealtimeAudioConfig::Output + input: OpenAI::Realtime::RealtimeAudioConfigInput, + output: OpenAI::Realtime::RealtimeAudioConfigOutput } class RealtimeAudioConfig < OpenAI::Internal::Type::BaseModel - attr_reader input: OpenAI::Realtime::RealtimeAudioConfig::Input? + attr_reader input: OpenAI::Realtime::RealtimeAudioConfigInput? def input=: ( - OpenAI::Realtime::RealtimeAudioConfig::Input - ) -> OpenAI::Realtime::RealtimeAudioConfig::Input + OpenAI::Realtime::RealtimeAudioConfigInput + ) -> OpenAI::Realtime::RealtimeAudioConfigInput - attr_reader output: OpenAI::Realtime::RealtimeAudioConfig::Output? + attr_reader output: OpenAI::Realtime::RealtimeAudioConfigOutput? def output=: ( - OpenAI::Realtime::RealtimeAudioConfig::Output - ) -> OpenAI::Realtime::RealtimeAudioConfig::Output + OpenAI::Realtime::RealtimeAudioConfigOutput + ) -> OpenAI::Realtime::RealtimeAudioConfigOutput def initialize: ( - ?input: OpenAI::Realtime::RealtimeAudioConfig::Input, - ?output: OpenAI::Realtime::RealtimeAudioConfig::Output + ?input: OpenAI::Realtime::RealtimeAudioConfigInput, + ?output: OpenAI::Realtime::RealtimeAudioConfigOutput ) -> void def to_hash: -> { - input: OpenAI::Realtime::RealtimeAudioConfig::Input, - output: OpenAI::Realtime::RealtimeAudioConfig::Output + input: OpenAI::Realtime::RealtimeAudioConfigInput, + output: OpenAI::Realtime::RealtimeAudioConfigOutput } - - type input = - { - format_: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::format_, - noise_reduction: OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction, - transcription: OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription, - turn_detection: OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection - } - - class Input < OpenAI::Internal::Type::BaseModel - attr_reader format_: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::format_? 
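For orientation: the new top-level `Models` shape above is the function-tool definition that Realtime sessions and responses now share, and `NoiseReductionType` replaces the previously nested per-config enums. A minimal construction sketch against these signatures; the tool name and JSON-schema parameters are illustrative, not from this diff:

```ruby
require "openai"

# Hypothetical function tool; `parameters` is untyped (`top`) in the
# signature, so a plain JSON-schema Hash is accepted as-is.
weather_tool = OpenAI::Realtime::Models.new(
  type: :function,
  name: "get_weather",
  description: "Look up current weather for a city.",
  parameters: {
    type: "object",
    properties: {city: {type: "string"}},
    required: ["city"]
  }
)

OpenAI::Realtime::NoiseReductionType::NEAR_FIELD # => :near_field
```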
- - def format_=: ( - OpenAI::Models::Realtime::RealtimeAudioConfig::Input::format_ - ) -> OpenAI::Models::Realtime::RealtimeAudioConfig::Input::format_ - - attr_reader noise_reduction: OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction? - - def noise_reduction=: ( - OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction - ) -> OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction - - attr_reader transcription: OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription? - - def transcription=: ( - OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription - ) -> OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription - - attr_reader turn_detection: OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection? - - def turn_detection=: ( - OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection - ) -> OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection - - def initialize: ( - ?format_: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::format_, - ?noise_reduction: OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction, - ?transcription: OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription, - ?turn_detection: OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection - ) -> void - - def to_hash: -> { - format_: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::format_, - noise_reduction: OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction, - transcription: OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription, - turn_detection: OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection - } - - type format_ = :pcm16 | :g711_ulaw | :g711_alaw - - module Format - extend OpenAI::Internal::Type::Enum - - PCM16: :pcm16 - G711_ULAW: :g711_ulaw - G711_ALAW: :g711_alaw - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioConfig::Input::format_] - end - - type noise_reduction = - { - type: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction::type_ - } - - class NoiseReduction < OpenAI::Internal::Type::BaseModel - attr_reader type: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction::type_? - - def type=: ( - OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction::type_ - ) -> OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction::type_ - - def initialize: ( - ?type: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction::type_ - ) -> void - - def to_hash: -> { - type: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction::type_ - } - - type type_ = :near_field | :far_field - - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD: :near_field - FAR_FIELD: :far_field - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioConfig::Input::NoiseReduction::type_] - end - end - - type transcription = - { - language: String, - model: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription::model, - prompt: String - } - - class Transcription < OpenAI::Internal::Type::BaseModel - attr_reader language: String? - - def language=: (String) -> String - - attr_reader model: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription::model? - - def model=: ( - OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription::model - ) -> OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription::model - - attr_reader prompt: String? 
- - def prompt=: (String) -> String - - def initialize: ( - ?language: String, - ?model: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription::model, - ?prompt: String - ) -> void - - def to_hash: -> { - language: String, - model: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription::model, - prompt: String - } - - type model = - :"whisper-1" - | :"gpt-4o-transcribe-latest" - | :"gpt-4o-mini-transcribe" - | :"gpt-4o-transcribe" - | :"gpt-4o-transcribe-diarize" - - module Model - extend OpenAI::Internal::Type::Enum - - WHISPER_1: :"whisper-1" - GPT_4O_TRANSCRIBE_LATEST: :"gpt-4o-transcribe-latest" - GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe" - GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe" - GPT_4O_TRANSCRIBE_DIARIZE: :"gpt-4o-transcribe-diarize" - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioConfig::Input::Transcription::model] - end - end - - type turn_detection = - { - create_response: bool, - eagerness: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::eagerness, - idle_timeout_ms: Integer?, - interrupt_response: bool, - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::type_ - } - - class TurnDetection < OpenAI::Internal::Type::BaseModel - attr_reader create_response: bool? - - def create_response=: (bool) -> bool - - attr_reader eagerness: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::eagerness? - - def eagerness=: ( - OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::eagerness - ) -> OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::eagerness - - attr_accessor idle_timeout_ms: Integer? - - attr_reader interrupt_response: bool? - - def interrupt_response=: (bool) -> bool - - attr_reader prefix_padding_ms: Integer? - - def prefix_padding_ms=: (Integer) -> Integer - - attr_reader silence_duration_ms: Integer? - - def silence_duration_ms=: (Integer) -> Integer - - attr_reader threshold: Float? - - def threshold=: (Float) -> Float - - attr_reader type: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::type_? 
- - def type=: ( - OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::type_ - ) -> OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::type_ - - def initialize: ( - ?create_response: bool, - ?eagerness: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::eagerness, - ?idle_timeout_ms: Integer?, - ?interrupt_response: bool, - ?prefix_padding_ms: Integer, - ?silence_duration_ms: Integer, - ?threshold: Float, - ?type: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::type_ - ) -> void - - def to_hash: -> { - create_response: bool, - eagerness: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::eagerness, - idle_timeout_ms: Integer?, - interrupt_response: bool, - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::type_ - } - - type eagerness = :low | :medium | :high | :auto - - module Eagerness - extend OpenAI::Internal::Type::Enum - - LOW: :low - MEDIUM: :medium - HIGH: :high - AUTO: :auto - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::eagerness] - end - - type type_ = :server_vad | :semantic_vad - - module Type - extend OpenAI::Internal::Type::Enum - - SERVER_VAD: :server_vad - SEMANTIC_VAD: :semantic_vad - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioConfig::Input::TurnDetection::type_] - end - end - end - - type output = - { - format_: OpenAI::Models::Realtime::RealtimeAudioConfig::Output::format_, - speed: Float, - voice: OpenAI::Models::Realtime::RealtimeAudioConfig::Output::voice - } - - class Output < OpenAI::Internal::Type::BaseModel - attr_reader format_: OpenAI::Models::Realtime::RealtimeAudioConfig::Output::format_? - - def format_=: ( - OpenAI::Models::Realtime::RealtimeAudioConfig::Output::format_ - ) -> OpenAI::Models::Realtime::RealtimeAudioConfig::Output::format_ - - attr_reader speed: Float? - - def speed=: (Float) -> Float - - attr_reader voice: OpenAI::Models::Realtime::RealtimeAudioConfig::Output::voice? 
- - def voice=: ( - OpenAI::Models::Realtime::RealtimeAudioConfig::Output::voice - ) -> OpenAI::Models::Realtime::RealtimeAudioConfig::Output::voice - - def initialize: ( - ?format_: OpenAI::Models::Realtime::RealtimeAudioConfig::Output::format_, - ?speed: Float, - ?voice: OpenAI::Models::Realtime::RealtimeAudioConfig::Output::voice - ) -> void - - def to_hash: -> { - format_: OpenAI::Models::Realtime::RealtimeAudioConfig::Output::format_, - speed: Float, - voice: OpenAI::Models::Realtime::RealtimeAudioConfig::Output::voice - } - - type format_ = :pcm16 | :g711_ulaw | :g711_alaw - - module Format - extend OpenAI::Internal::Type::Enum - - PCM16: :pcm16 - G711_ULAW: :g711_ulaw - G711_ALAW: :g711_alaw - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioConfig::Output::format_] - end - - type voice = - String - | :alloy - | :ash - | :ballad - | :coral - | :echo - | :sage - | :shimmer - | :verse - | :marin - | :cedar - - module Voice - extend OpenAI::Internal::Type::Union - - def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioConfig::Output::voice] - - ALLOY: :alloy - ASH: :ash - BALLAD: :ballad - CORAL: :coral - ECHO: :echo - SAGE: :sage - SHIMMER: :shimmer - VERSE: :verse - MARIN: :marin - CEDAR: :cedar - end - end end end end diff --git a/sig/openai/models/realtime/realtime_audio_config_input.rbs b/sig/openai/models/realtime/realtime_audio_config_input.rbs new file mode 100644 index 00000000..08d072de --- /dev/null +++ b/sig/openai/models/realtime/realtime_audio_config_input.rbs @@ -0,0 +1,72 @@ +module OpenAI + module Models + module Realtime + type realtime_audio_config_input = + { + format_: OpenAI::Models::Realtime::realtime_audio_formats, + noise_reduction: OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction, + transcription: OpenAI::Realtime::AudioTranscription, + turn_detection: OpenAI::Realtime::RealtimeAudioInputTurnDetection + } + + class RealtimeAudioConfigInput < OpenAI::Internal::Type::BaseModel + attr_reader format_: OpenAI::Models::Realtime::realtime_audio_formats? + + def format_=: ( + OpenAI::Models::Realtime::realtime_audio_formats + ) -> OpenAI::Models::Realtime::realtime_audio_formats + + attr_reader noise_reduction: OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction? + + def noise_reduction=: ( + OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction + ) -> OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction + + attr_reader transcription: OpenAI::Realtime::AudioTranscription? + + def transcription=: ( + OpenAI::Realtime::AudioTranscription + ) -> OpenAI::Realtime::AudioTranscription + + attr_reader turn_detection: OpenAI::Realtime::RealtimeAudioInputTurnDetection? 
+ + def turn_detection=: ( + OpenAI::Realtime::RealtimeAudioInputTurnDetection + ) -> OpenAI::Realtime::RealtimeAudioInputTurnDetection + + def initialize: ( + ?format_: OpenAI::Models::Realtime::realtime_audio_formats, + ?noise_reduction: OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction, + ?transcription: OpenAI::Realtime::AudioTranscription, + ?turn_detection: OpenAI::Realtime::RealtimeAudioInputTurnDetection + ) -> void + + def to_hash: -> { + format_: OpenAI::Models::Realtime::realtime_audio_formats, + noise_reduction: OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction, + transcription: OpenAI::Realtime::AudioTranscription, + turn_detection: OpenAI::Realtime::RealtimeAudioInputTurnDetection + } + + type noise_reduction = + { type: OpenAI::Models::Realtime::noise_reduction_type } + + class NoiseReduction < OpenAI::Internal::Type::BaseModel + attr_reader type: OpenAI::Models::Realtime::noise_reduction_type? + + def type=: ( + OpenAI::Models::Realtime::noise_reduction_type + ) -> OpenAI::Models::Realtime::noise_reduction_type + + def initialize: ( + ?type: OpenAI::Models::Realtime::noise_reduction_type + ) -> void + + def to_hash: -> { + type: OpenAI::Models::Realtime::noise_reduction_type + } + end + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_audio_config_output.rbs b/sig/openai/models/realtime/realtime_audio_config_output.rbs new file mode 100644 index 00000000..86f914fd --- /dev/null +++ b/sig/openai/models/realtime/realtime_audio_config_output.rbs @@ -0,0 +1,72 @@ +module OpenAI + module Models + module Realtime + type realtime_audio_config_output = + { + format_: OpenAI::Models::Realtime::realtime_audio_formats, + speed: Float, + voice: OpenAI::Models::Realtime::RealtimeAudioConfigOutput::voice + } + + class RealtimeAudioConfigOutput < OpenAI::Internal::Type::BaseModel + attr_reader format_: OpenAI::Models::Realtime::realtime_audio_formats? + + def format_=: ( + OpenAI::Models::Realtime::realtime_audio_formats + ) -> OpenAI::Models::Realtime::realtime_audio_formats + + attr_reader speed: Float? + + def speed=: (Float) -> Float + + attr_reader voice: OpenAI::Models::Realtime::RealtimeAudioConfigOutput::voice? 
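`RealtimeAudioConfigInput` extracts the old nested `RealtimeAudioConfig::Input` into a standalone model composing the shared `AudioTranscription` and turn-detection types. A sketch, assuming the transcription model symbol below matches one of the transcription enum values elsewhere in this diff:

```ruby
input = OpenAI::Realtime::RealtimeAudioConfigInput.new(
  noise_reduction: OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction.new(
    type: :near_field
  ),
  transcription: OpenAI::Realtime::AudioTranscription.new(
    language: "en",              # ISO-639-1 hint for accuracy/latency
    model: :"gpt-4o-transcribe"  # assumed enum value; see the transcription enums in this diff
  )
)
```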
+ + def voice=: ( + OpenAI::Models::Realtime::RealtimeAudioConfigOutput::voice + ) -> OpenAI::Models::Realtime::RealtimeAudioConfigOutput::voice + + def initialize: ( + ?format_: OpenAI::Models::Realtime::realtime_audio_formats, + ?speed: Float, + ?voice: OpenAI::Models::Realtime::RealtimeAudioConfigOutput::voice + ) -> void + + def to_hash: -> { + format_: OpenAI::Models::Realtime::realtime_audio_formats, + speed: Float, + voice: OpenAI::Models::Realtime::RealtimeAudioConfigOutput::voice + } + + type voice = + String + | :alloy + | :ash + | :ballad + | :coral + | :echo + | :sage + | :shimmer + | :verse + | :marin + | :cedar + + module Voice + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioConfigOutput::voice] + + ALLOY: :alloy + ASH: :ash + BALLAD: :ballad + CORAL: :coral + ECHO: :echo + SAGE: :sage + SHIMMER: :shimmer + VERSE: :verse + MARIN: :marin + CEDAR: :cedar + end + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_audio_formats.rbs b/sig/openai/models/realtime/realtime_audio_formats.rbs new file mode 100644 index 00000000..4534d15f --- /dev/null +++ b/sig/openai/models/realtime/realtime_audio_formats.rbs @@ -0,0 +1,128 @@ +module OpenAI + module Models + module Realtime + type realtime_audio_formats = + OpenAI::Realtime::RealtimeAudioFormats::AudioPCM + | OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU + | OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA + + module RealtimeAudioFormats + extend OpenAI::Internal::Type::Union + + type audio_pcm = + { + rate: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::rate, + type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::type_ + } + + class AudioPCM < OpenAI::Internal::Type::BaseModel + attr_reader rate: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::rate? + + def rate=: ( + OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::rate + ) -> OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::rate + + attr_reader type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::type_? + + def type=: ( + OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::type_ + ) -> OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::type_ + + def initialize: ( + ?rate: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::rate, + ?type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::type_ + ) -> void + + def to_hash: -> { + rate: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::rate, + type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::type_ + } + + type rate = 24000 + + module Rate + extend OpenAI::Internal::Type::Enum + + RATE_24000: 24000 + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::rate] + end + + type type_ = :"audio/pcm" + + module Type + extend OpenAI::Internal::Type::Enum + + AUDIO_PCM: :"audio/pcm" + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::type_] + end + end + + type audio_pcmu = + { + type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::type_ + } + + class AudioPCMU < OpenAI::Internal::Type::BaseModel + attr_reader type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::type_? 
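Likewise, `RealtimeAudioConfigOutput` carries only output concerns, and `voice` is a union accepting a known symbol or any `String`. An illustrative sketch:

```ruby
output = OpenAI::Realtime::RealtimeAudioConfigOutput.new(
  voice: :marin, # or any String for a custom voice
  speed: 1.1     # illustrative playback speed
)
```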
+ + def type=: ( + OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::type_ + ) -> OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::type_ + + def initialize: ( + ?type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::type_ + ) -> void + + def to_hash: -> { + type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::type_ + } + + type type_ = :"audio/pcmu" + + module Type + extend OpenAI::Internal::Type::Enum + + AUDIO_PCMU: :"audio/pcmu" + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::type_] + end + end + + type audio_pcma = + { + type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::type_ + } + + class AudioPCMA < OpenAI::Internal::Type::BaseModel + attr_reader type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::type_? + + def type=: ( + OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::type_ + ) -> OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::type_ + + def initialize: ( + ?type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::type_ + ) -> void + + def to_hash: -> { + type: OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::type_ + } + + type type_ = :"audio/pcma" + + module Type + extend OpenAI::Internal::Type::Enum + + AUDIO_PCMA: :"audio/pcma" + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::type_] + end + end + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::realtime_audio_formats] + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs b/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs new file mode 100644 index 00000000..3a8b1c9e --- /dev/null +++ b/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs @@ -0,0 +1,99 @@ +module OpenAI + module Models + module Realtime + type realtime_audio_input_turn_detection = + { + create_response: bool, + eagerness: OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::eagerness, + idle_timeout_ms: Integer?, + interrupt_response: bool, + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::type_ + } + + class RealtimeAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel + attr_reader create_response: bool? + + def create_response=: (bool) -> bool + + attr_reader eagerness: OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::eagerness? + + def eagerness=: ( + OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::eagerness + ) -> OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::eagerness + + attr_accessor idle_timeout_ms: Integer? + + attr_reader interrupt_response: bool? + + def interrupt_response=: (bool) -> bool + + attr_reader prefix_padding_ms: Integer? + + def prefix_padding_ms=: (Integer) -> Integer + + attr_reader silence_duration_ms: Integer? + + def silence_duration_ms=: (Integer) -> Integer + + attr_reader threshold: Float? + + def threshold=: (Float) -> Float + + attr_reader type: OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::type_? 
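`RealtimeAudioFormats` replaces the old bare `:pcm16 | :g711_ulaw | :g711_alaw` symbols with a typed union of PCM/PCMU/PCMA objects. A sketch wiring the union into the split audio config (format choices illustrative):

```ruby
pcm  = OpenAI::Realtime::RealtimeAudioFormats::AudioPCM.new(type: :"audio/pcm", rate: 24_000)
ulaw = OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU.new(type: :"audio/pcmu")

audio_config = OpenAI::Realtime::RealtimeAudioConfig.new(
  input: OpenAI::Realtime::RealtimeAudioConfigInput.new(format_: pcm),
  output: OpenAI::Realtime::RealtimeAudioConfigOutput.new(format_: ulaw, voice: :marin)
)
```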
+ + def type=: ( + OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::type_ + ) -> OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::type_ + + def initialize: ( + ?create_response: bool, + ?eagerness: OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::eagerness, + ?idle_timeout_ms: Integer?, + ?interrupt_response: bool, + ?prefix_padding_ms: Integer, + ?silence_duration_ms: Integer, + ?threshold: Float, + ?type: OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::type_ + ) -> void + + def to_hash: -> { + create_response: bool, + eagerness: OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::eagerness, + idle_timeout_ms: Integer?, + interrupt_response: bool, + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::type_ + } + + type eagerness = :low | :medium | :high | :auto + + module Eagerness + extend OpenAI::Internal::Type::Enum + + LOW: :low + MEDIUM: :medium + HIGH: :high + AUTO: :auto + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::eagerness] + end + + type type_ = :server_vad | :semantic_vad + + module Type + extend OpenAI::Internal::Type::Enum + + SERVER_VAD: :server_vad + SEMANTIC_VAD: :semantic_vad + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::type_] + end + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_client_secret_config.rbs b/sig/openai/models/realtime/realtime_client_secret_config.rbs deleted file mode 100644 index e948642f..00000000 --- a/sig/openai/models/realtime/realtime_client_secret_config.rbs +++ /dev/null @@ -1,60 +0,0 @@ -module OpenAI - module Models - module Realtime - type realtime_client_secret_config = - { - expires_after: OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter - } - - class RealtimeClientSecretConfig < OpenAI::Internal::Type::BaseModel - attr_reader expires_after: OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter? - - def expires_after=: ( - OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter - ) -> OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter - - def initialize: ( - ?expires_after: OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter - ) -> void - - def to_hash: -> { - expires_after: OpenAI::Realtime::RealtimeClientSecretConfig::ExpiresAfter - } - - type expires_after = - { - anchor: OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter::anchor, - seconds: Integer - } - - class ExpiresAfter < OpenAI::Internal::Type::BaseModel - attr_accessor anchor: OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter::anchor - - attr_reader seconds: Integer? 
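`RealtimeAudioInputTurnDetection` lifts the nested turn-detection model to the top level and adds the nullable `idle_timeout_ms`. A semantic-VAD sketch with illustrative values:

```ruby
turn_detection = OpenAI::Realtime::RealtimeAudioInputTurnDetection.new(
  type: :semantic_vad,
  eagerness: :auto,
  create_response: true,
  interrupt_response: true,
  idle_timeout_ms: nil # nullable: explicitly disable the idle timeout
)
```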
- - def seconds=: (Integer) -> Integer - - def initialize: ( - anchor: OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter::anchor, - ?seconds: Integer - ) -> void - - def to_hash: -> { - anchor: OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter::anchor, - seconds: Integer - } - - type anchor = :created_at - - module Anchor - extend OpenAI::Internal::Type::Enum - - CREATED_AT: :created_at - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeClientSecretConfig::ExpiresAfter::anchor] - end - end - end - end - end -end diff --git a/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs b/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs index 30b6fc7a..bc1e0e2a 100644 --- a/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +++ b/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs @@ -54,15 +54,25 @@ module OpenAI type content = { + audio: String, text: String, + transcript: String, type: OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::type_ } class Content < OpenAI::Internal::Type::BaseModel + attr_reader audio: String? + + def audio=: (String) -> String + attr_reader text: String? def text=: (String) -> String + attr_reader transcript: String? + + def transcript=: (String) -> String + attr_reader type: OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::type_? def type=: ( @@ -70,21 +80,26 @@ module OpenAI ) -> OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::type_ def initialize: ( + ?audio: String, ?text: String, + ?transcript: String, ?type: OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::type_ ) -> void def to_hash: -> { + audio: String, text: String, + transcript: String, type: OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::type_ } - type type_ = :text + type type_ = :output_text | :output_audio module Type extend OpenAI::Internal::Type::Enum - TEXT: :text + OUTPUT_TEXT: :output_text + OUTPUT_AUDIO: :output_audio def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::type_] end diff --git a/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs b/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs index 0fcd7da5..0c257956 100644 --- a/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +++ b/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs @@ -55,6 +55,8 @@ module OpenAI type content = { audio: String, + detail: OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::detail, + image_url: String, text: String, transcript: String, type: OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::type_ @@ -65,6 +67,16 @@ module OpenAI def audio=: (String) -> String + attr_reader detail: OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::detail? + + def detail=: ( + OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::detail + ) -> OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::detail + + attr_reader image_url: String? + + def image_url=: (String) -> String + attr_reader text: String? 
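The assistant-message change above renames the content type from `:text` to `:output_text`/`:output_audio` and adds `audio`/`transcript` fields. A sketch of the new shape:

```ruby
content = OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content.new(
  type: :output_text,
  text: "Sure - pulling that up now."
)
```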
def text=: (String) -> String @@ -81,6 +93,8 @@ module OpenAI def initialize: ( ?audio: String, + ?detail: OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::detail, + ?image_url: String, ?text: String, ?transcript: String, ?type: OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::type_ @@ -88,18 +102,33 @@ module OpenAI def to_hash: -> { audio: String, + detail: OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::detail, + image_url: String, text: String, transcript: String, type: OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::type_ } - type type_ = :input_text | :input_audio + type detail = :auto | :low | :high + + module Detail + extend OpenAI::Internal::Type::Enum + + AUTO: :auto + LOW: :low + HIGH: :high + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::detail] + end + + type type_ = :input_text | :input_audio | :input_image module Type extend OpenAI::Internal::Type::Enum INPUT_TEXT: :input_text INPUT_AUDIO: :input_audio + INPUT_IMAGE: :input_image def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeConversationItemUserMessage::Content::type_] end diff --git a/sig/openai/models/realtime/realtime_response.rbs b/sig/openai/models/realtime/realtime_response.rbs index 8ae1abcf..bc95c27b 100644 --- a/sig/openai/models/realtime/realtime_response.rbs +++ b/sig/openai/models/realtime/realtime_response.rbs @@ -4,18 +4,16 @@ module OpenAI type realtime_response = { id: String, + audio: OpenAI::Realtime::RealtimeResponse::Audio, conversation_id: String, max_output_tokens: OpenAI::Models::Realtime::RealtimeResponse::max_output_tokens, metadata: OpenAI::Models::metadata?, - modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponse::modality], object: OpenAI::Models::Realtime::RealtimeResponse::object, output: ::Array[OpenAI::Models::Realtime::conversation_item], - output_audio_format: OpenAI::Models::Realtime::RealtimeResponse::output_audio_format, + output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponse::output_modality], status: OpenAI::Models::Realtime::RealtimeResponse::status, status_details: OpenAI::Realtime::RealtimeResponseStatus, - temperature: Float, - usage: OpenAI::Realtime::RealtimeResponseUsage, - voice: OpenAI::Models::Realtime::RealtimeResponse::voice + usage: OpenAI::Realtime::RealtimeResponseUsage } class RealtimeResponse < OpenAI::Internal::Type::BaseModel @@ -23,6 +21,12 @@ module OpenAI def id=: (String) -> String + attr_reader audio: OpenAI::Realtime::RealtimeResponse::Audio? + + def audio=: ( + OpenAI::Realtime::RealtimeResponse::Audio + ) -> OpenAI::Realtime::RealtimeResponse::Audio + attr_reader conversation_id: String? def conversation_id=: (String) -> String @@ -35,12 +39,6 @@ module OpenAI attr_accessor metadata: OpenAI::Models::metadata? - attr_reader modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponse::modality]? - - def modalities=: ( - ::Array[OpenAI::Models::Realtime::RealtimeResponse::modality] - ) -> ::Array[OpenAI::Models::Realtime::RealtimeResponse::modality] - attr_reader object: OpenAI::Models::Realtime::RealtimeResponse::object? def object=: ( @@ -53,11 +51,11 @@ module OpenAI ::Array[OpenAI::Models::Realtime::conversation_item] ) -> ::Array[OpenAI::Models::Realtime::conversation_item] - attr_reader output_audio_format: OpenAI::Models::Realtime::RealtimeResponse::output_audio_format? 
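User-message content gains image input: `:input_image` with an `image_url` (a data URL or hosted URL) and an optional `detail` hint. A sketch with a placeholder URL:

```ruby
image_content = OpenAI::Realtime::RealtimeConversationItemUserMessage::Content.new(
  type: :input_image,
  image_url: "data:image/png;base64,...", # placeholder; supply real base64 data
  detail: :high
)
```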
+ attr_reader output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponse::output_modality]? - def output_audio_format=: ( - OpenAI::Models::Realtime::RealtimeResponse::output_audio_format - ) -> OpenAI::Models::Realtime::RealtimeResponse::output_audio_format + def output_modalities=: ( + ::Array[OpenAI::Models::Realtime::RealtimeResponse::output_modality] + ) -> ::Array[OpenAI::Models::Realtime::RealtimeResponse::output_modality] attr_reader status: OpenAI::Models::Realtime::RealtimeResponse::status? @@ -71,54 +69,119 @@ module OpenAI OpenAI::Realtime::RealtimeResponseStatus ) -> OpenAI::Realtime::RealtimeResponseStatus - attr_reader temperature: Float? - - def temperature=: (Float) -> Float - attr_reader usage: OpenAI::Realtime::RealtimeResponseUsage? def usage=: ( OpenAI::Realtime::RealtimeResponseUsage ) -> OpenAI::Realtime::RealtimeResponseUsage - attr_reader voice: OpenAI::Models::Realtime::RealtimeResponse::voice? - - def voice=: ( - OpenAI::Models::Realtime::RealtimeResponse::voice - ) -> OpenAI::Models::Realtime::RealtimeResponse::voice - def initialize: ( ?id: String, + ?audio: OpenAI::Realtime::RealtimeResponse::Audio, ?conversation_id: String, ?max_output_tokens: OpenAI::Models::Realtime::RealtimeResponse::max_output_tokens, ?metadata: OpenAI::Models::metadata?, - ?modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponse::modality], ?object: OpenAI::Models::Realtime::RealtimeResponse::object, ?output: ::Array[OpenAI::Models::Realtime::conversation_item], - ?output_audio_format: OpenAI::Models::Realtime::RealtimeResponse::output_audio_format, + ?output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponse::output_modality], ?status: OpenAI::Models::Realtime::RealtimeResponse::status, ?status_details: OpenAI::Realtime::RealtimeResponseStatus, - ?temperature: Float, - ?usage: OpenAI::Realtime::RealtimeResponseUsage, - ?voice: OpenAI::Models::Realtime::RealtimeResponse::voice + ?usage: OpenAI::Realtime::RealtimeResponseUsage ) -> void def to_hash: -> { id: String, + audio: OpenAI::Realtime::RealtimeResponse::Audio, conversation_id: String, max_output_tokens: OpenAI::Models::Realtime::RealtimeResponse::max_output_tokens, metadata: OpenAI::Models::metadata?, - modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponse::modality], object: OpenAI::Models::Realtime::RealtimeResponse::object, output: ::Array[OpenAI::Models::Realtime::conversation_item], - output_audio_format: OpenAI::Models::Realtime::RealtimeResponse::output_audio_format, + output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponse::output_modality], status: OpenAI::Models::Realtime::RealtimeResponse::status, status_details: OpenAI::Realtime::RealtimeResponseStatus, - temperature: Float, - usage: OpenAI::Realtime::RealtimeResponseUsage, - voice: OpenAI::Models::Realtime::RealtimeResponse::voice + usage: OpenAI::Realtime::RealtimeResponseUsage } + type audio = + { output: OpenAI::Realtime::RealtimeResponse::Audio::Output } + + class Audio < OpenAI::Internal::Type::BaseModel + attr_reader output: OpenAI::Realtime::RealtimeResponse::Audio::Output? 
+ + def output=: ( + OpenAI::Realtime::RealtimeResponse::Audio::Output + ) -> OpenAI::Realtime::RealtimeResponse::Audio::Output + + def initialize: ( + ?output: OpenAI::Realtime::RealtimeResponse::Audio::Output + ) -> void + + def to_hash: -> { + output: OpenAI::Realtime::RealtimeResponse::Audio::Output + } + + type output = + { + format_: OpenAI::Models::Realtime::realtime_audio_formats, + voice: OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::voice + } + + class Output < OpenAI::Internal::Type::BaseModel + attr_reader format_: OpenAI::Models::Realtime::realtime_audio_formats? + + def format_=: ( + OpenAI::Models::Realtime::realtime_audio_formats + ) -> OpenAI::Models::Realtime::realtime_audio_formats + + attr_reader voice: OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::voice? + + def voice=: ( + OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::voice + ) -> OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::voice + + def initialize: ( + ?format_: OpenAI::Models::Realtime::realtime_audio_formats, + ?voice: OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::voice + ) -> void + + def to_hash: -> { + format_: OpenAI::Models::Realtime::realtime_audio_formats, + voice: OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::voice + } + + type voice = + String + | :alloy + | :ash + | :ballad + | :coral + | :echo + | :sage + | :shimmer + | :verse + | :marin + | :cedar + + module Voice + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeResponse::Audio::Output::voice] + + ALLOY: :alloy + ASH: :ash + BALLAD: :ballad + CORAL: :coral + ECHO: :echo + SAGE: :sage + SHIMMER: :shimmer + VERSE: :verse + MARIN: :marin + CEDAR: :cedar + end + end + end + type max_output_tokens = Integer | :inf module MaxOutputTokens @@ -127,17 +190,6 @@ module OpenAI def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeResponse::max_output_tokens] end - type modality = :text | :audio - - module Modality - extend OpenAI::Internal::Type::Enum - - TEXT: :text - AUDIO: :audio - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeResponse::modality] - end - type object = :"realtime.response" module Object @@ -148,16 +200,15 @@ module OpenAI def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeResponse::object] end - type output_audio_format = :pcm16 | :g711_ulaw | :g711_alaw + type output_modality = :text | :audio - module OutputAudioFormat + module OutputModality extend OpenAI::Internal::Type::Enum - PCM16: :pcm16 - G711_ULAW: :g711_ulaw - G711_ALAW: :g711_alaw + TEXT: :text + AUDIO: :audio - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeResponse::output_audio_format] + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeResponse::output_modality] end type status = @@ -174,36 +225,6 @@ module OpenAI def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeResponse::status] end - - type voice = - String - | :alloy - | :ash - | :ballad - | :coral - | :echo - | :sage - | :shimmer - | :verse - | :marin - | :cedar - - module Voice - extend OpenAI::Internal::Type::Union - - def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeResponse::voice] - - ALLOY: :alloy - ASH: :ash - BALLAD: :ballad - CORAL: :coral - ECHO: :echo - SAGE: :sage - SHIMMER: :shimmer - VERSE: :verse - MARIN: :marin - CEDAR: :cedar - end end end end diff --git a/sig/openai/models/realtime/realtime_response_create_audio_output.rbs 
b/sig/openai/models/realtime/realtime_response_create_audio_output.rbs new file mode 100644 index 00000000..b390e133 --- /dev/null +++ b/sig/openai/models/realtime/realtime_response_create_audio_output.rbs @@ -0,0 +1,84 @@ +module OpenAI + module Models + module Realtime + type realtime_response_create_audio_output = + { output: OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output } + + class RealtimeResponseCreateAudioOutput < OpenAI::Internal::Type::BaseModel + attr_reader output: OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output? + + def output=: ( + OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output + ) -> OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output + + def initialize: ( + ?output: OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output + ) -> void + + def to_hash: -> { + output: OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output + } + + type output = + { + format_: OpenAI::Models::Realtime::realtime_audio_formats, + voice: OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::voice + } + + class Output < OpenAI::Internal::Type::BaseModel + attr_reader format_: OpenAI::Models::Realtime::realtime_audio_formats? + + def format_=: ( + OpenAI::Models::Realtime::realtime_audio_formats + ) -> OpenAI::Models::Realtime::realtime_audio_formats + + attr_reader voice: OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::voice? + + def voice=: ( + OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::voice + ) -> OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::voice + + def initialize: ( + ?format_: OpenAI::Models::Realtime::realtime_audio_formats, + ?voice: OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::voice + ) -> void + + def to_hash: -> { + format_: OpenAI::Models::Realtime::realtime_audio_formats, + voice: OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::voice + } + + type voice = + String + | :alloy + | :ash + | :ballad + | :coral + | :echo + | :sage + | :shimmer + | :verse + | :marin + | :cedar + + module Voice + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::voice] + + ALLOY: :alloy + ASH: :ash + BALLAD: :ballad + CORAL: :coral + ECHO: :echo + SAGE: :sage + SHIMMER: :shimmer + VERSE: :verse + MARIN: :marin + CEDAR: :cedar + end + end + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs b/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs new file mode 100644 index 00000000..fc1e334e --- /dev/null +++ b/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs @@ -0,0 +1,218 @@ +module OpenAI + module Models + module Realtime + type realtime_response_create_mcp_tool = + { + server_label: String, + type: :mcp, + allowed_tools: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::allowed_tools?, + authorization: String, + connector_id: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::connector_id, + headers: ::Hash[Symbol, String]?, + require_approval: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::require_approval?, + server_description: String, + server_url: String + } + + class RealtimeResponseCreateMcpTool < OpenAI::Internal::Type::BaseModel + attr_accessor server_label: String + + attr_accessor type: :mcp + + attr_accessor allowed_tools: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::allowed_tools? 
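`RealtimeResponseCreateAudioOutput`, completed just above, is the per-response counterpart of the session-level output config. An illustrative sketch:

```ruby
audio_out = OpenAI::Realtime::RealtimeResponseCreateAudioOutput.new(
  output: OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output.new(
    voice: :cedar,
    format_: OpenAI::Realtime::RealtimeAudioFormats::AudioPCM.new(type: :"audio/pcm")
  )
)
```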
+ + attr_reader authorization: String? + + def authorization=: (String) -> String + + attr_reader connector_id: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::connector_id? + + def connector_id=: ( + OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::connector_id + ) -> OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::connector_id + + attr_accessor headers: ::Hash[Symbol, String]? + + attr_accessor require_approval: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::require_approval? + + attr_reader server_description: String? + + def server_description=: (String) -> String + + attr_reader server_url: String? + + def server_url=: (String) -> String + + def initialize: ( + server_label: String, + ?allowed_tools: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::allowed_tools?, + ?authorization: String, + ?connector_id: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::connector_id, + ?headers: ::Hash[Symbol, String]?, + ?require_approval: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::require_approval?, + ?server_description: String, + ?server_url: String, + ?type: :mcp + ) -> void + + def to_hash: -> { + server_label: String, + type: :mcp, + allowed_tools: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::allowed_tools?, + authorization: String, + connector_id: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::connector_id, + headers: ::Hash[Symbol, String]?, + require_approval: OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::require_approval?, + server_description: String, + server_url: String + } + + type allowed_tools = + ::Array[String] + | OpenAI::Realtime::RealtimeResponseCreateMcpTool::AllowedTools::McpToolFilter + + module AllowedTools + extend OpenAI::Internal::Type::Union + + type mcp_tool_filter = + { read_only: bool, tool_names: ::Array[String] } + + class McpToolFilter < OpenAI::Internal::Type::BaseModel + attr_reader read_only: bool? + + def read_only=: (bool) -> bool + + attr_reader tool_names: ::Array[String]? 
+ + def tool_names=: (::Array[String]) -> ::Array[String] + + def initialize: ( + ?read_only: bool, + ?tool_names: ::Array[String] + ) -> void + + def to_hash: -> { read_only: bool, tool_names: ::Array[String] } + end + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::allowed_tools] + + StringArray: OpenAI::Internal::Type::Converter + end + + type connector_id = + :connector_dropbox + | :connector_gmail + | :connector_googlecalendar + | :connector_googledrive + | :connector_microsoftteams + | :connector_outlookcalendar + | :connector_outlookemail + | :connector_sharepoint + + module ConnectorID + extend OpenAI::Internal::Type::Enum + + CONNECTOR_DROPBOX: :connector_dropbox + CONNECTOR_GMAIL: :connector_gmail + CONNECTOR_GOOGLECALENDAR: :connector_googlecalendar + CONNECTOR_GOOGLEDRIVE: :connector_googledrive + CONNECTOR_MICROSOFTTEAMS: :connector_microsoftteams + CONNECTOR_OUTLOOKCALENDAR: :connector_outlookcalendar + CONNECTOR_OUTLOOKEMAIL: :connector_outlookemail + CONNECTOR_SHAREPOINT: :connector_sharepoint + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::connector_id] + end + + type require_approval = + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter + | OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::mcp_tool_approval_setting + + module RequireApproval + extend OpenAI::Internal::Type::Union + + type mcp_tool_approval_filter = + { + always: OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always, + never: OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never + } + + class McpToolApprovalFilter < OpenAI::Internal::Type::BaseModel + attr_reader always: OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always? + + def always=: ( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always + ) -> OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always + + attr_reader never: OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never? + + def never=: ( + OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never + ) -> OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never + + def initialize: ( + ?always: OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always, + ?never: OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never + ) -> void + + def to_hash: -> { + always: OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Always, + never: OpenAI::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::McpToolApprovalFilter::Never + } + + type always = { read_only: bool, tool_names: ::Array[String] } + + class Always < OpenAI::Internal::Type::BaseModel + attr_reader read_only: bool? + + def read_only=: (bool) -> bool + + attr_reader tool_names: ::Array[String]? + + def tool_names=: (::Array[String]) -> ::Array[String] + + def initialize: ( + ?read_only: bool, + ?tool_names: ::Array[String] + ) -> void + + def to_hash: -> { read_only: bool, tool_names: ::Array[String] } + end + + type never = { read_only: bool, tool_names: ::Array[String] } + + class Never < OpenAI::Internal::Type::BaseModel + attr_reader read_only: bool? 
+ + def read_only=: (bool) -> bool + + attr_reader tool_names: ::Array[String]? + + def tool_names=: (::Array[String]) -> ::Array[String] + + def initialize: ( + ?read_only: bool, + ?tool_names: ::Array[String] + ) -> void + + def to_hash: -> { read_only: bool, tool_names: ::Array[String] } + end + end + + type mcp_tool_approval_setting = :always | :never + + module McpToolApprovalSetting + extend OpenAI::Internal::Type::Enum + + ALWAYS: :always + NEVER: :never + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::RequireApproval::mcp_tool_approval_setting] + end + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateMcpTool::require_approval] + end + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_response_create_params.rbs b/sig/openai/models/realtime/realtime_response_create_params.rbs new file mode 100644 index 00000000..791fadbe --- /dev/null +++ b/sig/openai/models/realtime/realtime_response_create_params.rbs @@ -0,0 +1,148 @@ +module OpenAI + module Models + module Realtime + type realtime_response_create_params = + { + audio: OpenAI::Realtime::RealtimeResponseCreateAudioOutput, + conversation: OpenAI::Models::Realtime::RealtimeResponseCreateParams::conversation, + input: ::Array[OpenAI::Models::Realtime::conversation_item], + instructions: String, + max_output_tokens: OpenAI::Models::Realtime::RealtimeResponseCreateParams::max_output_tokens, + metadata: OpenAI::Models::metadata?, + output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::output_modality], + prompt: OpenAI::Responses::ResponsePrompt?, + tool_choice: OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool_choice, + tools: ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool] + } + + class RealtimeResponseCreateParams < OpenAI::Internal::Type::BaseModel + attr_reader audio: OpenAI::Realtime::RealtimeResponseCreateAudioOutput? + + def audio=: ( + OpenAI::Realtime::RealtimeResponseCreateAudioOutput + ) -> OpenAI::Realtime::RealtimeResponseCreateAudioOutput + + attr_reader conversation: OpenAI::Models::Realtime::RealtimeResponseCreateParams::conversation? + + def conversation=: ( + OpenAI::Models::Realtime::RealtimeResponseCreateParams::conversation + ) -> OpenAI::Models::Realtime::RealtimeResponseCreateParams::conversation + + attr_reader input: ::Array[OpenAI::Models::Realtime::conversation_item]? + + def input=: ( + ::Array[OpenAI::Models::Realtime::conversation_item] + ) -> ::Array[OpenAI::Models::Realtime::conversation_item] + + attr_reader instructions: String? + + def instructions=: (String) -> String + + attr_reader max_output_tokens: OpenAI::Models::Realtime::RealtimeResponseCreateParams::max_output_tokens? + + def max_output_tokens=: ( + OpenAI::Models::Realtime::RealtimeResponseCreateParams::max_output_tokens + ) -> OpenAI::Models::Realtime::RealtimeResponseCreateParams::max_output_tokens + + attr_accessor metadata: OpenAI::Models::metadata? + + attr_reader output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::output_modality]? + + def output_modalities=: ( + ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::output_modality] + ) -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::output_modality] + + attr_accessor prompt: OpenAI::Responses::ResponsePrompt? + + attr_reader tool_choice: OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool_choice? 
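Per the signatures above, `RealtimeResponseCreateMcpTool` requires only `server_label` (plus the fixed `type: :mcp`); `allowed_tools` takes a string array or a filter object, and `require_approval` takes either a filter object or a plain `:always`/`:never`. A sketch against a hypothetical server:

```ruby
mcp_tool = OpenAI::Realtime::RealtimeResponseCreateMcpTool.new(
  server_label: "docs",                  # hypothetical label
  server_url: "https://example.com/mcp", # hypothetical server
  allowed_tools: ["search_docs"],        # string-array variant of the union
  require_approval: :never               # enum-setting variant of the union
)
```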
+ + def tool_choice=: ( + OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool_choice + ) -> OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool_choice + + attr_reader tools: ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool]? + + def tools=: ( + ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool] + ) -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool] + + def initialize: ( + ?audio: OpenAI::Realtime::RealtimeResponseCreateAudioOutput, + ?conversation: OpenAI::Models::Realtime::RealtimeResponseCreateParams::conversation, + ?input: ::Array[OpenAI::Models::Realtime::conversation_item], + ?instructions: String, + ?max_output_tokens: OpenAI::Models::Realtime::RealtimeResponseCreateParams::max_output_tokens, + ?metadata: OpenAI::Models::metadata?, + ?output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::output_modality], + ?prompt: OpenAI::Responses::ResponsePrompt?, + ?tool_choice: OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool_choice, + ?tools: ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool] + ) -> void + + def to_hash: -> { + audio: OpenAI::Realtime::RealtimeResponseCreateAudioOutput, + conversation: OpenAI::Models::Realtime::RealtimeResponseCreateParams::conversation, + input: ::Array[OpenAI::Models::Realtime::conversation_item], + instructions: String, + max_output_tokens: OpenAI::Models::Realtime::RealtimeResponseCreateParams::max_output_tokens, + metadata: OpenAI::Models::metadata?, + output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::output_modality], + prompt: OpenAI::Responses::ResponsePrompt?, + tool_choice: OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool_choice, + tools: ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool] + } + + type conversation = String | :auto | :none + + module Conversation + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::conversation] + + AUTO: :auto + NONE: :none + end + + type max_output_tokens = Integer | :inf + + module MaxOutputTokens + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::max_output_tokens] + end + + type output_modality = :text | :audio + + module OutputModality + extend OpenAI::Internal::Type::Enum + + TEXT: :text + AUDIO: :audio + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::output_modality] + end + + type tool_choice = + OpenAI::Models::Responses::tool_choice_options + | OpenAI::Responses::ToolChoiceFunction + | OpenAI::Responses::ToolChoiceMcp + + module ToolChoice + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool_choice] + end + + type tool = + OpenAI::Realtime::Models + | OpenAI::Realtime::RealtimeResponseCreateMcpTool + + module Tool + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeResponseCreateParams::tool] + end + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs b/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs index c8d11329..0d02439c 100644 --- a/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +++ b/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs @@ 
-2,7 +2,13 @@ module OpenAI module Models module Realtime type realtime_response_usage_input_token_details = - { audio_tokens: Integer, cached_tokens: Integer, text_tokens: Integer } + { + audio_tokens: Integer, + cached_tokens: Integer, + cached_tokens_details: OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails, + image_tokens: Integer, + text_tokens: Integer + } class RealtimeResponseUsageInputTokenDetails < OpenAI::Internal::Type::BaseModel attr_reader audio_tokens: Integer? @@ -13,6 +19,16 @@ module OpenAI def cached_tokens=: (Integer) -> Integer + attr_reader cached_tokens_details: OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails? + + def cached_tokens_details=: ( + OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails + ) -> OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails + + attr_reader image_tokens: Integer? + + def image_tokens=: (Integer) -> Integer + attr_reader text_tokens: Integer? def text_tokens=: (Integer) -> Integer @@ -20,14 +36,47 @@ module OpenAI def initialize: ( ?audio_tokens: Integer, ?cached_tokens: Integer, + ?cached_tokens_details: OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails, + ?image_tokens: Integer, ?text_tokens: Integer ) -> void def to_hash: -> { audio_tokens: Integer, cached_tokens: Integer, + cached_tokens_details: OpenAI::Realtime::RealtimeResponseUsageInputTokenDetails::CachedTokensDetails, + image_tokens: Integer, text_tokens: Integer } + + type cached_tokens_details = + { audio_tokens: Integer, image_tokens: Integer, text_tokens: Integer } + + class CachedTokensDetails < OpenAI::Internal::Type::BaseModel + attr_reader audio_tokens: Integer? + + def audio_tokens=: (Integer) -> Integer + + attr_reader image_tokens: Integer? + + def image_tokens=: (Integer) -> Integer + + attr_reader text_tokens: Integer? 
+ + def text_tokens=: (Integer) -> Integer + + def initialize: ( + ?audio_tokens: Integer, + ?image_tokens: Integer, + ?text_tokens: Integer + ) -> void + + def to_hash: -> { + audio_tokens: Integer, + image_tokens: Integer, + text_tokens: Integer + } + end end end end diff --git a/sig/openai/models/realtime/realtime_session.rbs b/sig/openai/models/realtime/realtime_session.rbs index c6230057..a7955ca2 100644 --- a/sig/openai/models/realtime/realtime_session.rbs +++ b/sig/openai/models/realtime/realtime_session.rbs @@ -8,7 +8,7 @@ module OpenAI include: ::Array[OpenAI::Models::Realtime::RealtimeSession::include_]?, input_audio_format: OpenAI::Models::Realtime::RealtimeSession::input_audio_format, input_audio_noise_reduction: OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction, - input_audio_transcription: OpenAI::Realtime::RealtimeSession::InputAudioTranscription?, + input_audio_transcription: OpenAI::Realtime::AudioTranscription?, instructions: String, max_response_output_tokens: OpenAI::Models::Realtime::RealtimeSession::max_response_output_tokens, modalities: ::Array[OpenAI::Models::Realtime::RealtimeSession::modality], @@ -19,7 +19,7 @@ module OpenAI speed: Float, temperature: Float, tool_choice: String, - tools: ::Array[OpenAI::Realtime::RealtimeSession::Tool], + tools: ::Array[OpenAI::Realtime::Models], tracing: OpenAI::Models::Realtime::RealtimeSession::tracing?, turn_detection: OpenAI::Realtime::RealtimeSession::TurnDetection?, voice: OpenAI::Models::Realtime::RealtimeSession::voice @@ -48,7 +48,7 @@ module OpenAI OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction ) -> OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction - attr_accessor input_audio_transcription: OpenAI::Realtime::RealtimeSession::InputAudioTranscription? + attr_accessor input_audio_transcription: OpenAI::Realtime::AudioTranscription? attr_reader instructions: String? @@ -98,11 +98,11 @@ module OpenAI def tool_choice=: (String) -> String - attr_reader tools: ::Array[OpenAI::Realtime::RealtimeSession::Tool]? + attr_reader tools: ::Array[OpenAI::Realtime::Models]? def tools=: ( - ::Array[OpenAI::Realtime::RealtimeSession::Tool] - ) -> ::Array[OpenAI::Realtime::RealtimeSession::Tool] + ::Array[OpenAI::Realtime::Models] + ) -> ::Array[OpenAI::Realtime::Models] attr_accessor tracing: OpenAI::Models::Realtime::RealtimeSession::tracing? 
@@ -120,7 +120,7 @@ module OpenAI ?include: ::Array[OpenAI::Models::Realtime::RealtimeSession::include_]?, ?input_audio_format: OpenAI::Models::Realtime::RealtimeSession::input_audio_format, ?input_audio_noise_reduction: OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction, - ?input_audio_transcription: OpenAI::Realtime::RealtimeSession::InputAudioTranscription?, + ?input_audio_transcription: OpenAI::Realtime::AudioTranscription?, ?instructions: String, ?max_response_output_tokens: OpenAI::Models::Realtime::RealtimeSession::max_response_output_tokens, ?modalities: ::Array[OpenAI::Models::Realtime::RealtimeSession::modality], @@ -131,7 +131,7 @@ module OpenAI ?speed: Float, ?temperature: Float, ?tool_choice: String, - ?tools: ::Array[OpenAI::Realtime::RealtimeSession::Tool], + ?tools: ::Array[OpenAI::Realtime::Models], ?tracing: OpenAI::Models::Realtime::RealtimeSession::tracing?, ?turn_detection: OpenAI::Realtime::RealtimeSession::TurnDetection?, ?voice: OpenAI::Models::Realtime::RealtimeSession::voice @@ -143,7 +143,7 @@ module OpenAI include: ::Array[OpenAI::Models::Realtime::RealtimeSession::include_]?, input_audio_format: OpenAI::Models::Realtime::RealtimeSession::input_audio_format, input_audio_noise_reduction: OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction, - input_audio_transcription: OpenAI::Realtime::RealtimeSession::InputAudioTranscription?, + input_audio_transcription: OpenAI::Realtime::AudioTranscription?, instructions: String, max_response_output_tokens: OpenAI::Models::Realtime::RealtimeSession::max_response_output_tokens, modalities: ::Array[OpenAI::Models::Realtime::RealtimeSession::modality], @@ -154,7 +154,7 @@ module OpenAI speed: Float, temperature: Float, tool_choice: String, - tools: ::Array[OpenAI::Realtime::RealtimeSession::Tool], + tools: ::Array[OpenAI::Realtime::Models], tracing: OpenAI::Models::Realtime::RealtimeSession::tracing?, turn_detection: OpenAI::Realtime::RealtimeSession::TurnDetection?, voice: OpenAI::Models::Realtime::RealtimeSession::voice @@ -183,60 +183,22 @@ module OpenAI end type input_audio_noise_reduction = - { - type: OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::type_ - } + { type: OpenAI::Models::Realtime::noise_reduction_type } class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel - attr_reader type: OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::type_? + attr_reader type: OpenAI::Models::Realtime::noise_reduction_type? def type=: ( - OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::type_ - ) -> OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::type_ + OpenAI::Models::Realtime::noise_reduction_type + ) -> OpenAI::Models::Realtime::noise_reduction_type def initialize: ( - ?type: OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::type_ + ?type: OpenAI::Models::Realtime::noise_reduction_type ) -> void def to_hash: -> { - type: OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::type_ + type: OpenAI::Models::Realtime::noise_reduction_type } - - type type_ = :near_field | :far_field - - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD: :near_field - FAR_FIELD: :far_field - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::type_] - end - end - - type input_audio_transcription = - { language: String, model: String, prompt: String } - - class InputAudioTranscription < OpenAI::Internal::Type::BaseModel - attr_reader language: String? 
- - def language=: (String) -> String - - attr_reader model: String? - - def model=: (String) -> String - - attr_reader prompt: String? - - def prompt=: (String) -> String - - def initialize: ( - ?language: String, - ?model: String, - ?prompt: String - ) -> void - - def to_hash: -> { language: String, model: String, prompt: String } end type max_response_output_tokens = Integer | :inf @@ -305,58 +267,6 @@ module OpenAI def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSession::output_audio_format] end - type tool = - { - description: String, - name: String, - parameters: top, - type: OpenAI::Models::Realtime::RealtimeSession::Tool::type_ - } - - class Tool < OpenAI::Internal::Type::BaseModel - attr_reader description: String? - - def description=: (String) -> String - - attr_reader name: String? - - def name=: (String) -> String - - attr_reader parameters: top? - - def parameters=: (top) -> top - - attr_reader type: OpenAI::Models::Realtime::RealtimeSession::Tool::type_? - - def type=: ( - OpenAI::Models::Realtime::RealtimeSession::Tool::type_ - ) -> OpenAI::Models::Realtime::RealtimeSession::Tool::type_ - - def initialize: ( - ?description: String, - ?name: String, - ?parameters: top, - ?type: OpenAI::Models::Realtime::RealtimeSession::Tool::type_ - ) -> void - - def to_hash: -> { - description: String, - name: String, - parameters: top, - type: OpenAI::Models::Realtime::RealtimeSession::Tool::type_ - } - - type type_ = :function - - module Type - extend OpenAI::Internal::Type::Enum - - FUNCTION: :function - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSession::Tool::type_] - end - end - type tracing = :auto | OpenAI::Realtime::RealtimeSession::Tracing::TracingConfiguration diff --git a/sig/openai/models/realtime/realtime_session_client_secret.rbs b/sig/openai/models/realtime/realtime_session_client_secret.rbs new file mode 100644 index 00000000..fd104e7e --- /dev/null +++ b/sig/openai/models/realtime/realtime_session_client_secret.rbs @@ -0,0 +1,20 @@ +module OpenAI + module Models + class RealtimeSessionClientSecret = Realtime::RealtimeSessionClientSecret + + module Realtime + type realtime_session_client_secret = + { expires_at: Integer, value: String } + + class RealtimeSessionClientSecret < OpenAI::Internal::Type::BaseModel + attr_accessor expires_at: Integer + + attr_accessor value: String + + def initialize: (expires_at: Integer, value: String) -> void + + def to_hash: -> { expires_at: Integer, value: String } + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_session_create_request.rbs b/sig/openai/models/realtime/realtime_session_create_request.rbs index af194e00..d7db4531 100644 --- a/sig/openai/models/realtime/realtime_session_create_request.rbs +++ b/sig/openai/models/realtime/realtime_session_create_request.rbs @@ -3,16 +3,14 @@ module OpenAI module Realtime type realtime_session_create_request = { - model: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::model, type: :realtime, audio: OpenAI::Realtime::RealtimeAudioConfig, - client_secret: OpenAI::Realtime::RealtimeClientSecretConfig, include: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::include_], instructions: String, max_output_tokens: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::max_output_tokens, + model: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::model, output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::output_modality], prompt: OpenAI::Responses::ResponsePrompt?, - temperature: Float, 
tool_choice: OpenAI::Models::Realtime::realtime_tool_choice_config, tools: OpenAI::Models::Realtime::realtime_tools_config, tracing: OpenAI::Models::Realtime::realtime_tracing_config?, @@ -20,8 +18,6 @@ module OpenAI } class RealtimeSessionCreateRequest < OpenAI::Internal::Type::BaseModel - attr_accessor model: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::model - attr_accessor type: :realtime attr_reader audio: OpenAI::Realtime::RealtimeAudioConfig? @@ -30,12 +26,6 @@ module OpenAI OpenAI::Realtime::RealtimeAudioConfig ) -> OpenAI::Realtime::RealtimeAudioConfig - attr_reader client_secret: OpenAI::Realtime::RealtimeClientSecretConfig? - - def client_secret=: ( - OpenAI::Realtime::RealtimeClientSecretConfig - ) -> OpenAI::Realtime::RealtimeClientSecretConfig - attr_reader include: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::include_]? def include=: ( @@ -52,6 +42,12 @@ module OpenAI OpenAI::Models::Realtime::RealtimeSessionCreateRequest::max_output_tokens ) -> OpenAI::Models::Realtime::RealtimeSessionCreateRequest::max_output_tokens + attr_reader model: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::model? + + def model=: ( + OpenAI::Models::Realtime::RealtimeSessionCreateRequest::model + ) -> OpenAI::Models::Realtime::RealtimeSessionCreateRequest::model + attr_reader output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::output_modality]? def output_modalities=: ( @@ -60,10 +56,6 @@ module OpenAI attr_accessor prompt: OpenAI::Responses::ResponsePrompt? - attr_reader temperature: Float? - - def temperature=: (Float) -> Float - attr_reader tool_choice: OpenAI::Models::Realtime::realtime_tool_choice_config? def tool_choice=: ( @@ -85,15 +77,13 @@ module OpenAI ) -> OpenAI::Models::Realtime::realtime_truncation def initialize: ( - model: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::model, ?audio: OpenAI::Realtime::RealtimeAudioConfig, - ?client_secret: OpenAI::Realtime::RealtimeClientSecretConfig, ?include: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::include_], ?instructions: String, ?max_output_tokens: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::max_output_tokens, + ?model: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::model, ?output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::output_modality], ?prompt: OpenAI::Responses::ResponsePrompt?, - ?temperature: Float, ?tool_choice: OpenAI::Models::Realtime::realtime_tool_choice_config, ?tools: OpenAI::Models::Realtime::realtime_tools_config, ?tracing: OpenAI::Models::Realtime::realtime_tracing_config?, @@ -102,28 +92,42 @@ module OpenAI ) -> void def to_hash: -> { - model: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::model, type: :realtime, audio: OpenAI::Realtime::RealtimeAudioConfig, - client_secret: OpenAI::Realtime::RealtimeClientSecretConfig, include: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::include_], instructions: String, max_output_tokens: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::max_output_tokens, + model: OpenAI::Models::Realtime::RealtimeSessionCreateRequest::model, output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::output_modality], prompt: OpenAI::Responses::ResponsePrompt?, - temperature: Float, tool_choice: OpenAI::Models::Realtime::realtime_tool_choice_config, tools: OpenAI::Models::Realtime::realtime_tools_config, tracing: OpenAI::Models::Realtime::realtime_tracing_config?, truncation: 
OpenAI::Models::Realtime::realtime_truncation } + type include_ = :"item.input_audio_transcription.logprobs" + + module Include + extend OpenAI::Internal::Type::Enum + + ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS: :"item.input_audio_transcription.logprobs" + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::include_] + end + + type max_output_tokens = Integer | :inf + + module MaxOutputTokens + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::max_output_tokens] + end + type model = String | :"gpt-realtime" | :"gpt-realtime-2025-08-28" - | :"gpt-4o-realtime" - | :"gpt-4o-mini-realtime" | :"gpt-4o-realtime-preview" | :"gpt-4o-realtime-preview-2024-10-01" | :"gpt-4o-realtime-preview-2024-12-17" @@ -138,8 +142,6 @@ module OpenAI GPT_REALTIME: :"gpt-realtime" GPT_REALTIME_2025_08_28: :"gpt-realtime-2025-08-28" - GPT_4O_REALTIME: :"gpt-4o-realtime" - GPT_4O_MINI_REALTIME: :"gpt-4o-mini-realtime" GPT_4O_REALTIME_PREVIEW: :"gpt-4o-realtime-preview" GPT_4O_REALTIME_PREVIEW_2024_10_01: :"gpt-4o-realtime-preview-2024-10-01" GPT_4O_REALTIME_PREVIEW_2024_12_17: :"gpt-4o-realtime-preview-2024-12-17" @@ -148,24 +150,6 @@ module OpenAI GPT_4O_MINI_REALTIME_PREVIEW_2024_12_17: :"gpt-4o-mini-realtime-preview-2024-12-17" end - type include_ = :"item.input_audio_transcription.logprobs" - - module Include - extend OpenAI::Internal::Type::Enum - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS: :"item.input_audio_transcription.logprobs" - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::include_] - end - - type max_output_tokens = Integer | :inf - - module MaxOutputTokens - extend OpenAI::Internal::Type::Union - - def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateRequest::max_output_tokens] - end - type output_modality = :text | :audio module OutputModality diff --git a/sig/openai/models/realtime/realtime_session_create_response.rbs b/sig/openai/models/realtime/realtime_session_create_response.rbs index de7cdba0..86d7963a 100644 --- a/sig/openai/models/realtime/realtime_session_create_response.rbs +++ b/sig/openai/models/realtime/realtime_session_create_response.rbs @@ -5,35 +5,33 @@ module OpenAI module Realtime type realtime_session_create_response = { - id: String, audio: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio, - expires_at: Integer, + client_secret: OpenAI::Realtime::RealtimeSessionClientSecret, include: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::include_], instructions: String, max_output_tokens: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::max_output_tokens, - model: String, - object: String, + model: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::model, output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::output_modality], - tool_choice: String, - tools: ::Array[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool], - tracing: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing, - turn_detection: OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection + prompt: OpenAI::Responses::ResponsePrompt?, + tool_choice: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool_choice, + tools: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool], + tracing: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing?, + truncation: OpenAI::Models::Realtime::realtime_truncation, + type: 
OpenAI::Models::Realtime::RealtimeSessionCreateResponse::type_ } class RealtimeSessionCreateResponse < OpenAI::Internal::Type::BaseModel - attr_reader id: String? - - def id=: (String) -> String - attr_reader audio: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio? def audio=: ( OpenAI::Realtime::RealtimeSessionCreateResponse::Audio ) -> OpenAI::Realtime::RealtimeSessionCreateResponse::Audio - attr_reader expires_at: Integer? + attr_reader client_secret: OpenAI::Realtime::RealtimeSessionClientSecret? - def expires_at=: (Integer) -> Integer + def client_secret=: ( + OpenAI::Realtime::RealtimeSessionClientSecret + ) -> OpenAI::Realtime::RealtimeSessionClientSecret attr_reader include: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::include_]? @@ -51,13 +49,11 @@ module OpenAI OpenAI::Models::Realtime::RealtimeSessionCreateResponse::max_output_tokens ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::max_output_tokens - attr_reader model: String? - - def model=: (String) -> String + attr_reader model: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::model? - attr_reader object: String? - - def object=: (String) -> String + def model=: ( + OpenAI::Models::Realtime::RealtimeSessionCreateResponse::model + ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::model attr_reader output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::output_modality]? @@ -65,58 +61,64 @@ module OpenAI ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::output_modality] ) -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::output_modality] - attr_reader tool_choice: String? + attr_accessor prompt: OpenAI::Responses::ResponsePrompt? + + attr_reader tool_choice: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool_choice? - def tool_choice=: (String) -> String + def tool_choice=: ( + OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool_choice + ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool_choice - attr_reader tools: ::Array[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool]? + attr_reader tools: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool]? def tools=: ( - ::Array[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool] - ) -> ::Array[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool] + ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool] + ) -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool] - attr_reader tracing: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing? + attr_accessor tracing: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing? - def tracing=: ( - OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing - ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing + attr_reader truncation: OpenAI::Models::Realtime::realtime_truncation? - attr_reader turn_detection: OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection? + def truncation=: ( + OpenAI::Models::Realtime::realtime_truncation + ) -> OpenAI::Models::Realtime::realtime_truncation - def turn_detection=: ( - OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection - ) -> OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection + attr_reader type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::type_? 
+ + def type=: ( + OpenAI::Models::Realtime::RealtimeSessionCreateResponse::type_ + ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::type_ def initialize: ( - ?id: String, ?audio: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio, - ?expires_at: Integer, + ?client_secret: OpenAI::Realtime::RealtimeSessionClientSecret, ?include: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::include_], ?instructions: String, ?max_output_tokens: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::max_output_tokens, - ?model: String, - ?object: String, + ?model: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::model, ?output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::output_modality], - ?tool_choice: String, - ?tools: ::Array[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool], - ?tracing: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing, - ?turn_detection: OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection + ?prompt: OpenAI::Responses::ResponsePrompt?, + ?tool_choice: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool_choice, + ?tools: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool], + ?tracing: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing?, + ?truncation: OpenAI::Models::Realtime::realtime_truncation, + ?type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::type_ ) -> void def to_hash: -> { - id: String, audio: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio, - expires_at: Integer, + client_secret: OpenAI::Realtime::RealtimeSessionClientSecret, include: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::include_], instructions: String, max_output_tokens: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::max_output_tokens, - model: String, - object: String, + model: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::model, output_modalities: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::output_modality], - tool_choice: String, - tools: ::Array[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool], - tracing: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing, - turn_detection: OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection + prompt: OpenAI::Responses::ResponsePrompt?, + tool_choice: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool_choice, + tools: ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool], + tracing: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing?, + truncation: OpenAI::Models::Realtime::realtime_truncation, + type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::type_ } type audio = @@ -150,16 +152,18 @@ module OpenAI type input = { - format_: String, + format_: OpenAI::Models::Realtime::realtime_audio_formats, noise_reduction: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction, - transcription: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription, + transcription: OpenAI::Realtime::AudioTranscription, turn_detection: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection } class Input < OpenAI::Internal::Type::BaseModel - attr_reader format_: String? + attr_reader format_: OpenAI::Models::Realtime::realtime_audio_formats? 
- def format_=: (String) -> String + def format_=: ( + OpenAI::Models::Realtime::realtime_audio_formats + ) -> OpenAI::Models::Realtime::realtime_audio_formats attr_reader noise_reduction: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction? @@ -167,11 +171,11 @@ module OpenAI OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction ) -> OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction - attr_reader transcription: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription? + attr_reader transcription: OpenAI::Realtime::AudioTranscription? def transcription=: ( - OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription - ) -> OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription + OpenAI::Realtime::AudioTranscription + ) -> OpenAI::Realtime::AudioTranscription attr_reader turn_detection: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection? @@ -180,89 +184,67 @@ module OpenAI ) -> OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection def initialize: ( - ?format_: String, + ?format_: OpenAI::Models::Realtime::realtime_audio_formats, ?noise_reduction: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction, - ?transcription: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription, + ?transcription: OpenAI::Realtime::AudioTranscription, ?turn_detection: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection ) -> void def to_hash: -> { - format_: String, + format_: OpenAI::Models::Realtime::realtime_audio_formats, noise_reduction: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction, - transcription: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription, + transcription: OpenAI::Realtime::AudioTranscription, turn_detection: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection } type noise_reduction = - { - type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::type_ - } + { type: OpenAI::Models::Realtime::noise_reduction_type } class NoiseReduction < OpenAI::Internal::Type::BaseModel - attr_reader type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::type_? + attr_reader type: OpenAI::Models::Realtime::noise_reduction_type? def type=: ( - OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::type_ - ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::type_ + OpenAI::Models::Realtime::noise_reduction_type + ) -> OpenAI::Models::Realtime::noise_reduction_type def initialize: ( - ?type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::type_ + ?type: OpenAI::Models::Realtime::noise_reduction_type ) -> void def to_hash: -> { - type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::type_ - } - - type type_ = :near_field | :far_field - - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD: :near_field - FAR_FIELD: :far_field - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::type_] - end - end - - type transcription = - { language: String, model: String, prompt: String } - - class Transcription < OpenAI::Internal::Type::BaseModel - attr_reader language: String? 
- - def language=: (String) -> String - - attr_reader model: String? - - def model=: (String) -> String - - attr_reader prompt: String? - - def prompt=: (String) -> String - - def initialize: ( - ?language: String, - ?model: String, - ?prompt: String - ) -> void - - def to_hash: -> { - language: String, - model: String, - prompt: String + type: OpenAI::Models::Realtime::noise_reduction_type } end type turn_detection = { + create_response: bool, + eagerness: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::eagerness, + idle_timeout_ms: Integer?, + interrupt_response: bool, prefix_padding_ms: Integer, silence_duration_ms: Integer, threshold: Float, - type: String + type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::type_ } class TurnDetection < OpenAI::Internal::Type::BaseModel + attr_reader create_response: bool? + + def create_response=: (bool) -> bool + + attr_reader eagerness: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::eagerness? + + def eagerness=: ( + OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::eagerness + ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::eagerness + + attr_accessor idle_timeout_ms: Integer? + + attr_reader interrupt_response: bool? + + def interrupt_response=: (bool) -> bool + attr_reader prefix_padding_ms: Integer? def prefix_padding_ms=: (Integer) -> Integer @@ -275,37 +257,73 @@ module OpenAI def threshold=: (Float) -> Float - attr_reader type: String? + attr_reader type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::type_? - def type=: (String) -> String + def type=: ( + OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::type_ + ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::type_ def initialize: ( + ?create_response: bool, + ?eagerness: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::eagerness, + ?idle_timeout_ms: Integer?, + ?interrupt_response: bool, ?prefix_padding_ms: Integer, ?silence_duration_ms: Integer, ?threshold: Float, - ?type: String + ?type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::type_ ) -> void def to_hash: -> { + create_response: bool, + eagerness: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::eagerness, + idle_timeout_ms: Integer?, + interrupt_response: bool, prefix_padding_ms: Integer, silence_duration_ms: Integer, threshold: Float, - type: String + type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::type_ } + + type eagerness = :low | :medium | :high | :auto + + module Eagerness + extend OpenAI::Internal::Type::Enum + + LOW: :low + MEDIUM: :medium + HIGH: :high + AUTO: :auto + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::eagerness] + end + + type type_ = :server_vad | :semantic_vad + + module Type + extend OpenAI::Internal::Type::Enum + + SERVER_VAD: :server_vad + SEMANTIC_VAD: :semantic_vad + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::type_] + end end end type output = { - format_: String, + format_: OpenAI::Models::Realtime::realtime_audio_formats, speed: Float, voice: 
OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Output::voice } class Output < OpenAI::Internal::Type::BaseModel - attr_reader format_: String? + attr_reader format_: OpenAI::Models::Realtime::realtime_audio_formats? - def format_=: (String) -> String + def format_=: ( + OpenAI::Models::Realtime::realtime_audio_formats + ) -> OpenAI::Models::Realtime::realtime_audio_formats attr_reader speed: Float? @@ -318,13 +336,13 @@ module OpenAI ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Output::voice def initialize: ( - ?format_: String, + ?format_: OpenAI::Models::Realtime::realtime_audio_formats, ?speed: Float, ?voice: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Output::voice ) -> void def to_hash: -> { - format_: String, + format_: OpenAI::Models::Realtime::realtime_audio_formats, speed: Float, voice: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Audio::Output::voice } @@ -379,6 +397,32 @@ module OpenAI def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::max_output_tokens] end + type model = + String + | :"gpt-realtime" + | :"gpt-realtime-2025-08-28" + | :"gpt-4o-realtime-preview" + | :"gpt-4o-realtime-preview-2024-10-01" + | :"gpt-4o-realtime-preview-2024-12-17" + | :"gpt-4o-realtime-preview-2025-06-03" + | :"gpt-4o-mini-realtime-preview" + | :"gpt-4o-mini-realtime-preview-2024-12-17" + + module Model + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::model] + + GPT_REALTIME: :"gpt-realtime" + GPT_REALTIME_2025_08_28: :"gpt-realtime-2025-08-28" + GPT_4O_REALTIME_PREVIEW: :"gpt-4o-realtime-preview" + GPT_4O_REALTIME_PREVIEW_2024_10_01: :"gpt-4o-realtime-preview-2024-10-01" + GPT_4O_REALTIME_PREVIEW_2024_12_17: :"gpt-4o-realtime-preview-2024-12-17" + GPT_4O_REALTIME_PREVIEW_2025_06_03: :"gpt-4o-realtime-preview-2025-06-03" + GPT_4O_MINI_REALTIME_PREVIEW: :"gpt-4o-mini-realtime-preview" + GPT_4O_MINI_REALTIME_PREVIEW_2024_12_17: :"gpt-4o-mini-realtime-preview-2024-12-17" + end + type output_modality = :text | :audio module OutputModality @@ -390,56 +434,244 @@ module OpenAI def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::output_modality] end + type tool_choice = + OpenAI::Models::Responses::tool_choice_options + | OpenAI::Responses::ToolChoiceFunction + | OpenAI::Responses::ToolChoiceMcp + + module ToolChoice + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool_choice] + end + type tool = - { - description: String, - name: String, - parameters: top, - type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::type_ - } + OpenAI::Realtime::Models + | OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool - class Tool < OpenAI::Internal::Type::BaseModel - attr_reader description: String? 
+ module Tool + extend OpenAI::Internal::Type::Union - def description=: (String) -> String + type mcp_tool = + { + server_label: String, + type: :mcp, + allowed_tools: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::allowed_tools?, + authorization: String, + connector_id: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::connector_id, + headers: ::Hash[Symbol, String]?, + require_approval: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::require_approval?, + server_description: String, + server_url: String + } - attr_reader name: String? + class McpTool < OpenAI::Internal::Type::BaseModel + attr_accessor server_label: String - def name=: (String) -> String + attr_accessor type: :mcp - attr_reader parameters: top? + attr_accessor allowed_tools: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::allowed_tools? - def parameters=: (top) -> top + attr_reader authorization: String? - attr_reader type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::type_? + def authorization=: (String) -> String - def type=: ( - OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::type_ - ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::type_ + attr_reader connector_id: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::connector_id? - def initialize: ( - ?description: String, - ?name: String, - ?parameters: top, - ?type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::type_ - ) -> void + def connector_id=: ( + OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::connector_id + ) -> OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::connector_id - def to_hash: -> { - description: String, - name: String, - parameters: top, - type: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::type_ - } + attr_accessor headers: ::Hash[Symbol, String]? + + attr_accessor require_approval: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::require_approval? - type type_ = :function + attr_reader server_description: String? - module Type - extend OpenAI::Internal::Type::Enum + def server_description=: (String) -> String - FUNCTION: :function + attr_reader server_url: String? 
- def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::type_] + def server_url=: (String) -> String + + def initialize: ( + server_label: String, + ?allowed_tools: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::allowed_tools?, + ?authorization: String, + ?connector_id: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::connector_id, + ?headers: ::Hash[Symbol, String]?, + ?require_approval: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::require_approval?, + ?server_description: String, + ?server_url: String, + ?type: :mcp + ) -> void + + def to_hash: -> { + server_label: String, + type: :mcp, + allowed_tools: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::allowed_tools?, + authorization: String, + connector_id: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::connector_id, + headers: ::Hash[Symbol, String]?, + require_approval: OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::require_approval?, + server_description: String, + server_url: String + } + + type allowed_tools = + ::Array[String] + | OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter + + module AllowedTools + extend OpenAI::Internal::Type::Union + + type mcp_tool_filter = + { read_only: bool, tool_names: ::Array[String] } + + class McpToolFilter < OpenAI::Internal::Type::BaseModel + attr_reader read_only: bool? + + def read_only=: (bool) -> bool + + attr_reader tool_names: ::Array[String]? + + def tool_names=: (::Array[String]) -> ::Array[String] + + def initialize: ( + ?read_only: bool, + ?tool_names: ::Array[String] + ) -> void + + def to_hash: -> { read_only: bool, tool_names: ::Array[String] } + end + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::allowed_tools] + + StringArray: OpenAI::Internal::Type::Converter + end + + type connector_id = + :connector_dropbox + | :connector_gmail + | :connector_googlecalendar + | :connector_googledrive + | :connector_microsoftteams + | :connector_outlookcalendar + | :connector_outlookemail + | :connector_sharepoint + + module ConnectorID + extend OpenAI::Internal::Type::Enum + + CONNECTOR_DROPBOX: :connector_dropbox + CONNECTOR_GMAIL: :connector_gmail + CONNECTOR_GOOGLECALENDAR: :connector_googlecalendar + CONNECTOR_GOOGLEDRIVE: :connector_googledrive + CONNECTOR_MICROSOFTTEAMS: :connector_microsoftteams + CONNECTOR_OUTLOOKCALENDAR: :connector_outlookcalendar + CONNECTOR_OUTLOOKEMAIL: :connector_outlookemail + CONNECTOR_SHAREPOINT: :connector_sharepoint + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::connector_id] + end + + type require_approval = + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter + | OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::mcp_tool_approval_setting + + module RequireApproval + extend OpenAI::Internal::Type::Union + + type mcp_tool_approval_filter = + { + always: OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always, + never: OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never + } + + class McpToolApprovalFilter < OpenAI::Internal::Type::BaseModel + attr_reader always: 
OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always? + + def always=: ( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always + ) -> OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always + + attr_reader never: OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never? + + def never=: ( + OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never + ) -> OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never + + def initialize: ( + ?always: OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always, + ?never: OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never + ) -> void + + def to_hash: -> { + always: OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always, + never: OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never + } + + type always = { read_only: bool, tool_names: ::Array[String] } + + class Always < OpenAI::Internal::Type::BaseModel + attr_reader read_only: bool? + + def read_only=: (bool) -> bool + + attr_reader tool_names: ::Array[String]? + + def tool_names=: (::Array[String]) -> ::Array[String] + + def initialize: ( + ?read_only: bool, + ?tool_names: ::Array[String] + ) -> void + + def to_hash: -> { + read_only: bool, + tool_names: ::Array[String] + } + end + + type never = { read_only: bool, tool_names: ::Array[String] } + + class Never < OpenAI::Internal::Type::BaseModel + attr_reader read_only: bool? + + def read_only=: (bool) -> bool + + attr_reader tool_names: ::Array[String]? + + def tool_names=: (::Array[String]) -> ::Array[String] + + def initialize: ( + ?read_only: bool, + ?tool_names: ::Array[String] + ) -> void + + def to_hash: -> { + read_only: bool, + tool_names: ::Array[String] + } + end + end + + type mcp_tool_approval_setting = :always | :never + + module McpToolApprovalSetting + extend OpenAI::Internal::Type::Enum + + ALWAYS: :always + NEVER: :never + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::mcp_tool_approval_setting] + end + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::require_approval] + end end + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tool] end type tracing = @@ -481,44 +713,14 @@ module OpenAI def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::tracing] end - type turn_detection = - { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } - - class TurnDetection < OpenAI::Internal::Type::BaseModel - attr_reader prefix_padding_ms: Integer? - - def prefix_padding_ms=: (Integer) -> Integer - - attr_reader silence_duration_ms: Integer? - - def silence_duration_ms=: (Integer) -> Integer + type type_ = :realtime - attr_reader threshold: Float? - - def threshold=: (Float) -> Float - - attr_reader type: String? 
- - def type=: (String) -> String + module Type + extend OpenAI::Internal::Type::Enum - def initialize: ( - ?prefix_padding_ms: Integer, - ?silence_duration_ms: Integer, - ?threshold: Float, - ?type: String - ) -> void + REALTIME: :realtime - def to_hash: -> { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeSessionCreateResponse::type_] end end end diff --git a/sig/openai/models/realtime/realtime_tools_config_union.rbs b/sig/openai/models/realtime/realtime_tools_config_union.rbs index 455b739c..dd53d8ea 100644 --- a/sig/openai/models/realtime/realtime_tools_config_union.rbs +++ b/sig/openai/models/realtime/realtime_tools_config_union.rbs @@ -2,64 +2,12 @@ module OpenAI module Models module Realtime type realtime_tools_config_union = - OpenAI::Realtime::RealtimeToolsConfigUnion::Function + OpenAI::Realtime::Models | OpenAI::Realtime::RealtimeToolsConfigUnion::Mcp module RealtimeToolsConfigUnion extend OpenAI::Internal::Type::Union - type function = - { - description: String, - name: String, - parameters: top, - type: OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::type_ - } - - class Function < OpenAI::Internal::Type::BaseModel - attr_reader description: String? - - def description=: (String) -> String - - attr_reader name: String? - - def name=: (String) -> String - - attr_reader parameters: top? - - def parameters=: (top) -> top - - attr_reader type: OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::type_? - - def type=: ( - OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::type_ - ) -> OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::type_ - - def initialize: ( - ?description: String, - ?name: String, - ?parameters: top, - ?type: OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::type_ - ) -> void - - def to_hash: -> { - description: String, - name: String, - parameters: top, - type: OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::type_ - } - - type type_ = :function - - module Type - extend OpenAI::Internal::Type::Enum - - FUNCTION: :function - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::type_] - end - end - type mcp = { server_label: String, diff --git a/sig/openai/models/realtime/realtime_transcription_session_audio.rbs b/sig/openai/models/realtime/realtime_transcription_session_audio.rbs new file mode 100644 index 00000000..32a63989 --- /dev/null +++ b/sig/openai/models/realtime/realtime_transcription_session_audio.rbs @@ -0,0 +1,24 @@ +module OpenAI + module Models + module Realtime + type realtime_transcription_session_audio = + { input: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput } + + class RealtimeTranscriptionSessionAudio < OpenAI::Internal::Type::BaseModel + attr_reader input: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput? 
+ + def input=: ( + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput + ) -> OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput + + def initialize: ( + ?input: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput + ) -> void + + def to_hash: -> { + input: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput + } + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs b/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs new file mode 100644 index 00000000..44b5b8fa --- /dev/null +++ b/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs @@ -0,0 +1,72 @@ +module OpenAI + module Models + module Realtime + type realtime_transcription_session_audio_input = + { + format_: OpenAI::Models::Realtime::realtime_audio_formats, + noise_reduction: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction, + transcription: OpenAI::Realtime::AudioTranscription, + turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection + } + + class RealtimeTranscriptionSessionAudioInput < OpenAI::Internal::Type::BaseModel + attr_reader format_: OpenAI::Models::Realtime::realtime_audio_formats? + + def format_=: ( + OpenAI::Models::Realtime::realtime_audio_formats + ) -> OpenAI::Models::Realtime::realtime_audio_formats + + attr_reader noise_reduction: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction? + + def noise_reduction=: ( + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction + ) -> OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction + + attr_reader transcription: OpenAI::Realtime::AudioTranscription? + + def transcription=: ( + OpenAI::Realtime::AudioTranscription + ) -> OpenAI::Realtime::AudioTranscription + + attr_reader turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection? + + def turn_detection=: ( + OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection + ) -> OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection + + def initialize: ( + ?format_: OpenAI::Models::Realtime::realtime_audio_formats, + ?noise_reduction: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction, + ?transcription: OpenAI::Realtime::AudioTranscription, + ?turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection + ) -> void + + def to_hash: -> { + format_: OpenAI::Models::Realtime::realtime_audio_formats, + noise_reduction: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction, + transcription: OpenAI::Realtime::AudioTranscription, + turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection + } + + type noise_reduction = + { type: OpenAI::Models::Realtime::noise_reduction_type } + + class NoiseReduction < OpenAI::Internal::Type::BaseModel + attr_reader type: OpenAI::Models::Realtime::noise_reduction_type? 
+ + def type=: ( + OpenAI::Models::Realtime::noise_reduction_type + ) -> OpenAI::Models::Realtime::noise_reduction_type + + def initialize: ( + ?type: OpenAI::Models::Realtime::noise_reduction_type + ) -> void + + def to_hash: -> { + type: OpenAI::Models::Realtime::noise_reduction_type + } + end + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs b/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs new file mode 100644 index 00000000..56ac5314 --- /dev/null +++ b/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs @@ -0,0 +1,99 @@ +module OpenAI + module Models + module Realtime + type realtime_transcription_session_audio_input_turn_detection = + { + create_response: bool, + eagerness: OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::eagerness, + idle_timeout_ms: Integer?, + interrupt_response: bool, + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::type_ + } + + class RealtimeTranscriptionSessionAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel + attr_reader create_response: bool? + + def create_response=: (bool) -> bool + + attr_reader eagerness: OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::eagerness? + + def eagerness=: ( + OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::eagerness + ) -> OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::eagerness + + attr_accessor idle_timeout_ms: Integer? + + attr_reader interrupt_response: bool? + + def interrupt_response=: (bool) -> bool + + attr_reader prefix_padding_ms: Integer? + + def prefix_padding_ms=: (Integer) -> Integer + + attr_reader silence_duration_ms: Integer? + + def silence_duration_ms=: (Integer) -> Integer + + attr_reader threshold: Float? + + def threshold=: (Float) -> Float + + attr_reader type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::type_? 
+ + def type=: ( + OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::type_ + ) -> OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::type_ + + def initialize: ( + ?create_response: bool, + ?eagerness: OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::eagerness, + ?idle_timeout_ms: Integer?, + ?interrupt_response: bool, + ?prefix_padding_ms: Integer, + ?silence_duration_ms: Integer, + ?threshold: Float, + ?type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::type_ + ) -> void + + def to_hash: -> { + create_response: bool, + eagerness: OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::eagerness, + idle_timeout_ms: Integer?, + interrupt_response: bool, + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::type_ + } + + type eagerness = :low | :medium | :high | :auto + + module Eagerness + extend OpenAI::Internal::Type::Enum + + LOW: :low + MEDIUM: :medium + HIGH: :high + AUTO: :auto + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::eagerness] + end + + type type_ = :server_vad | :semantic_vad + + module Type + extend OpenAI::Internal::Type::Enum + + SERVER_VAD: :server_vad + SEMANTIC_VAD: :semantic_vad + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::type_] + end + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs b/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs new file mode 100644 index 00000000..06e0d731 --- /dev/null +++ b/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs @@ -0,0 +1,20 @@ +module OpenAI + module Models + class RealtimeTranscriptionSessionClientSecret = Realtime::RealtimeTranscriptionSessionClientSecret + + module Realtime + type realtime_transcription_session_client_secret = + { expires_at: Integer, value: String } + + class RealtimeTranscriptionSessionClientSecret < OpenAI::Internal::Type::BaseModel + attr_accessor expires_at: Integer + + attr_accessor value: String + + def initialize: (expires_at: Integer, value: String) -> void + + def to_hash: -> { expires_at: Integer, value: String } + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs b/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs index 63936ef5..7707d4a8 100644 --- a/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +++ b/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs @@ -3,86 +3,38 @@ module OpenAI module Realtime type realtime_transcription_session_create_request = { - model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::model, type: :transcription, - include: ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::include_], - input_audio_format: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::input_audio_format, - input_audio_noise_reduction: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction, - input_audio_transcription: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription, - turn_detection: 
OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection + audio: OpenAI::Realtime::RealtimeTranscriptionSessionAudio, + include: ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::include_] } class RealtimeTranscriptionSessionCreateRequest < OpenAI::Internal::Type::BaseModel - attr_accessor model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::model - attr_accessor type: :transcription + attr_reader audio: OpenAI::Realtime::RealtimeTranscriptionSessionAudio? + + def audio=: ( + OpenAI::Realtime::RealtimeTranscriptionSessionAudio + ) -> OpenAI::Realtime::RealtimeTranscriptionSessionAudio + attr_reader include: ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::include_]? def include=: ( ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::include_] ) -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::include_] - attr_reader input_audio_format: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::input_audio_format? - - def input_audio_format=: ( - OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::input_audio_format - ) -> OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::input_audio_format - - attr_reader input_audio_noise_reduction: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction? - - def input_audio_noise_reduction=: ( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction - ) -> OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction - - attr_reader input_audio_transcription: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription? - - def input_audio_transcription=: ( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription - ) -> OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription - - attr_reader turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection? 
- - def turn_detection=: ( - OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection - ) -> OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection - def initialize: ( - model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::model, + ?audio: OpenAI::Realtime::RealtimeTranscriptionSessionAudio, ?include: ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::include_], - ?input_audio_format: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::input_audio_format, - ?input_audio_noise_reduction: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction, - ?input_audio_transcription: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription, - ?turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection, ?type: :transcription ) -> void def to_hash: -> { - model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::model, type: :transcription, - include: ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::include_], - input_audio_format: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::input_audio_format, - input_audio_noise_reduction: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction, - input_audio_transcription: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription, - turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection + audio: OpenAI::Realtime::RealtimeTranscriptionSessionAudio, + include: ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::include_] } - type model = - String - | :"whisper-1" - | :"gpt-4o-transcribe" - | :"gpt-4o-mini-transcribe" - - module Model - extend OpenAI::Internal::Type::Union - - def self?.variants: -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::model] - - WHISPER_1: :"whisper-1" - GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe" - GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe" - end - type include_ = :"item.input_audio_transcription.logprobs" module Include @@ -92,150 +44,6 @@ module OpenAI def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::include_] end - - type input_audio_format = :pcm16 | :g711_ulaw | :g711_alaw - - module InputAudioFormat - extend OpenAI::Internal::Type::Enum - - PCM16: :pcm16 - G711_ULAW: :g711_ulaw - G711_ALAW: :g711_alaw - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::input_audio_format] - end - - type input_audio_noise_reduction = - { - type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::type_ - } - - class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel - attr_reader type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::type_? 
- - def type=: ( - OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::type_ - ) -> OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::type_ - - def initialize: ( - ?type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::type_ - ) -> void - - def to_hash: -> { - type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::type_ - } - - type type_ = :near_field | :far_field - - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD: :near_field - FAR_FIELD: :far_field - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::type_] - end - end - - type input_audio_transcription = - { - language: String, - model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::model, - prompt: String - } - - class InputAudioTranscription < OpenAI::Internal::Type::BaseModel - attr_reader language: String? - - def language=: (String) -> String - - attr_reader model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::model? - - def model=: ( - OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::model - ) -> OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::model - - attr_reader prompt: String? - - def prompt=: (String) -> String - - def initialize: ( - ?language: String, - ?model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::model, - ?prompt: String - ) -> void - - def to_hash: -> { - language: String, - model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::model, - prompt: String - } - - type model = - :"gpt-4o-transcribe" | :"gpt-4o-mini-transcribe" | :"whisper-1" - - module Model - extend OpenAI::Internal::Type::Enum - - GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe" - GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe" - WHISPER_1: :"whisper-1" - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::model] - end - end - - type turn_detection = - { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::type_ - } - - class TurnDetection < OpenAI::Internal::Type::BaseModel - attr_reader prefix_padding_ms: Integer? - - def prefix_padding_ms=: (Integer) -> Integer - - attr_reader silence_duration_ms: Integer? - - def silence_duration_ms=: (Integer) -> Integer - - attr_reader threshold: Float? - - def threshold=: (Float) -> Float - - attr_reader type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::type_? 
- - def type=: ( - OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::type_ - ) -> OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::type_ - - def initialize: ( - ?prefix_padding_ms: Integer, - ?silence_duration_ms: Integer, - ?threshold: Float, - ?type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::type_ - ) -> void - - def to_hash: -> { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::type_ - } - - type type_ = :server_vad - - module Type - extend OpenAI::Internal::Type::Enum - - SERVER_VAD: :server_vad - - def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::type_] - end - end end end end diff --git a/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs b/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs new file mode 100644 index 00000000..9b28ed78 --- /dev/null +++ b/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs @@ -0,0 +1,69 @@ +module OpenAI + module Models + class RealtimeTranscriptionSessionCreateResponse = Realtime::RealtimeTranscriptionSessionCreateResponse + + module Realtime + type realtime_transcription_session_create_response = + { + client_secret: OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret, + input_audio_format: String, + input_audio_transcription: OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription, + modalities: ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::modality], + turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection + } + + class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel + attr_accessor client_secret: OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret + + attr_reader input_audio_format: String? + + def input_audio_format=: (String) -> String + + attr_reader input_audio_transcription: OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription? + + def input_audio_transcription=: ( + OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription + ) -> OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription + + attr_reader modalities: ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::modality]? + + def modalities=: ( + ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::modality] + ) -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::modality] + + attr_reader turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection? 
+ + def turn_detection=: ( + OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection + ) -> OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection + + def initialize: ( + client_secret: OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret, + ?input_audio_format: String, + ?input_audio_transcription: OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription, + ?modalities: ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::modality], + ?turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection + ) -> void + + def to_hash: -> { + client_secret: OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret, + input_audio_format: String, + input_audio_transcription: OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription, + modalities: ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::modality], + turn_detection: OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection + } + + type modality = :text | :audio + + module Modality + extend OpenAI::Internal::Type::Enum + + TEXT: :text + AUDIO: :audio + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::modality] + end + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs b/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs new file mode 100644 index 00000000..7da5320d --- /dev/null +++ b/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs @@ -0,0 +1,59 @@ +module OpenAI + module Models + class RealtimeTranscriptionSessionInputAudioTranscription = Realtime::RealtimeTranscriptionSessionInputAudioTranscription + + module Realtime + type realtime_transcription_session_input_audio_transcription = + { + language: String, + model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::model, + prompt: String + } + + class RealtimeTranscriptionSessionInputAudioTranscription < OpenAI::Internal::Type::BaseModel + attr_reader language: String? + + def language=: (String) -> String + + attr_reader model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::model? + + def model=: ( + OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::model + ) -> OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::model + + attr_reader prompt: String? 
+ + def prompt=: (String) -> String + + def initialize: ( + ?language: String, + ?model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::model, + ?prompt: String + ) -> void + + def to_hash: -> { + language: String, + model: OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::model, + prompt: String + } + + type model = + :"whisper-1" + | :"gpt-4o-transcribe-latest" + | :"gpt-4o-mini-transcribe" + | :"gpt-4o-transcribe" + + module Model + extend OpenAI::Internal::Type::Enum + + WHISPER_1: :"whisper-1" + GPT_4O_TRANSCRIBE_LATEST: :"gpt-4o-transcribe-latest" + GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe" + GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe" + + def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::model] + end + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs b/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs new file mode 100644 index 00000000..4c33c0cd --- /dev/null +++ b/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs @@ -0,0 +1,47 @@ +module OpenAI + module Models + class RealtimeTranscriptionSessionTurnDetection = Realtime::RealtimeTranscriptionSessionTurnDetection + + module Realtime + type realtime_transcription_session_turn_detection = + { + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: String + } + + class RealtimeTranscriptionSessionTurnDetection < OpenAI::Internal::Type::BaseModel + attr_reader prefix_padding_ms: Integer? + + def prefix_padding_ms=: (Integer) -> Integer + + attr_reader silence_duration_ms: Integer? + + def silence_duration_ms=: (Integer) -> Integer + + attr_reader threshold: Float? + + def threshold=: (Float) -> Float + + attr_reader type: String? + + def type=: (String) -> String + + def initialize: ( + ?prefix_padding_ms: Integer, + ?silence_duration_ms: Integer, + ?threshold: Float, + ?type: String + ) -> void + + def to_hash: -> { + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: String + } + end + end + end +end diff --git a/sig/openai/models/realtime/realtime_truncation.rbs b/sig/openai/models/realtime/realtime_truncation.rbs index a8f9a8af..5522e871 100644 --- a/sig/openai/models/realtime/realtime_truncation.rbs +++ b/sig/openai/models/realtime/realtime_truncation.rbs @@ -3,7 +3,7 @@ module OpenAI module Realtime type realtime_truncation = OpenAI::Models::Realtime::RealtimeTruncation::realtime_truncation_strategy - | OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation + | OpenAI::Realtime::RealtimeTruncationRetentionRatio module RealtimeTruncation extend OpenAI::Internal::Type::Union @@ -19,33 +19,6 @@ module OpenAI def self?.values: -> ::Array[OpenAI::Models::Realtime::RealtimeTruncation::realtime_truncation_strategy] end - type retention_ratio_truncation = - { - retention_ratio: Float, - type: :retention_ratio, - post_instructions_token_limit: Integer? - } - - class RetentionRatioTruncation < OpenAI::Internal::Type::BaseModel - attr_accessor retention_ratio: Float - - attr_accessor type: :retention_ratio - - attr_accessor post_instructions_token_limit: Integer? - - def initialize: ( - retention_ratio: Float, - ?post_instructions_token_limit: Integer?, - ?type: :retention_ratio - ) -> void - - def to_hash: -> { - retention_ratio: Float, - type: :retention_ratio, - post_instructions_token_limit: Integer? 
- } - end - def self?.variants: -> ::Array[OpenAI::Models::Realtime::realtime_truncation] end end diff --git a/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs b/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs new file mode 100644 index 00000000..15d9917c --- /dev/null +++ b/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs @@ -0,0 +1,21 @@ +module OpenAI + module Models + module Realtime + type realtime_truncation_retention_ratio = + { retention_ratio: Float, type: :retention_ratio } + + class RealtimeTruncationRetentionRatio < OpenAI::Internal::Type::BaseModel + attr_accessor retention_ratio: Float + + attr_accessor type: :retention_ratio + + def initialize: ( + retention_ratio: Float, + ?type: :retention_ratio + ) -> void + + def to_hash: -> { retention_ratio: Float, type: :retention_ratio } + end + end + end +end diff --git a/sig/openai/models/realtime/response_create_event.rbs b/sig/openai/models/realtime/response_create_event.rbs index 74eac018..d3548d3d 100644 --- a/sig/openai/models/realtime/response_create_event.rbs +++ b/sig/openai/models/realtime/response_create_event.rbs @@ -5,7 +5,7 @@ module OpenAI { type: :"response.create", event_id: String, - response: OpenAI::Realtime::ResponseCreateEvent::Response + response: OpenAI::Realtime::RealtimeResponseCreateParams } class ResponseCreateEvent < OpenAI::Internal::Type::BaseModel @@ -15,266 +15,23 @@ module OpenAI def event_id=: (String) -> String - attr_reader response: OpenAI::Realtime::ResponseCreateEvent::Response? + attr_reader response: OpenAI::Realtime::RealtimeResponseCreateParams? def response=: ( - OpenAI::Realtime::ResponseCreateEvent::Response - ) -> OpenAI::Realtime::ResponseCreateEvent::Response + OpenAI::Realtime::RealtimeResponseCreateParams + ) -> OpenAI::Realtime::RealtimeResponseCreateParams def initialize: ( ?event_id: String, - ?response: OpenAI::Realtime::ResponseCreateEvent::Response, + ?response: OpenAI::Realtime::RealtimeResponseCreateParams, ?type: :"response.create" ) -> void def to_hash: -> { type: :"response.create", event_id: String, - response: OpenAI::Realtime::ResponseCreateEvent::Response + response: OpenAI::Realtime::RealtimeResponseCreateParams } - - type response = - { - conversation: OpenAI::Models::Realtime::ResponseCreateEvent::Response::conversation, - input: ::Array[OpenAI::Models::Realtime::conversation_item], - instructions: String, - max_output_tokens: OpenAI::Models::Realtime::ResponseCreateEvent::Response::max_output_tokens, - metadata: OpenAI::Models::metadata?, - modalities: ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::modality], - output_audio_format: OpenAI::Models::Realtime::ResponseCreateEvent::Response::output_audio_format, - prompt: OpenAI::Responses::ResponsePrompt?, - temperature: Float, - tool_choice: OpenAI::Models::Realtime::ResponseCreateEvent::Response::tool_choice, - tools: ::Array[OpenAI::Realtime::ResponseCreateEvent::Response::Tool], - voice: OpenAI::Models::Realtime::ResponseCreateEvent::Response::voice - } - - class Response < OpenAI::Internal::Type::BaseModel - attr_reader conversation: OpenAI::Models::Realtime::ResponseCreateEvent::Response::conversation? - - def conversation=: ( - OpenAI::Models::Realtime::ResponseCreateEvent::Response::conversation - ) -> OpenAI::Models::Realtime::ResponseCreateEvent::Response::conversation - - attr_reader input: ::Array[OpenAI::Models::Realtime::conversation_item]? 
- - def input=: ( - ::Array[OpenAI::Models::Realtime::conversation_item] - ) -> ::Array[OpenAI::Models::Realtime::conversation_item] - - attr_reader instructions: String? - - def instructions=: (String) -> String - - attr_reader max_output_tokens: OpenAI::Models::Realtime::ResponseCreateEvent::Response::max_output_tokens? - - def max_output_tokens=: ( - OpenAI::Models::Realtime::ResponseCreateEvent::Response::max_output_tokens - ) -> OpenAI::Models::Realtime::ResponseCreateEvent::Response::max_output_tokens - - attr_accessor metadata: OpenAI::Models::metadata? - - attr_reader modalities: ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::modality]? - - def modalities=: ( - ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::modality] - ) -> ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::modality] - - attr_reader output_audio_format: OpenAI::Models::Realtime::ResponseCreateEvent::Response::output_audio_format? - - def output_audio_format=: ( - OpenAI::Models::Realtime::ResponseCreateEvent::Response::output_audio_format - ) -> OpenAI::Models::Realtime::ResponseCreateEvent::Response::output_audio_format - - attr_accessor prompt: OpenAI::Responses::ResponsePrompt? - - attr_reader temperature: Float? - - def temperature=: (Float) -> Float - - attr_reader tool_choice: OpenAI::Models::Realtime::ResponseCreateEvent::Response::tool_choice? - - def tool_choice=: ( - OpenAI::Models::Realtime::ResponseCreateEvent::Response::tool_choice - ) -> OpenAI::Models::Realtime::ResponseCreateEvent::Response::tool_choice - - attr_reader tools: ::Array[OpenAI::Realtime::ResponseCreateEvent::Response::Tool]? - - def tools=: ( - ::Array[OpenAI::Realtime::ResponseCreateEvent::Response::Tool] - ) -> ::Array[OpenAI::Realtime::ResponseCreateEvent::Response::Tool] - - attr_reader voice: OpenAI::Models::Realtime::ResponseCreateEvent::Response::voice? 
- - def voice=: ( - OpenAI::Models::Realtime::ResponseCreateEvent::Response::voice - ) -> OpenAI::Models::Realtime::ResponseCreateEvent::Response::voice - - def initialize: ( - ?conversation: OpenAI::Models::Realtime::ResponseCreateEvent::Response::conversation, - ?input: ::Array[OpenAI::Models::Realtime::conversation_item], - ?instructions: String, - ?max_output_tokens: OpenAI::Models::Realtime::ResponseCreateEvent::Response::max_output_tokens, - ?metadata: OpenAI::Models::metadata?, - ?modalities: ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::modality], - ?output_audio_format: OpenAI::Models::Realtime::ResponseCreateEvent::Response::output_audio_format, - ?prompt: OpenAI::Responses::ResponsePrompt?, - ?temperature: Float, - ?tool_choice: OpenAI::Models::Realtime::ResponseCreateEvent::Response::tool_choice, - ?tools: ::Array[OpenAI::Realtime::ResponseCreateEvent::Response::Tool], - ?voice: OpenAI::Models::Realtime::ResponseCreateEvent::Response::voice - ) -> void - - def to_hash: -> { - conversation: OpenAI::Models::Realtime::ResponseCreateEvent::Response::conversation, - input: ::Array[OpenAI::Models::Realtime::conversation_item], - instructions: String, - max_output_tokens: OpenAI::Models::Realtime::ResponseCreateEvent::Response::max_output_tokens, - metadata: OpenAI::Models::metadata?, - modalities: ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::modality], - output_audio_format: OpenAI::Models::Realtime::ResponseCreateEvent::Response::output_audio_format, - prompt: OpenAI::Responses::ResponsePrompt?, - temperature: Float, - tool_choice: OpenAI::Models::Realtime::ResponseCreateEvent::Response::tool_choice, - tools: ::Array[OpenAI::Realtime::ResponseCreateEvent::Response::Tool], - voice: OpenAI::Models::Realtime::ResponseCreateEvent::Response::voice - } - - type conversation = String | :auto | :none - - module Conversation - extend OpenAI::Internal::Type::Union - - def self?.variants: -> ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::conversation] - - AUTO: :auto - NONE: :none - end - - type max_output_tokens = Integer | :inf - - module MaxOutputTokens - extend OpenAI::Internal::Type::Union - - def self?.variants: -> ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::max_output_tokens] - end - - type modality = :text | :audio - - module Modality - extend OpenAI::Internal::Type::Enum - - TEXT: :text - AUDIO: :audio - - def self?.values: -> ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::modality] - end - - type output_audio_format = :pcm16 | :g711_ulaw | :g711_alaw - - module OutputAudioFormat - extend OpenAI::Internal::Type::Enum - - PCM16: :pcm16 - G711_ULAW: :g711_ulaw - G711_ALAW: :g711_alaw - - def self?.values: -> ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::output_audio_format] - end - - type tool_choice = - OpenAI::Models::Responses::tool_choice_options - | OpenAI::Responses::ToolChoiceFunction - | OpenAI::Responses::ToolChoiceMcp - - module ToolChoice - extend OpenAI::Internal::Type::Union - - def self?.variants: -> ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::tool_choice] - end - - type tool = - { - description: String, - name: String, - parameters: top, - type: OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool::type_ - } - - class Tool < OpenAI::Internal::Type::BaseModel - attr_reader description: String? - - def description=: (String) -> String - - attr_reader name: String? - - def name=: (String) -> String - - attr_reader parameters: top? 
- - def parameters=: (top) -> top - - attr_reader type: OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool::type_? - - def type=: ( - OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool::type_ - ) -> OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool::type_ - - def initialize: ( - ?description: String, - ?name: String, - ?parameters: top, - ?type: OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool::type_ - ) -> void - - def to_hash: -> { - description: String, - name: String, - parameters: top, - type: OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool::type_ - } - - type type_ = :function - - module Type - extend OpenAI::Internal::Type::Enum - - FUNCTION: :function - - def self?.values: -> ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::Tool::type_] - end - end - - type voice = - String - | :alloy - | :ash - | :ballad - | :coral - | :echo - | :sage - | :shimmer - | :verse - | :marin - | :cedar - - module Voice - extend OpenAI::Internal::Type::Union - - def self?.variants: -> ::Array[OpenAI::Models::Realtime::ResponseCreateEvent::Response::voice] - - ALLOY: :alloy - ASH: :ash - BALLAD: :ballad - CORAL: :coral - ECHO: :echo - SAGE: :sage - SHIMMER: :shimmer - VERSE: :verse - MARIN: :marin - CEDAR: :cedar - end - end end end end diff --git a/sig/openai/models/realtime/session_created_event.rbs b/sig/openai/models/realtime/session_created_event.rbs index 2c979b90..e5e25616 100644 --- a/sig/openai/models/realtime/session_created_event.rbs +++ b/sig/openai/models/realtime/session_created_event.rbs @@ -4,28 +4,38 @@ module OpenAI type session_created_event = { event_id: String, - session: OpenAI::Realtime::RealtimeSession, + session: OpenAI::Models::Realtime::SessionCreatedEvent::session, type: :"session.created" } class SessionCreatedEvent < OpenAI::Internal::Type::BaseModel attr_accessor event_id: String - attr_accessor session: OpenAI::Realtime::RealtimeSession + attr_accessor session: OpenAI::Models::Realtime::SessionCreatedEvent::session attr_accessor type: :"session.created" def initialize: ( event_id: String, - session: OpenAI::Realtime::RealtimeSession, + session: OpenAI::Models::Realtime::SessionCreatedEvent::session, ?type: :"session.created" ) -> void def to_hash: -> { event_id: String, - session: OpenAI::Realtime::RealtimeSession, + session: OpenAI::Models::Realtime::SessionCreatedEvent::session, type: :"session.created" } + + type session = + OpenAI::Realtime::RealtimeSessionCreateRequest + | OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + + module Session + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::SessionCreatedEvent::session] + end end end end diff --git a/sig/openai/models/realtime/session_update_event.rbs b/sig/openai/models/realtime/session_update_event.rbs index f181b5e6..87053eef 100644 --- a/sig/openai/models/realtime/session_update_event.rbs +++ b/sig/openai/models/realtime/session_update_event.rbs @@ -3,13 +3,13 @@ module OpenAI module Realtime type session_update_event = { - session: OpenAI::Realtime::RealtimeSessionCreateRequest, + session: OpenAI::Models::Realtime::SessionUpdateEvent::session, type: :"session.update", event_id: String } class SessionUpdateEvent < OpenAI::Internal::Type::BaseModel - attr_accessor session: OpenAI::Realtime::RealtimeSessionCreateRequest + attr_accessor session: OpenAI::Models::Realtime::SessionUpdateEvent::session attr_accessor type: :"session.update" @@ -18,16 +18,26 @@ module OpenAI def event_id=: 
(String) -> String def initialize: ( - session: OpenAI::Realtime::RealtimeSessionCreateRequest, + session: OpenAI::Models::Realtime::SessionUpdateEvent::session, ?event_id: String, ?type: :"session.update" ) -> void def to_hash: -> { - session: OpenAI::Realtime::RealtimeSessionCreateRequest, + session: OpenAI::Models::Realtime::SessionUpdateEvent::session, type: :"session.update", event_id: String } + + type session = + OpenAI::Realtime::RealtimeSessionCreateRequest + | OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + + module Session + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::SessionUpdateEvent::session] + end end end end diff --git a/sig/openai/models/realtime/session_updated_event.rbs b/sig/openai/models/realtime/session_updated_event.rbs index 048e75a2..f1ee69c2 100644 --- a/sig/openai/models/realtime/session_updated_event.rbs +++ b/sig/openai/models/realtime/session_updated_event.rbs @@ -4,28 +4,38 @@ module OpenAI type session_updated_event = { event_id: String, - session: OpenAI::Realtime::RealtimeSession, + session: OpenAI::Models::Realtime::SessionUpdatedEvent::session, type: :"session.updated" } class SessionUpdatedEvent < OpenAI::Internal::Type::BaseModel attr_accessor event_id: String - attr_accessor session: OpenAI::Realtime::RealtimeSession + attr_accessor session: OpenAI::Models::Realtime::SessionUpdatedEvent::session attr_accessor type: :"session.updated" def initialize: ( event_id: String, - session: OpenAI::Realtime::RealtimeSession, + session: OpenAI::Models::Realtime::SessionUpdatedEvent::session, ?type: :"session.updated" ) -> void def to_hash: -> { event_id: String, - session: OpenAI::Realtime::RealtimeSession, + session: OpenAI::Models::Realtime::SessionUpdatedEvent::session, type: :"session.updated" } + + type session = + OpenAI::Realtime::RealtimeSessionCreateRequest + | OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + + module Session + extend OpenAI::Internal::Type::Union + + def self?.variants: -> ::Array[OpenAI::Models::Realtime::SessionUpdatedEvent::session] + end end end end diff --git a/sig/openai/models/realtime/transcription_session_created.rbs b/sig/openai/models/realtime/transcription_session_created.rbs index 36fe920b..73c1a673 100644 --- a/sig/openai/models/realtime/transcription_session_created.rbs +++ b/sig/openai/models/realtime/transcription_session_created.rbs @@ -4,278 +4,28 @@ module OpenAI type transcription_session_created = { event_id: String, - session: OpenAI::Realtime::TranscriptionSessionCreated::Session, + session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse, type: :"transcription_session.created" } class TranscriptionSessionCreated < OpenAI::Internal::Type::BaseModel attr_accessor event_id: String - attr_accessor session: OpenAI::Realtime::TranscriptionSessionCreated::Session + attr_accessor session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse attr_accessor type: :"transcription_session.created" def initialize: ( event_id: String, - session: OpenAI::Realtime::TranscriptionSessionCreated::Session, + session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse, ?type: :"transcription_session.created" ) -> void def to_hash: -> { event_id: String, - session: OpenAI::Realtime::TranscriptionSessionCreated::Session, + session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse, type: :"transcription_session.created" } - - type session = - { - id: String, - audio: 
OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio, - expires_at: Integer, - include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::include_], - object: String - } - - class Session < OpenAI::Internal::Type::BaseModel - attr_reader id: String? - - def id=: (String) -> String - - attr_reader audio: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio? - - def audio=: ( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio - ) -> OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio - - attr_reader expires_at: Integer? - - def expires_at=: (Integer) -> Integer - - attr_reader include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::include_]? - - def include=: ( - ::Array[OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::include_] - ) -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::include_] - - attr_reader object: String? - - def object=: (String) -> String - - def initialize: ( - ?id: String, - ?audio: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio, - ?expires_at: Integer, - ?include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::include_], - ?object: String - ) -> void - - def to_hash: -> { - id: String, - audio: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio, - expires_at: Integer, - include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::include_], - object: String - } - - type audio = - { - input: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input - } - - class Audio < OpenAI::Internal::Type::BaseModel - attr_reader input: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input? - - def input=: ( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input - ) -> OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input - - def initialize: ( - ?input: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input - ) -> void - - def to_hash: -> { - input: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input - } - - type input = - { - format_: String, - noise_reduction: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction, - transcription: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription, - turn_detection: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection - } - - class Input < OpenAI::Internal::Type::BaseModel - attr_reader format_: String? - - def format_=: (String) -> String - - attr_reader noise_reduction: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction? - - def noise_reduction=: ( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction - ) -> OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction - - attr_reader transcription: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription? - - def transcription=: ( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription - ) -> OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription - - attr_reader turn_detection: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection? 
- - def turn_detection=: ( - OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection - ) -> OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection - - def initialize: ( - ?format_: String, - ?noise_reduction: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction, - ?transcription: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription, - ?turn_detection: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection - ) -> void - - def to_hash: -> { - format_: String, - noise_reduction: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction, - transcription: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription, - turn_detection: OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection - } - - type noise_reduction = - { - type: OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::type_ - } - - class NoiseReduction < OpenAI::Internal::Type::BaseModel - attr_reader type: OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::type_? - - def type=: ( - OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::type_ - ) -> OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::type_ - - def initialize: ( - ?type: OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::type_ - ) -> void - - def to_hash: -> { - type: OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::type_ - } - - type type_ = :near_field | :far_field - - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD: :near_field - FAR_FIELD: :far_field - - def self?.values: -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::type_] - end - end - - type transcription = - { - language: String, - model: OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::model, - prompt: String - } - - class Transcription < OpenAI::Internal::Type::BaseModel - attr_reader language: String? - - def language=: (String) -> String - - attr_reader model: OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::model? - - def model=: ( - OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::model - ) -> OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::model - - attr_reader prompt: String? 
- - def prompt=: (String) -> String - - def initialize: ( - ?language: String, - ?model: OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::model, - ?prompt: String - ) -> void - - def to_hash: -> { - language: String, - model: OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::model, - prompt: String - } - - type model = - :"gpt-4o-transcribe" - | :"gpt-4o-mini-transcribe" - | :"whisper-1" - - module Model - extend OpenAI::Internal::Type::Enum - - GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe" - GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe" - WHISPER_1: :"whisper-1" - - def self?.values: -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::model] - end - end - - type turn_detection = - { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } - - class TurnDetection < OpenAI::Internal::Type::BaseModel - attr_reader prefix_padding_ms: Integer? - - def prefix_padding_ms=: (Integer) -> Integer - - attr_reader silence_duration_ms: Integer? - - def silence_duration_ms=: (Integer) -> Integer - - attr_reader threshold: Float? - - def threshold=: (Float) -> Float - - attr_reader type: String? - - def type=: (String) -> String - - def initialize: ( - ?prefix_padding_ms: Integer, - ?silence_duration_ms: Integer, - ?threshold: Float, - ?type: String - ) -> void - - def to_hash: -> { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } - end - end - end - - type include_ = :"item.input_audio_transcription.logprobs" - - module Include - extend OpenAI::Internal::Type::Enum - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS: :"item.input_audio_transcription.logprobs" - - def self?.values: -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::include_] - end - end end end end diff --git a/sig/openai/models/realtime/transcription_session_update.rbs b/sig/openai/models/realtime/transcription_session_update.rbs index 62e70739..1982bc23 100644 --- a/sig/openai/models/realtime/transcription_session_update.rbs +++ b/sig/openai/models/realtime/transcription_session_update.rbs @@ -3,13 +3,13 @@ module OpenAI module Realtime type transcription_session_update = { - session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest, + session: OpenAI::Realtime::TranscriptionSessionUpdate::Session, type: :"transcription_session.update", event_id: String } class TranscriptionSessionUpdate < OpenAI::Internal::Type::BaseModel - attr_accessor session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest + attr_accessor session: OpenAI::Realtime::TranscriptionSessionUpdate::Session attr_accessor type: :"transcription_session.update" @@ -18,16 +18,166 @@ module OpenAI def event_id=: (String) -> String def initialize: ( - session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest, + session: OpenAI::Realtime::TranscriptionSessionUpdate::Session, ?event_id: String, ?type: :"transcription_session.update" ) -> void def to_hash: -> { - session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest, + session: OpenAI::Realtime::TranscriptionSessionUpdate::Session, type: :"transcription_session.update", event_id: String } + + type session = + { + include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::include_], + input_audio_format: OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::input_audio_format, + input_audio_noise_reduction: 
OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction, + input_audio_transcription: OpenAI::Realtime::AudioTranscription, + turn_detection: OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection + } + + class Session < OpenAI::Internal::Type::BaseModel + attr_reader include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::include_]? + + def include=: ( + ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::include_] + ) -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::include_] + + attr_reader input_audio_format: OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::input_audio_format? + + def input_audio_format=: ( + OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::input_audio_format + ) -> OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::input_audio_format + + attr_reader input_audio_noise_reduction: OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction? + + def input_audio_noise_reduction=: ( + OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction + ) -> OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction + + attr_reader input_audio_transcription: OpenAI::Realtime::AudioTranscription? + + def input_audio_transcription=: ( + OpenAI::Realtime::AudioTranscription + ) -> OpenAI::Realtime::AudioTranscription + + attr_reader turn_detection: OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection? + + def turn_detection=: ( + OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection + ) -> OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection + + def initialize: ( + ?include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::include_], + ?input_audio_format: OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::input_audio_format, + ?input_audio_noise_reduction: OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction, + ?input_audio_transcription: OpenAI::Realtime::AudioTranscription, + ?turn_detection: OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection + ) -> void + + def to_hash: -> { + include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::include_], + input_audio_format: OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::input_audio_format, + input_audio_noise_reduction: OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction, + input_audio_transcription: OpenAI::Realtime::AudioTranscription, + turn_detection: OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection + } + + type include_ = :"item.input_audio_transcription.logprobs" + + module Include + extend OpenAI::Internal::Type::Enum + + ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS: :"item.input_audio_transcription.logprobs" + + def self?.values: -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::include_] + end + + type input_audio_format = :pcm16 | :g711_ulaw | :g711_alaw + + module InputAudioFormat + extend OpenAI::Internal::Type::Enum + + PCM16: :pcm16 + G711_ULAW: :g711_ulaw + G711_ALAW: :g711_alaw + + def self?.values: -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::input_audio_format] + end + + type input_audio_noise_reduction = + { type: OpenAI::Models::Realtime::noise_reduction_type } + + class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel + attr_reader type: 
OpenAI::Models::Realtime::noise_reduction_type? + + def type=: ( + OpenAI::Models::Realtime::noise_reduction_type + ) -> OpenAI::Models::Realtime::noise_reduction_type + + def initialize: ( + ?type: OpenAI::Models::Realtime::noise_reduction_type + ) -> void + + def to_hash: -> { + type: OpenAI::Models::Realtime::noise_reduction_type + } + end + + type turn_detection = + { + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::type_ + } + + class TurnDetection < OpenAI::Internal::Type::BaseModel + attr_reader prefix_padding_ms: Integer? + + def prefix_padding_ms=: (Integer) -> Integer + + attr_reader silence_duration_ms: Integer? + + def silence_duration_ms=: (Integer) -> Integer + + attr_reader threshold: Float? + + def threshold=: (Float) -> Float + + attr_reader type: OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::type_? + + def type=: ( + OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::type_ + ) -> OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::type_ + + def initialize: ( + ?prefix_padding_ms: Integer, + ?silence_duration_ms: Integer, + ?threshold: Float, + ?type: OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::type_ + ) -> void + + def to_hash: -> { + prefix_padding_ms: Integer, + silence_duration_ms: Integer, + threshold: Float, + type: OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::type_ + } + + type type_ = :server_vad + + module Type + extend OpenAI::Internal::Type::Enum + + SERVER_VAD: :server_vad + + def self?.values: -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::type_] + end + end + end end end end diff --git a/sig/openai/models/realtime/transcription_session_updated_event.rbs b/sig/openai/models/realtime/transcription_session_updated_event.rbs index b89c8a8c..ac91e4e9 100644 --- a/sig/openai/models/realtime/transcription_session_updated_event.rbs +++ b/sig/openai/models/realtime/transcription_session_updated_event.rbs @@ -4,278 +4,28 @@ module OpenAI type transcription_session_updated_event = { event_id: String, - session: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session, + session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse, type: :"transcription_session.updated" } class TranscriptionSessionUpdatedEvent < OpenAI::Internal::Type::BaseModel attr_accessor event_id: String - attr_accessor session: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session + attr_accessor session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse attr_accessor type: :"transcription_session.updated" def initialize: ( event_id: String, - session: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session, + session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse, ?type: :"transcription_session.updated" ) -> void def to_hash: -> { event_id: String, - session: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session, + session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse, type: :"transcription_session.updated" } - - type session = - { - id: String, - audio: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio, - expires_at: Integer, - include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::include_], - object: String - } - - class Session < OpenAI::Internal::Type::BaseModel - 
attr_reader id: String? - - def id=: (String) -> String - - attr_reader audio: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio? - - def audio=: ( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio - ) -> OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio - - attr_reader expires_at: Integer? - - def expires_at=: (Integer) -> Integer - - attr_reader include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::include_]? - - def include=: ( - ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::include_] - ) -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::include_] - - attr_reader object: String? - - def object=: (String) -> String - - def initialize: ( - ?id: String, - ?audio: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio, - ?expires_at: Integer, - ?include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::include_], - ?object: String - ) -> void - - def to_hash: -> { - id: String, - audio: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio, - expires_at: Integer, - include: ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::include_], - object: String - } - - type audio = - { - input: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input - } - - class Audio < OpenAI::Internal::Type::BaseModel - attr_reader input: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input? - - def input=: ( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input - ) -> OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input - - def initialize: ( - ?input: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input - ) -> void - - def to_hash: -> { - input: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input - } - - type input = - { - format_: String, - noise_reduction: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction, - transcription: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription, - turn_detection: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection - } - - class Input < OpenAI::Internal::Type::BaseModel - attr_reader format_: String? - - def format_=: (String) -> String - - attr_reader noise_reduction: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction? - - def noise_reduction=: ( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction - ) -> OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction - - attr_reader transcription: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription? - - def transcription=: ( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription - ) -> OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription - - attr_reader turn_detection: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection? 
- - def turn_detection=: ( - OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection - ) -> OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection - - def initialize: ( - ?format_: String, - ?noise_reduction: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction, - ?transcription: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription, - ?turn_detection: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection - ) -> void - - def to_hash: -> { - format_: String, - noise_reduction: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction, - transcription: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription, - turn_detection: OpenAI::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::TurnDetection - } - - type noise_reduction = - { - type: OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::type_ - } - - class NoiseReduction < OpenAI::Internal::Type::BaseModel - attr_reader type: OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::type_? - - def type=: ( - OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::type_ - ) -> OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::type_ - - def initialize: ( - ?type: OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::type_ - ) -> void - - def to_hash: -> { - type: OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::type_ - } - - type type_ = :near_field | :far_field - - module Type - extend OpenAI::Internal::Type::Enum - - NEAR_FIELD: :near_field - FAR_FIELD: :far_field - - def self?.values: -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::NoiseReduction::type_] - end - end - - type transcription = - { - language: String, - model: OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::model, - prompt: String - } - - class Transcription < OpenAI::Internal::Type::BaseModel - attr_reader language: String? - - def language=: (String) -> String - - attr_reader model: OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::model? - - def model=: ( - OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::model - ) -> OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::model - - attr_reader prompt: String? 
- - def prompt=: (String) -> String - - def initialize: ( - ?language: String, - ?model: OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::model, - ?prompt: String - ) -> void - - def to_hash: -> { - language: String, - model: OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::model, - prompt: String - } - - type model = - :"gpt-4o-transcribe" - | :"gpt-4o-mini-transcribe" - | :"whisper-1" - - module Model - extend OpenAI::Internal::Type::Enum - - GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe" - GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe" - WHISPER_1: :"whisper-1" - - def self?.values: -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::Audio::Input::Transcription::model] - end - end - - type turn_detection = - { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } - - class TurnDetection < OpenAI::Internal::Type::BaseModel - attr_reader prefix_padding_ms: Integer? - - def prefix_padding_ms=: (Integer) -> Integer - - attr_reader silence_duration_ms: Integer? - - def silence_duration_ms=: (Integer) -> Integer - - attr_reader threshold: Float? - - def threshold=: (Float) -> Float - - attr_reader type: String? - - def type=: (String) -> String - - def initialize: ( - ?prefix_padding_ms: Integer, - ?silence_duration_ms: Integer, - ?threshold: Float, - ?type: String - ) -> void - - def to_hash: -> { - prefix_padding_ms: Integer, - silence_duration_ms: Integer, - threshold: Float, - type: String - } - end - end - end - - type include_ = :"item.input_audio_transcription.logprobs" - - module Include - extend OpenAI::Internal::Type::Enum - - ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS: :"item.input_audio_transcription.logprobs" - - def self?.values: -> ::Array[OpenAI::Models::Realtime::TranscriptionSessionUpdatedEvent::Session::include_] - end - end end end end
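
Usage sketches (editor's note, appended after the diff; not part of it). The snippets below are minimal sketches of how the reshaped Realtime types above might be constructed. They assume the keyword initializers declared in the RBS signatures apply at runtime, that the `OpenAI::Realtime` alias used throughout these signatures resolves to the `OpenAI::Models::Realtime` classes, and that `require "openai"` from the first snippet carries over; any client or WebSocket transport plumbing is elided.

First, the new `RealtimeTranscriptionSessionAudioInputTurnDetection` adds semantic VAD alongside server VAD, with an `eagerness` enum and a nilable idle timeout:

require "openai"

# A sketch assuming the keyword initializer declared in the signature above;
# every field and enum member is taken from this diff.
turn_detection =
  OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection.new(
    type: :semantic_vad,     # :server_vad | :semantic_vad
    eagerness: :auto,        # :low | :medium | :high | :auto
    idle_timeout_ms: 6_000,  # Integer? — may also be nil, per the signature
    create_response: true,
    interrupt_response: true
  )

turn_detection.to_hash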
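Second, `SessionUpdateEvent#session` is now a union, so a `session.update` event can carry either a full `RealtimeSessionCreateRequest` or the slimmed-down `RealtimeTranscriptionSessionCreateRequest`, whose `model` and `input_audio_*` fields moved under `audio`. A sketch, again assuming the declared keyword initializers; the `audio` payload is omitted here because the shape of `RealtimeTranscriptionSessionAudio` is defined elsewhere in this diff:

update = OpenAI::Realtime::SessionUpdateEvent.new(
  session: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest.new(
    # The only declared member of the include enum; type defaults to :transcription.
    include: [:"item.input_audio_transcription.logprobs"]
  )
)

update.to_hash # => { session: {...}, type: :"session.update", ... } — event_id is optional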
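Finally, the retention-ratio truncation strategy was promoted from the nested `RealtimeTruncation::RetentionRatioTruncation` to the standalone `RealtimeTruncationRetentionRatio`, dropping `post_instructions_token_limit` in the process. Constructing one is a one-liner, since `type` defaults to `:retention_ratio`:

truncation = OpenAI::Realtime::RealtimeTruncationRetentionRatio.new(
  retention_ratio: 0.5 # Float; presumably the fraction of conversation retained on truncation
)

truncation.to_hash # => { retention_ratio: 0.5, type: :retention_ratio }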