From 1c0d3145e920a55f3c710323849bcac1fd60b703 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 10 Sep 2025 13:33:47 +0000 Subject: [PATCH 1/2] chore(api): fix realtime GA types --- .stats.yml | 6 +- api.md | 5 +- src/resources/realtime/client-secrets.ts | 142 ++++++++--------- src/resources/realtime/index.ts | 2 - src/resources/realtime/realtime.ts | 185 +++++++++++++++-------- 5 files changed, 199 insertions(+), 141 deletions(-) diff --git a/.stats.yml b/.stats.yml index 36a3c7f58..2aa16be87 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 118 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-7807ec6037efcee1af7decbfd3974a42b761fb6c6a71b4050fe43484d7fcbac4.yml -openapi_spec_hash: da6851e3891ad2659a50ed6a736fd32a -config_hash: 74d955cdc2377213f5268ea309090f6c +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml +openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132 +config_hash: 930dac3aa861344867e4ac84f037b5df diff --git a/api.md b/api.md index 0e1134d94..2f274e7cf 100644 --- a/api.md +++ b/api.md @@ -805,7 +805,6 @@ Types: - McpListToolsCompleted - McpListToolsFailed - McpListToolsInProgress -- Models - NoiseReductionType - OutputAudioBufferClearEvent - RateLimitsUpdatedEvent @@ -822,6 +821,7 @@ Types: - RealtimeConversationItemUserMessage - RealtimeError - RealtimeErrorEvent +- RealtimeFunctionTool - RealtimeMcpApprovalRequest - RealtimeMcpApprovalResponse - RealtimeMcpListTools @@ -874,7 +874,6 @@ Types: - SessionCreatedEvent - SessionUpdateEvent - SessionUpdatedEvent -- TranscriptionSessionCreated - TranscriptionSessionUpdate - TranscriptionSessionUpdatedEvent @@ -884,9 +883,7 @@ Types: - RealtimeSessionClientSecret - RealtimeSessionCreateResponse -- RealtimeTranscriptionSessionClientSecret - RealtimeTranscriptionSessionCreateResponse -- RealtimeTranscriptionSessionInputAudioTranscription - RealtimeTranscriptionSessionTurnDetection - ClientSecretCreateResponse diff --git a/src/resources/realtime/client-secrets.ts b/src/resources/realtime/client-secrets.ts index 6539260ac..5c53b2e5a 100644 --- a/src/resources/realtime/client-secrets.ts +++ b/src/resources/realtime/client-secrets.ts @@ -1,6 +1,7 @@ // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. import { APIResource } from '../../core/resource'; +import * as ClientSecretsAPI from './client-secrets'; import * as RealtimeAPI from './realtime'; import * as ResponsesAPI from '../responses/responses'; import { APIPromise } from '../../core/api-promise'; @@ -39,14 +40,19 @@ export interface RealtimeSessionClientSecret { */ export interface RealtimeSessionCreateResponse { /** - * Configuration for input and output audio. + * Ephemeral key returned by the API. */ - audio?: RealtimeSessionCreateResponse.Audio; + client_secret: RealtimeSessionClientSecret; /** - * Ephemeral key returned by the API. + * The type of session to create. Always `realtime` for the Realtime API. + */ + type: 'realtime'; + + /** + * Configuration for input and output audio. */ - client_secret?: RealtimeSessionClientSecret; + audio?: RealtimeSessionCreateResponse.Audio; /** * Additional fields to include in server outputs. @@ -115,7 +121,7 @@ export interface RealtimeSessionCreateResponse { /** * Tools available to the model. */ - tools?: Array; + tools?: Array; /** * Realtime API can write session traces to the @@ -132,11 +138,6 @@ export interface RealtimeSessionCreateResponse { * The default is `auto`. */ truncation?: RealtimeAPI.RealtimeTruncation; - - /** - * The type of session to create. Always `realtime` for the Realtime API. - */ - type?: 'realtime'; } export namespace RealtimeSessionCreateResponse { @@ -238,7 +239,7 @@ export namespace RealtimeSessionCreateResponse { /** * Optional idle timeout after which turn detection will auto-timeout when no - * additional audio is received. + * additional audio is received and emits a `timeout_triggered` event. */ idle_timeout_ms?: number | null; @@ -491,87 +492,90 @@ export namespace RealtimeSessionCreateResponse { } /** - * Ephemeral key returned by the API. Only present when the session is created on - * the server via REST API. + * A Realtime transcription session configuration object. */ -export interface RealtimeTranscriptionSessionClientSecret { - /** - * Timestamp for when the token expires. Currently, all tokens expire after one - * minute. - */ - expires_at: number; - +export interface RealtimeTranscriptionSessionCreateResponse { /** - * Ephemeral key usable in client environments to authenticate connections to the - * Realtime API. Use this in client-side environments rather than a standard API - * token, which should only be used server-side. + * Unique identifier for the session that looks like `sess_1234567890abcdef`. */ - value: string; -} + id: string; -/** - * A new Realtime transcription session configuration. - * - * When a session is created on the server via REST API, the session object also - * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is - * not present when a session is updated via the WebSocket API. - */ -export interface RealtimeTranscriptionSessionCreateResponse { /** - * Ephemeral key returned by the API. Only present when the session is created on - * the server via REST API. + * The object type. Always `realtime.transcription_session`. */ - client_secret: RealtimeTranscriptionSessionClientSecret; + object: string; /** - * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + * The type of session. Always `transcription` for transcription sessions. */ - input_audio_format?: string; + type: 'transcription'; /** - * Configuration of the transcription model. + * Configuration for input audio for the session. */ - input_audio_transcription?: RealtimeTranscriptionSessionInputAudioTranscription; + audio?: RealtimeTranscriptionSessionCreateResponse.Audio; /** - * The set of modalities the model can respond with. To disable audio, set this to - * ["text"]. + * Expiration timestamp for the session, in seconds since epoch. */ - modalities?: Array<'text' | 'audio'>; + expires_at?: number; /** - * Configuration for turn detection. Can be set to `null` to turn off. Server VAD - * means that the model will detect the start and end of speech based on audio - * volume and respond at the end of user speech. + * Additional fields to include in server outputs. + * + * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio + * transcription. */ - turn_detection?: RealtimeTranscriptionSessionTurnDetection; + include?: Array<'item.input_audio_transcription.logprobs'>; } -/** - * Configuration of the transcription model. - */ -export interface RealtimeTranscriptionSessionInputAudioTranscription { +export namespace RealtimeTranscriptionSessionCreateResponse { /** - * The language of the input audio. Supplying the input language in - * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - * format will improve accuracy and latency. + * Configuration for input audio for the session. */ - language?: string; + export interface Audio { + input?: Audio.Input; + } - /** - * The model to use for transcription. Current options are `whisper-1`, - * `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. - */ - model?: 'whisper-1' | 'gpt-4o-transcribe-latest' | 'gpt-4o-mini-transcribe' | 'gpt-4o-transcribe'; + export namespace Audio { + export interface Input { + /** + * The PCM audio format. Only a 24kHz sample rate is supported. + */ + format?: RealtimeAPI.RealtimeAudioFormats; - /** - * An optional text to guide the model's style or continue a previous audio - * segment. For `whisper-1`, the - * [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - * For `gpt-4o-transcribe` models, the prompt is a free text string, for example - * "expect words related to technology". - */ - prompt?: string; + /** + * Configuration for input audio noise reduction. + */ + noise_reduction?: Input.NoiseReduction; + + /** + * Configuration of the transcription model. + */ + transcription?: RealtimeAPI.AudioTranscription; + + /** + * Configuration for turn detection. Can be set to `null` to turn off. Server VAD + * means that the model will detect the start and end of speech based on audio + * volume and respond at the end of user speech. + */ + turn_detection?: ClientSecretsAPI.RealtimeTranscriptionSessionTurnDetection; + } + + export namespace Input { + /** + * Configuration for input audio noise reduction. + */ + export interface NoiseReduction { + /** + * Type of noise reduction. `near_field` is for close-talking microphones such as + * headphones, `far_field` is for far-field microphones such as laptop or + * conference room microphones. + */ + type?: RealtimeAPI.NoiseReductionType; + } + } + } } /** @@ -670,9 +674,7 @@ export declare namespace ClientSecrets { export { type RealtimeSessionClientSecret as RealtimeSessionClientSecret, type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse, - type RealtimeTranscriptionSessionClientSecret as RealtimeTranscriptionSessionClientSecret, type RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse, - type RealtimeTranscriptionSessionInputAudioTranscription as RealtimeTranscriptionSessionInputAudioTranscription, type RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection, type ClientSecretCreateResponse as ClientSecretCreateResponse, type ClientSecretCreateParams as ClientSecretCreateParams, diff --git a/src/resources/realtime/index.ts b/src/resources/realtime/index.ts index 550532500..777543853 100644 --- a/src/resources/realtime/index.ts +++ b/src/resources/realtime/index.ts @@ -4,9 +4,7 @@ export { ClientSecrets, type RealtimeSessionClientSecret, type RealtimeSessionCreateResponse, - type RealtimeTranscriptionSessionClientSecret, type RealtimeTranscriptionSessionCreateResponse, - type RealtimeTranscriptionSessionInputAudioTranscription, type RealtimeTranscriptionSessionTurnDetection, type ClientSecretCreateResponse, type ClientSecretCreateParams, diff --git a/src/resources/realtime/realtime.ts b/src/resources/realtime/realtime.ts index 9dee11e11..12f1f6848 100644 --- a/src/resources/realtime/realtime.ts +++ b/src/resources/realtime/realtime.ts @@ -10,9 +10,7 @@ import { ClientSecrets, RealtimeSessionClientSecret, RealtimeSessionCreateResponse, - RealtimeTranscriptionSessionClientSecret, RealtimeTranscriptionSessionCreateResponse, - RealtimeTranscriptionSessionInputAudioTranscription, RealtimeTranscriptionSessionTurnDetection, } from './client-secrets'; import * as ResponsesAPI from '../responses/responses'; @@ -1044,29 +1042,6 @@ export interface McpListToolsInProgress { type: 'mcp_list_tools.in_progress'; } -export interface Models { - /** - * The description of the function, including guidance on when and how to call it, - * and guidance about what to tell the user when calling (if anything). - */ - description?: string; - - /** - * The name of the function. - */ - name?: string; - - /** - * Parameters of the function in JSON Schema. - */ - parameters?: unknown; - - /** - * The type of the tool, i.e. `function`. - */ - type?: 'function'; -} - /** * Type of noise reduction. `near_field` is for close-talking microphones such as * headphones, `far_field` is for far-field microphones such as laptop or @@ -1320,7 +1295,7 @@ export interface RealtimeAudioInputTurnDetection { /** * Optional idle timeout after which turn detection will auto-timeout when no - * additional audio is received. + * additional audio is received and emits a `timeout_triggered` event. */ idle_timeout_ms?: number | null; @@ -1371,8 +1346,7 @@ export type RealtimeClientEvent = | InputAudioBufferCommitEvent | ResponseCancelEvent | ResponseCreateEvent - | SessionUpdateEvent - | TranscriptionSessionUpdate; + | SessionUpdateEvent; /** * An assistant message item in a Realtime conversation. @@ -1706,6 +1680,29 @@ export interface RealtimeErrorEvent { type: 'error'; } +export interface RealtimeFunctionTool { + /** + * The description of the function, including guidance on when and how to call it, + * and guidance about what to tell the user when calling (if anything). + */ + description?: string; + + /** + * The name of the function. + */ + name?: string; + + /** + * Parameters of the function in JSON Schema. + */ + parameters?: unknown; + + /** + * The type of the tool, i.e. `function`. + */ + type?: 'function'; +} + /** * A Realtime item requesting human approval of a tool invocation. */ @@ -2267,7 +2264,7 @@ export interface RealtimeResponseCreateParams { /** * Tools available to the model. */ - tools?: Array; + tools?: Array; } /** @@ -2457,8 +2454,6 @@ export type RealtimeServerEvent = | ResponseTextDoneEvent | SessionCreatedEvent | SessionUpdatedEvent - | TranscriptionSessionUpdatedEvent - | TranscriptionSessionCreated | RealtimeServerEvent.OutputAudioBufferStarted | RealtimeServerEvent.OutputAudioBufferStopped | RealtimeServerEvent.OutputAudioBufferCleared @@ -2702,7 +2697,7 @@ export interface RealtimeSession { /** * Tools (functions) available to the model. */ - tools?: Array; + tools?: Array; /** * Configuration options for tracing. Set to null to disable tracing. Once tracing @@ -2971,7 +2966,7 @@ export type RealtimeToolsConfig = Array; * (MCP) servers. * [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). */ -export type RealtimeToolsConfigUnion = Models | RealtimeToolsConfigUnion.Mcp; +export type RealtimeToolsConfigUnion = RealtimeFunctionTool | RealtimeToolsConfigUnion.Mcp; export namespace RealtimeToolsConfigUnion { /** @@ -4204,30 +4199,6 @@ export interface SessionUpdatedEvent { type: 'session.updated'; } -/** - * Returned when a transcription session is created. - */ -export interface TranscriptionSessionCreated { - /** - * The unique ID of the server event. - */ - event_id: string; - - /** - * A new Realtime transcription session configuration. - * - * When a session is created on the server via REST API, the session object also - * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is - * not present when a session is updated via the WebSocket API. - */ - session: ClientSecretsAPI.RealtimeTranscriptionSessionCreateResponse; - - /** - * The event type, must be `transcription_session.created`. - */ - type: 'transcription_session.created'; -} - /** * Send this event to update a transcription session. */ @@ -4359,7 +4330,7 @@ export interface TranscriptionSessionUpdatedEvent { * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is * not present when a session is updated via the WebSocket API. */ - session: ClientSecretsAPI.RealtimeTranscriptionSessionCreateResponse; + session: TranscriptionSessionUpdatedEvent.Session; /** * The event type, must be `transcription_session.updated`. @@ -4367,6 +4338,99 @@ export interface TranscriptionSessionUpdatedEvent { type: 'transcription_session.updated'; } +export namespace TranscriptionSessionUpdatedEvent { + /** + * A new Realtime transcription session configuration. + * + * When a session is created on the server via REST API, the session object also + * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is + * not present when a session is updated via the WebSocket API. + */ + export interface Session { + /** + * Ephemeral key returned by the API. Only present when the session is created on + * the server via REST API. + */ + client_secret: Session.ClientSecret; + + /** + * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + */ + input_audio_format?: string; + + /** + * Configuration of the transcription model. + */ + input_audio_transcription?: RealtimeAPI.AudioTranscription; + + /** + * The set of modalities the model can respond with. To disable audio, set this to + * ["text"]. + */ + modalities?: Array<'text' | 'audio'>; + + /** + * Configuration for turn detection. Can be set to `null` to turn off. Server VAD + * means that the model will detect the start and end of speech based on audio + * volume and respond at the end of user speech. + */ + turn_detection?: Session.TurnDetection; + } + + export namespace Session { + /** + * Ephemeral key returned by the API. Only present when the session is created on + * the server via REST API. + */ + export interface ClientSecret { + /** + * Timestamp for when the token expires. Currently, all tokens expire after one + * minute. + */ + expires_at: number; + + /** + * Ephemeral key usable in client environments to authenticate connections to the + * Realtime API. Use this in client-side environments rather than a standard API + * token, which should only be used server-side. + */ + value: string; + } + + /** + * Configuration for turn detection. Can be set to `null` to turn off. Server VAD + * means that the model will detect the start and end of speech based on audio + * volume and respond at the end of user speech. + */ + export interface TurnDetection { + /** + * Amount of audio to include before the VAD detected speech (in milliseconds). + * Defaults to 300ms. + */ + prefix_padding_ms?: number; + + /** + * Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + * With shorter values the model will respond more quickly, but may jump in on + * short pauses from the user. + */ + silence_duration_ms?: number; + + /** + * Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + * threshold will require louder audio to activate the model, and thus might + * perform better in noisy environments. + */ + threshold?: number; + + /** + * Type of turn detection, only `server_vad` is currently supported. + */ + type?: string; + } + } +} + Realtime.ClientSecrets = ClientSecrets; export declare namespace Realtime { @@ -4400,7 +4464,6 @@ export declare namespace Realtime { type McpListToolsCompleted as McpListToolsCompleted, type McpListToolsFailed as McpListToolsFailed, type McpListToolsInProgress as McpListToolsInProgress, - type Models as Models, type NoiseReductionType as NoiseReductionType, type OutputAudioBufferClearEvent as OutputAudioBufferClearEvent, type RateLimitsUpdatedEvent as RateLimitsUpdatedEvent, @@ -4417,6 +4480,7 @@ export declare namespace Realtime { type RealtimeConversationItemUserMessage as RealtimeConversationItemUserMessage, type RealtimeError as RealtimeError, type RealtimeErrorEvent as RealtimeErrorEvent, + type RealtimeFunctionTool as RealtimeFunctionTool, type RealtimeMcpApprovalRequest as RealtimeMcpApprovalRequest, type RealtimeMcpApprovalResponse as RealtimeMcpApprovalResponse, type RealtimeMcpListTools as RealtimeMcpListTools, @@ -4469,7 +4533,6 @@ export declare namespace Realtime { type SessionCreatedEvent as SessionCreatedEvent, type SessionUpdateEvent as SessionUpdateEvent, type SessionUpdatedEvent as SessionUpdatedEvent, - type TranscriptionSessionCreated as TranscriptionSessionCreated, type TranscriptionSessionUpdate as TranscriptionSessionUpdate, type TranscriptionSessionUpdatedEvent as TranscriptionSessionUpdatedEvent, }; @@ -4478,9 +4541,7 @@ export declare namespace Realtime { ClientSecrets as ClientSecrets, type RealtimeSessionClientSecret as RealtimeSessionClientSecret, type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse, - type RealtimeTranscriptionSessionClientSecret as RealtimeTranscriptionSessionClientSecret, type RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse, - type RealtimeTranscriptionSessionInputAudioTranscription as RealtimeTranscriptionSessionInputAudioTranscription, type RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection, type ClientSecretCreateResponse as ClientSecretCreateResponse, type ClientSecretCreateParams as ClientSecretCreateParams, From 4128b2ef1d36917a04b56faa8cdf254119f84674 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 10 Sep 2025 13:34:21 +0000 Subject: [PATCH 2/2] release: 5.20.1 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 8 ++++++++ jsr.json | 2 +- package.json | 2 +- src/version.ts | 2 +- 5 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index dec479608..83fac5c78 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "5.20.0" + ".": "5.20.1" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 02919cc52..b0daffdad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 5.20.1 (2025-09-10) + +Full Changelog: [v5.20.0...v5.20.1](https://github.com/openai/openai-node/compare/v5.20.0...v5.20.1) + +### Chores + +* **api:** fix realtime GA types ([1c0d314](https://github.com/openai/openai-node/commit/1c0d3145e920a55f3c710323849bcac1fd60b703)) + ## 5.20.0 (2025-09-08) Full Changelog: [v5.19.1...v5.20.0](https://github.com/openai/openai-node/compare/v5.19.1...v5.20.0) diff --git a/jsr.json b/jsr.json index 43571736b..af3e71220 100644 --- a/jsr.json +++ b/jsr.json @@ -1,6 +1,6 @@ { "name": "@openai/openai", - "version": "5.20.0", + "version": "5.20.1", "exports": { ".": "./index.ts", "./helpers/zod": "./helpers/zod.ts", diff --git a/package.json b/package.json index 340fba521..7dac7c900 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "openai", - "version": "5.20.0", + "version": "5.20.1", "description": "The official TypeScript library for the OpenAI API", "author": "OpenAI ", "types": "dist/index.d.ts", diff --git a/src/version.ts b/src/version.ts index 36168d9b4..95318e579 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = '5.20.0'; // x-release-please-version +export const VERSION = '5.20.1'; // x-release-please-version