From 1c0d3145e920a55f3c710323849bcac1fd60b703 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Wed, 10 Sep 2025 13:33:47 +0000
Subject: [PATCH 1/2] chore(api): fix realtime GA types
---
.stats.yml | 6 +-
api.md | 5 +-
src/resources/realtime/client-secrets.ts | 142 ++++++++---------
src/resources/realtime/index.ts | 2 -
src/resources/realtime/realtime.ts | 185 +++++++++++++++--------
5 files changed, 199 insertions(+), 141 deletions(-)
diff --git a/.stats.yml b/.stats.yml
index 36a3c7f58..2aa16be87 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 118
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-7807ec6037efcee1af7decbfd3974a42b761fb6c6a71b4050fe43484d7fcbac4.yml
-openapi_spec_hash: da6851e3891ad2659a50ed6a736fd32a
-config_hash: 74d955cdc2377213f5268ea309090f6c
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml
+openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132
+config_hash: 930dac3aa861344867e4ac84f037b5df
diff --git a/api.md b/api.md
index 0e1134d94..2f274e7cf 100644
--- a/api.md
+++ b/api.md
@@ -805,7 +805,6 @@ Types:
- McpListToolsCompleted
- McpListToolsFailed
- McpListToolsInProgress
-- Models
- NoiseReductionType
- OutputAudioBufferClearEvent
- RateLimitsUpdatedEvent
@@ -822,6 +821,7 @@ Types:
- RealtimeConversationItemUserMessage
- RealtimeError
- RealtimeErrorEvent
+- RealtimeFunctionTool
- RealtimeMcpApprovalRequest
- RealtimeMcpApprovalResponse
- RealtimeMcpListTools
@@ -874,7 +874,6 @@ Types:
- SessionCreatedEvent
- SessionUpdateEvent
- SessionUpdatedEvent
-- TranscriptionSessionCreated
- TranscriptionSessionUpdate
- TranscriptionSessionUpdatedEvent
@@ -884,9 +883,7 @@ Types:
- RealtimeSessionClientSecret
- RealtimeSessionCreateResponse
-- RealtimeTranscriptionSessionClientSecret
- RealtimeTranscriptionSessionCreateResponse
-- RealtimeTranscriptionSessionInputAudioTranscription
- RealtimeTranscriptionSessionTurnDetection
- ClientSecretCreateResponse
diff --git a/src/resources/realtime/client-secrets.ts b/src/resources/realtime/client-secrets.ts
index 6539260ac..5c53b2e5a 100644
--- a/src/resources/realtime/client-secrets.ts
+++ b/src/resources/realtime/client-secrets.ts
@@ -1,6 +1,7 @@
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
import { APIResource } from '../../core/resource';
+import * as ClientSecretsAPI from './client-secrets';
import * as RealtimeAPI from './realtime';
import * as ResponsesAPI from '../responses/responses';
import { APIPromise } from '../../core/api-promise';
@@ -39,14 +40,19 @@ export interface RealtimeSessionClientSecret {
*/
export interface RealtimeSessionCreateResponse {
/**
- * Configuration for input and output audio.
+ * Ephemeral key returned by the API.
*/
- audio?: RealtimeSessionCreateResponse.Audio;
+ client_secret: RealtimeSessionClientSecret;
/**
- * Ephemeral key returned by the API.
+ * The type of session to create. Always `realtime` for the Realtime API.
+ */
+ type: 'realtime';
+
+ /**
+ * Configuration for input and output audio.
*/
- client_secret?: RealtimeSessionClientSecret;
+ audio?: RealtimeSessionCreateResponse.Audio;
/**
* Additional fields to include in server outputs.
@@ -115,7 +121,7 @@ export interface RealtimeSessionCreateResponse {
/**
* Tools available to the model.
*/
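- tools?: Array<RealtimeAPI.Models | RealtimeSessionCreateResponse.McpTool>;
+ tools?: Array<RealtimeAPI.RealtimeFunctionTool | RealtimeSessionCreateResponse.McpTool>;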
/**
* Realtime API can write session traces to the
@@ -132,11 +138,6 @@ export interface RealtimeSessionCreateResponse {
* The default is `auto`.
*/
truncation?: RealtimeAPI.RealtimeTruncation;
-
- /**
- * The type of session to create. Always `realtime` for the Realtime API.
- */
- type?: 'realtime';
}
export namespace RealtimeSessionCreateResponse {
@@ -238,7 +239,7 @@ export namespace RealtimeSessionCreateResponse {
/**
* Optional idle timeout after which turn detection will auto-timeout when no
- * additional audio is received.
+ * additional audio is received, and emit a `timeout_triggered` event.
*/
idle_timeout_ms?: number | null;
@@ -491,87 +492,90 @@ export namespace RealtimeSessionCreateResponse {
}
/**
- * Ephemeral key returned by the API. Only present when the session is created on
- * the server via REST API.
+ * A Realtime transcription session configuration object.
*/
-export interface RealtimeTranscriptionSessionClientSecret {
- /**
- * Timestamp for when the token expires. Currently, all tokens expire after one
- * minute.
- */
- expires_at: number;
-
+export interface RealtimeTranscriptionSessionCreateResponse {
/**
- * Ephemeral key usable in client environments to authenticate connections to the
- * Realtime API. Use this in client-side environments rather than a standard API
- * token, which should only be used server-side.
+ * Unique identifier for the session that looks like `sess_1234567890abcdef`.
*/
- value: string;
-}
+ id: string;
-/**
- * A new Realtime transcription session configuration.
- *
- * When a session is created on the server via REST API, the session object also
- * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
- * not present when a session is updated via the WebSocket API.
- */
-export interface RealtimeTranscriptionSessionCreateResponse {
/**
- * Ephemeral key returned by the API. Only present when the session is created on
- * the server via REST API.
+ * The object type. Always `realtime.transcription_session`.
*/
- client_secret: RealtimeTranscriptionSessionClientSecret;
+ object: string;
/**
- * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+ * The type of session. Always `transcription` for transcription sessions.
*/
- input_audio_format?: string;
+ type: 'transcription';
/**
- * Configuration of the transcription model.
+ * Configuration for input audio for the session.
*/
- input_audio_transcription?: RealtimeTranscriptionSessionInputAudioTranscription;
+ audio?: RealtimeTranscriptionSessionCreateResponse.Audio;
/**
- * The set of modalities the model can respond with. To disable audio, set this to
- * ["text"].
+ * Expiration timestamp for the session, in seconds since epoch.
*/
- modalities?: Array<'text' | 'audio'>;
+ expires_at?: number;
/**
- * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
- * means that the model will detect the start and end of speech based on audio
- * volume and respond at the end of user speech.
+ * Additional fields to include in server outputs.
+ *
+ * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+ * transcription.
*/
- turn_detection?: RealtimeTranscriptionSessionTurnDetection;
+ include?: Array<'item.input_audio_transcription.logprobs'>;
}
-/**
- * Configuration of the transcription model.
- */
-export interface RealtimeTranscriptionSessionInputAudioTranscription {
+export namespace RealtimeTranscriptionSessionCreateResponse {
/**
- * The language of the input audio. Supplying the input language in
- * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
- * format will improve accuracy and latency.
+ * Configuration for input audio for the session.
*/
- language?: string;
+ export interface Audio {
+ input?: Audio.Input;
+ }
- /**
- * The model to use for transcription. Current options are `whisper-1`,
- * `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
- */
- model?: 'whisper-1' | 'gpt-4o-transcribe-latest' | 'gpt-4o-mini-transcribe' | 'gpt-4o-transcribe';
+ export namespace Audio {
+ export interface Input {
+ /**
+ * The PCM audio format. Only a 24kHz sample rate is supported.
+ */
+ format?: RealtimeAPI.RealtimeAudioFormats;
- /**
- * An optional text to guide the model's style or continue a previous audio
- * segment. For `whisper-1`, the
- * [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
- * For `gpt-4o-transcribe` models, the prompt is a free text string, for example
- * "expect words related to technology".
- */
- prompt?: string;
+ /**
+ * Configuration for input audio noise reduction.
+ */
+ noise_reduction?: Input.NoiseReduction;
+
+ /**
+ * Configuration of the transcription model.
+ */
+ transcription?: RealtimeAPI.AudioTranscription;
+
+ /**
+ * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+ * means that the model will detect the start and end of speech based on audio
+ * volume and respond at the end of user speech.
+ */
+ turn_detection?: ClientSecretsAPI.RealtimeTranscriptionSessionTurnDetection;
+ }
+
+ export namespace Input {
+ /**
+ * Configuration for input audio noise reduction.
+ */
+ export interface NoiseReduction {
+ /**
+ * Type of noise reduction. `near_field` is for close-talking microphones such as
+ * headphones, `far_field` is for far-field microphones such as laptop or
+ * conference room microphones.
+ */
+ type?: RealtimeAPI.NoiseReductionType;
+ }
+ }
+ }
}
/**
@@ -670,9 +674,7 @@ export declare namespace ClientSecrets {
export {
type RealtimeSessionClientSecret as RealtimeSessionClientSecret,
type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse,
- type RealtimeTranscriptionSessionClientSecret as RealtimeTranscriptionSessionClientSecret,
type RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse,
- type RealtimeTranscriptionSessionInputAudioTranscription as RealtimeTranscriptionSessionInputAudioTranscription,
type RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection,
type ClientSecretCreateResponse as ClientSecretCreateResponse,
type ClientSecretCreateParams as ClientSecretCreateParams,
diff --git a/src/resources/realtime/index.ts b/src/resources/realtime/index.ts
index 550532500..777543853 100644
--- a/src/resources/realtime/index.ts
+++ b/src/resources/realtime/index.ts
@@ -4,9 +4,7 @@ export {
ClientSecrets,
type RealtimeSessionClientSecret,
type RealtimeSessionCreateResponse,
- type RealtimeTranscriptionSessionClientSecret,
type RealtimeTranscriptionSessionCreateResponse,
- type RealtimeTranscriptionSessionInputAudioTranscription,
type RealtimeTranscriptionSessionTurnDetection,
type ClientSecretCreateResponse,
type ClientSecretCreateParams,
diff --git a/src/resources/realtime/realtime.ts b/src/resources/realtime/realtime.ts
index 9dee11e11..12f1f6848 100644
--- a/src/resources/realtime/realtime.ts
+++ b/src/resources/realtime/realtime.ts
@@ -10,9 +10,7 @@ import {
ClientSecrets,
RealtimeSessionClientSecret,
RealtimeSessionCreateResponse,
- RealtimeTranscriptionSessionClientSecret,
RealtimeTranscriptionSessionCreateResponse,
- RealtimeTranscriptionSessionInputAudioTranscription,
RealtimeTranscriptionSessionTurnDetection,
} from './client-secrets';
import * as ResponsesAPI from '../responses/responses';
@@ -1044,29 +1042,6 @@ export interface McpListToolsInProgress {
type: 'mcp_list_tools.in_progress';
}
-export interface Models {
- /**
- * The description of the function, including guidance on when and how to call it,
- * and guidance about what to tell the user when calling (if anything).
- */
- description?: string;
-
- /**
- * The name of the function.
- */
- name?: string;
-
- /**
- * Parameters of the function in JSON Schema.
- */
- parameters?: unknown;
-
- /**
- * The type of the tool, i.e. `function`.
- */
- type?: 'function';
-}
-
/**
* Type of noise reduction. `near_field` is for close-talking microphones such as
* headphones, `far_field` is for far-field microphones such as laptop or
@@ -1320,7 +1295,7 @@ export interface RealtimeAudioInputTurnDetection {
/**
* Optional idle timeout after which turn detection will auto-timeout when no
- * additional audio is received.
+ * additional audio is received, and emit a `timeout_triggered` event.
*/
idle_timeout_ms?: number | null;
@@ -1371,8 +1346,7 @@ export type RealtimeClientEvent =
| InputAudioBufferCommitEvent
| ResponseCancelEvent
| ResponseCreateEvent
- | SessionUpdateEvent
- | TranscriptionSessionUpdate;
+ | SessionUpdateEvent;
/**
* An assistant message item in a Realtime conversation.
@@ -1706,6 +1680,29 @@ export interface RealtimeErrorEvent {
type: 'error';
}
+export interface RealtimeFunctionTool {
+ /**
+ * The description of the function, including guidance on when and how to call it,
+ * and guidance about what to tell the user when calling (if anything).
+ */
+ description?: string;
+
+ /**
+ * The name of the function.
+ */
+ name?: string;
+
+ /**
+ * Parameters of the function in JSON Schema.
+ */
+ parameters?: unknown;
+
+ /**
+ * The type of the tool, i.e. `function`.
+ */
+ type?: 'function';
+}
+
/**
* A Realtime item requesting human approval of a tool invocation.
*/
@@ -2267,7 +2264,7 @@ export interface RealtimeResponseCreateParams {
/**
* Tools available to the model.
*/
- tools?: Array<Models | RealtimeResponseCreateMcpTool>;
+ tools?: Array<RealtimeFunctionTool | RealtimeResponseCreateMcpTool>;
}
/**
@@ -2457,8 +2454,6 @@ export type RealtimeServerEvent =
| ResponseTextDoneEvent
| SessionCreatedEvent
| SessionUpdatedEvent
- | TranscriptionSessionUpdatedEvent
- | TranscriptionSessionCreated
| RealtimeServerEvent.OutputAudioBufferStarted
| RealtimeServerEvent.OutputAudioBufferStopped
| RealtimeServerEvent.OutputAudioBufferCleared
@@ -2702,7 +2697,7 @@ export interface RealtimeSession {
/**
* Tools (functions) available to the model.
*/
- tools?: Array<Models>;
+ tools?: Array<RealtimeFunctionTool>;
/**
* Configuration options for tracing. Set to null to disable tracing. Once tracing
@@ -2971,7 +2966,7 @@ export type RealtimeToolsConfig = Array<RealtimeToolsConfigUnion>;
* (MCP) servers.
* [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp).
*/
-export type RealtimeToolsConfigUnion = Models | RealtimeToolsConfigUnion.Mcp;
+export type RealtimeToolsConfigUnion = RealtimeFunctionTool | RealtimeToolsConfigUnion.Mcp;
export namespace RealtimeToolsConfigUnion {
/**
@@ -4204,30 +4199,6 @@ export interface SessionUpdatedEvent {
type: 'session.updated';
}
-/**
- * Returned when a transcription session is created.
- */
-export interface TranscriptionSessionCreated {
- /**
- * The unique ID of the server event.
- */
- event_id: string;
-
- /**
- * A new Realtime transcription session configuration.
- *
- * When a session is created on the server via REST API, the session object also
- * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
- * not present when a session is updated via the WebSocket API.
- */
- session: ClientSecretsAPI.RealtimeTranscriptionSessionCreateResponse;
-
- /**
- * The event type, must be `transcription_session.created`.
- */
- type: 'transcription_session.created';
-}
-
/**
* Send this event to update a transcription session.
*/
@@ -4359,7 +4330,7 @@ export interface TranscriptionSessionUpdatedEvent {
* contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
* not present when a session is updated via the WebSocket API.
*/
- session: ClientSecretsAPI.RealtimeTranscriptionSessionCreateResponse;
+ session: TranscriptionSessionUpdatedEvent.Session;
/**
* The event type, must be `transcription_session.updated`.
@@ -4367,6 +4338,99 @@ export interface TranscriptionSessionUpdatedEvent {
type: 'transcription_session.updated';
}
+export namespace TranscriptionSessionUpdatedEvent {
+ /**
+ * A new Realtime transcription session configuration.
+ *
+ * When a session is created on the server via REST API, the session object also
+ * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
+ * not present when a session is updated via the WebSocket API.
+ */
+ export interface Session {
+ /**
+ * Ephemeral key returned by the API. Only present when the session is created on
+ * the server via REST API.
+ */
+ client_secret: Session.ClientSecret;
+
+ /**
+ * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+ */
+ input_audio_format?: string;
+
+ /**
+ * Configuration of the transcription model.
+ */
+ input_audio_transcription?: RealtimeAPI.AudioTranscription;
+
+ /**
+ * The set of modalities the model can respond with. To disable audio, set this to
+ * ["text"].
+ */
+ modalities?: Array<'text' | 'audio'>;
+
+ /**
+ * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+ * means that the model will detect the start and end of speech based on audio
+ * volume and respond at the end of user speech.
+ */
+ turn_detection?: Session.TurnDetection;
+ }
+
+ export namespace Session {
+ /**
+ * Ephemeral key returned by the API. Only present when the session is created on
+ * the server via REST API.
+ */
+ export interface ClientSecret {
+ /**
+ * Timestamp for when the token expires. Currently, all tokens expire after one
+ * minute.
+ */
+ expires_at: number;
+
+ /**
+ * Ephemeral key usable in client environments to authenticate connections to the
+ * Realtime API. Use this in client-side environments rather than a standard API
+ * token, which should only be used server-side.
+ */
+ value: string;
+ }
+
+ /**
+ * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+ * means that the model will detect the start and end of speech based on audio
+ * volume and respond at the end of user speech.
+ */
+ export interface TurnDetection {
+ /**
+ * Amount of audio to include before the VAD detected speech (in milliseconds).
+ * Defaults to 300ms.
+ */
+ prefix_padding_ms?: number;
+
+ /**
+ * Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
+ * With shorter values the model will respond more quickly, but may jump in on
+ * short pauses from the user.
+ */
+ silence_duration_ms?: number;
+
+ /**
+ * Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
+ * threshold will require louder audio to activate the model, and thus might
+ * perform better in noisy environments.
+ */
+ threshold?: number;
+
+ /**
+ * Type of turn detection, only `server_vad` is currently supported.
+ */
+ type?: string;
+ }
+ }
+}
+
Realtime.ClientSecrets = ClientSecrets;
export declare namespace Realtime {
@@ -4400,7 +4464,6 @@ export declare namespace Realtime {
type McpListToolsCompleted as McpListToolsCompleted,
type McpListToolsFailed as McpListToolsFailed,
type McpListToolsInProgress as McpListToolsInProgress,
- type Models as Models,
type NoiseReductionType as NoiseReductionType,
type OutputAudioBufferClearEvent as OutputAudioBufferClearEvent,
type RateLimitsUpdatedEvent as RateLimitsUpdatedEvent,
@@ -4417,6 +4480,7 @@ export declare namespace Realtime {
type RealtimeConversationItemUserMessage as RealtimeConversationItemUserMessage,
type RealtimeError as RealtimeError,
type RealtimeErrorEvent as RealtimeErrorEvent,
+ type RealtimeFunctionTool as RealtimeFunctionTool,
type RealtimeMcpApprovalRequest as RealtimeMcpApprovalRequest,
type RealtimeMcpApprovalResponse as RealtimeMcpApprovalResponse,
type RealtimeMcpListTools as RealtimeMcpListTools,
@@ -4469,7 +4533,6 @@ export declare namespace Realtime {
type SessionCreatedEvent as SessionCreatedEvent,
type SessionUpdateEvent as SessionUpdateEvent,
type SessionUpdatedEvent as SessionUpdatedEvent,
- type TranscriptionSessionCreated as TranscriptionSessionCreated,
type TranscriptionSessionUpdate as TranscriptionSessionUpdate,
type TranscriptionSessionUpdatedEvent as TranscriptionSessionUpdatedEvent,
};
@@ -4478,9 +4541,7 @@ export declare namespace Realtime {
ClientSecrets as ClientSecrets,
type RealtimeSessionClientSecret as RealtimeSessionClientSecret,
type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse,
- type RealtimeTranscriptionSessionClientSecret as RealtimeTranscriptionSessionClientSecret,
type RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse,
- type RealtimeTranscriptionSessionInputAudioTranscription as RealtimeTranscriptionSessionInputAudioTranscription,
type RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection,
type ClientSecretCreateResponse as ClientSecretCreateResponse,
type ClientSecretCreateParams as ClientSecretCreateParams,
From 4128b2ef1d36917a04b56faa8cdf254119f84674 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Wed, 10 Sep 2025 13:34:21 +0000
Subject: [PATCH 2/2] release: 5.20.1
---
.release-please-manifest.json | 2 +-
CHANGELOG.md | 8 ++++++++
jsr.json | 2 +-
package.json | 2 +-
src/version.ts | 2 +-
5 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index dec479608..83fac5c78 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "5.20.0"
+ ".": "5.20.1"
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 02919cc52..b0daffdad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
# Changelog
+## 5.20.1 (2025-09-10)
+
+Full Changelog: [v5.20.0...v5.20.1](https://github.com/openai/openai-node/compare/v5.20.0...v5.20.1)
+
+### Chores
+
+* **api:** fix realtime GA types ([1c0d314](https://github.com/openai/openai-node/commit/1c0d3145e920a55f3c710323849bcac1fd60b703))
+
## 5.20.0 (2025-09-08)
Full Changelog: [v5.19.1...v5.20.0](https://github.com/openai/openai-node/compare/v5.19.1...v5.20.0)
diff --git a/jsr.json b/jsr.json
index 43571736b..af3e71220 100644
--- a/jsr.json
+++ b/jsr.json
@@ -1,6 +1,6 @@
{
"name": "@openai/openai",
- "version": "5.20.0",
+ "version": "5.20.1",
"exports": {
".": "./index.ts",
"./helpers/zod": "./helpers/zod.ts",
diff --git a/package.json b/package.json
index 340fba521..7dac7c900 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "openai",
- "version": "5.20.0",
+ "version": "5.20.1",
"description": "The official TypeScript library for the OpenAI API",
"author": "OpenAI <support@openai.com>",
"types": "dist/index.d.ts",
diff --git a/src/version.ts b/src/version.ts
index 36168d9b4..95318e579 100644
--- a/src/version.ts
+++ b/src/version.ts
@@ -1 +1 @@
-export const VERSION = '5.20.0'; // x-release-please-version
+export const VERSION = '5.20.1'; // x-release-please-version