Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/tough-readers-create.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@openai/agents-realtime': patch
---

fix: #675 top-level voice param in realtime session config does not work
29 changes: 26 additions & 3 deletions packages/agents-realtime/src/realtimeSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { RuntimeEventEmitter } from '@openai/agents-core/_shims';
import { isZodObject, toSmartString } from '@openai/agents-core/utils';
import type {
RealtimeSessionConfig,
RealtimeSessionConfigDefinition,
RealtimeToolDefinition,
RealtimeTracingConfig,
RealtimeUserInput,
Expand Down Expand Up @@ -346,9 +347,20 @@ export class RealtimeSession<
async #getSessionConfig(
additionalConfig: Partial<RealtimeSessionConfig> = {},
): Promise<Partial<RealtimeSessionConfig>> {
const overridesConfig: Partial<RealtimeSessionConfig> =
additionalConfig ?? {};
const optionsConfig: Partial<RealtimeSessionConfig> =
this.options.config ?? {};
const instructions = await this.#currentAgent.getSystemPrompt(
this.#context,
);
// Pulls the nested `audio.output.voice` value out of a session config.
// Yields `undefined` when `audio`, `audio.output`, or the voice itself is absent.
const getAudioOutputVoiceOverride = (
config: Partial<RealtimeSessionConfig>,
): string | undefined => {
// The config is viewed through the definition type so the nested `audio` field is reachable.
const { audio } = config as Partial<RealtimeSessionConfigDefinition>;
return audio?.output?.voice;
};

// Realtime expects tracing to be explicitly null to disable it; leaving the previous config
// in place would otherwise continue emitting spans.
Expand All @@ -374,6 +386,17 @@ export class RealtimeSession<
);
}

// Resolve the voice to send. Precedence, highest first:
//   1. `audio.output.voice` from the per-call overrides, then from options.config
//   2. top-level `voice` from the per-call overrides, then from options.config
//   3. the current agent's own voice
// Note: `??` skips both null and undefined when choosing between overrides and
// options, whereas the ternaries below only fall through on `undefined`.
const audioOutputVoiceOverride =
getAudioOutputVoiceOverride(overridesConfig) ??
getAudioOutputVoiceOverride(optionsConfig);
const topLevelVoiceOverride = overridesConfig.voice ?? optionsConfig.voice;
const resolvedVoice =
typeof audioOutputVoiceOverride !== 'undefined'
? audioOutputVoiceOverride
: typeof topLevelVoiceOverride !== 'undefined'
? topLevelVoiceOverride
: this.#currentAgent.voice;

// Start from any previously-sent config (so we preserve values like audio formats)
// and the original options.config provided by the user. Preference order:
// 1. Last session config we sent (#lastSessionConfig)
Expand All @@ -383,15 +406,15 @@ export class RealtimeSession<
// to ensure they always reflect the current agent & runtime state.
const base: Partial<RealtimeSessionConfig> = {
...(this.#lastSessionConfig ?? {}),
...(this.options.config ?? {}),
...(additionalConfig ?? {}),
...optionsConfig,
...overridesConfig,
};

// Note: Certain fields cannot be updated after the session begins, such as voice and model
const fullConfig: Partial<RealtimeSessionConfig> = {
...base,
instructions,
voice: this.#currentAgent.voice,
voice: resolvedVoice,
model: this.options.model,
tools: this.#currentTools,
tracing: tracingConfig,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,43 @@ describe('Realtime session voice config regression', () => {
);
expect(transport.mergedConfig?.audio?.output?.voice).toBe('marin');
});

// Regression case: a top-level `voice` in the session config ('verse') must win
// over the voice configured on the agent ('alloy') when no `audio.output.voice`
// override is supplied.
it('prefers config.voice over the agent voice when no audio output override exists', async () => {
// NOTE(review): CapturingTransport is defined outside this view; presumably it
// records the config it is sent as `mergedConfig` — confirm.
const transport = new CapturingTransport();
const agent = new RealtimeAgent({
name: 'voice-agent',
instructions: 'Respond cheerfully.',
voice: 'alloy',
});

const session = new RealtimeSession(agent, {
transport,
model: 'gpt-realtime',
config: {
voice: 'verse',
},
});

await session.connect({ apiKey: 'dummy-key' });

// The top-level voice is expected to surface at audio.output.voice on the transport.
expect(transport.mergedConfig?.audio?.output?.voice).toBe('verse');
});

// Baseline case: with no `config` passed to the session at all, the voice
// configured on the agent ('alloy') is the one expected on the transport.
it('falls back to the agent voice when no config overrides are provided', async () => {
// NOTE(review): CapturingTransport is defined outside this view; presumably it
// records the config it is sent as `mergedConfig` — confirm.
const transport = new CapturingTransport();
const agent = new RealtimeAgent({
name: 'voice-agent',
instructions: 'Respond cheerfully.',
voice: 'alloy',
});

// No `config` key here, so neither a top-level nor an audio.output voice override exists.
const session = new RealtimeSession(agent, {
transport,
model: 'gpt-realtime',
});

await session.connect({ apiKey: 'dummy-key' });

expect(transport.mergedConfig?.audio?.output?.voice).toBe('alloy');
});
});