Skip to content

Commit 2dd1924

Browse files
committed
fix: #675 top-level voice param in realtime session config does not work
1 parent ef4a354 commit 2dd1924

File tree

3 files changed

+70
-3
lines changed

3 files changed

+70
-3
lines changed

.changeset/tough-readers-create.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@openai/agents-realtime': patch
3+
---
4+
5+
fix: #675 top-level voice param in realtime session config does not work

packages/agents-realtime/src/realtimeSession.ts

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { RuntimeEventEmitter } from '@openai/agents-core/_shims';
1414
import { isZodObject, toSmartString } from '@openai/agents-core/utils';
1515
import type {
1616
RealtimeSessionConfig,
17+
RealtimeSessionConfigDefinition,
1718
RealtimeToolDefinition,
1819
RealtimeTracingConfig,
1920
RealtimeUserInput,
@@ -346,9 +347,20 @@ export class RealtimeSession<
346347
async #getSessionConfig(
347348
additionalConfig: Partial<RealtimeSessionConfig> = {},
348349
): Promise<Partial<RealtimeSessionConfig>> {
350+
const overridesConfig: Partial<RealtimeSessionConfig> =
351+
additionalConfig ?? {};
352+
const optionsConfig: Partial<RealtimeSessionConfig> =
353+
this.options.config ?? {};
349354
const instructions = await this.#currentAgent.getSystemPrompt(
350355
this.#context,
351356
);
357+
const getAudioOutputVoiceOverride = (
358+
config: Partial<RealtimeSessionConfig>,
359+
): string | undefined => {
360+
const audioConfig = (config as Partial<RealtimeSessionConfigDefinition>)
361+
.audio;
362+
return audioConfig?.output?.voice;
363+
};
352364

353365
// Realtime expects tracing to be explicitly null to disable it; leaving the previous config
354366
// in place would otherwise continue emitting spans.
@@ -374,6 +386,17 @@ export class RealtimeSession<
374386
);
375387
}
376388

389+
const audioOutputVoiceOverride =
390+
getAudioOutputVoiceOverride(overridesConfig) ??
391+
getAudioOutputVoiceOverride(optionsConfig);
392+
const topLevelVoiceOverride = overridesConfig.voice ?? optionsConfig.voice;
393+
const resolvedVoice =
394+
typeof audioOutputVoiceOverride !== 'undefined'
395+
? audioOutputVoiceOverride
396+
: typeof topLevelVoiceOverride !== 'undefined'
397+
? topLevelVoiceOverride
398+
: this.#currentAgent.voice;
399+
377400
// Start from any previously-sent config (so we preserve values like audio formats)
378401
// and the original options.config provided by the user. Preference order:
379402
// 1. Last session config we sent (#lastSessionConfig)
@@ -383,15 +406,15 @@ export class RealtimeSession<
383406
// to ensure they always reflect the current agent & runtime state.
384407
const base: Partial<RealtimeSessionConfig> = {
385408
...(this.#lastSessionConfig ?? {}),
386-
...(this.options.config ?? {}),
387-
...(additionalConfig ?? {}),
409+
...optionsConfig,
410+
...overridesConfig,
388411
};
389412

390413
// Note: Certain fields cannot be updated after the session begins, such as voice and model
391414
const fullConfig: Partial<RealtimeSessionConfig> = {
392415
...base,
393416
instructions,
394-
voice: this.#currentAgent.voice,
417+
voice: resolvedVoice,
395418
model: this.options.model,
396419
tools: this.#currentTools,
397420
tracing: tracingConfig,

packages/agents-realtime/test/realtimeVoiceConfigRegression.test.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,43 @@ describe('Realtime session voice config regression', () => {
7979
);
8080
expect(transport.mergedConfig?.audio?.output?.voice).toBe('marin');
8181
});
82+
83+
it('prefers config.voice over the agent voice when no audio output override exists', async () => {
84+
const transport = new CapturingTransport();
85+
const agent = new RealtimeAgent({
86+
name: 'voice-agent',
87+
instructions: 'Respond cheerfully.',
88+
voice: 'alloy',
89+
});
90+
91+
const session = new RealtimeSession(agent, {
92+
transport,
93+
model: 'gpt-realtime',
94+
config: {
95+
voice: 'verse',
96+
},
97+
});
98+
99+
await session.connect({ apiKey: 'dummy-key' });
100+
101+
expect(transport.mergedConfig?.audio?.output?.voice).toBe('verse');
102+
});
103+
104+
it('falls back to the agent voice when no config overrides are provided', async () => {
105+
const transport = new CapturingTransport();
106+
const agent = new RealtimeAgent({
107+
name: 'voice-agent',
108+
instructions: 'Respond cheerfully.',
109+
voice: 'alloy',
110+
});
111+
112+
const session = new RealtimeSession(agent, {
113+
transport,
114+
model: 'gpt-realtime',
115+
});
116+
117+
await session.connect({ apiKey: 'dummy-key' });
118+
119+
expect(transport.mergedConfig?.audio?.output?.voice).toBe('alloy');
120+
});
82121
});

0 commit comments

Comments
 (0)