Skip to content

Commit 18fd902

Browse files
authored
fix: #495 Realtime session config falls back to legacy format when voice is set (#497)
1 parent 5f4e139 commit 18fd902

File tree

3 files changed

+119
-20
lines changed

3 files changed

+119
-20
lines changed

.changeset/all-mangos-send.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@openai/agents-realtime': patch
3+
---
4+
5+
fix: #495 Realtime session config falls back to legacy format when voice is set

packages/agents-realtime/src/clientMessages.ts

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,12 @@ export type RealtimeSessionConfigCommon = {
133133
/**
 * Session configuration in the current (non-deprecated) shape: the common
 * session fields plus the output modalities and the nested audio settings.
 */
export type RealtimeSessionConfigDefinition = RealtimeSessionConfigCommon & {
  outputModalities?: Array<'text' | 'audio'>;
  audio?: RealtimeAudioConfig;
  /**
   * TODO: We'll eventually migrate to audio.output.voice instead of this property.
   * Until every session implementation has moved over to audio.output.voice,
   * this top-level voice property is kept for backwards compatibility.
   */
  voice?: string;
};
137143

138144
// Deprecated config (legacy) — cannot be mixed with new fields
@@ -175,7 +181,6 @@ function isDeprecatedConfig(
175181
): config is Partial<RealtimeSessionConfigDeprecated> {
176182
return (
177183
isDefined('modalities', config) ||
178-
isDefined('voice', config) ||
179184
isDefined('inputAudioFormat', config) ||
180185
isDefined('outputAudioFormat', config) ||
181186
isDefined('inputAudioTranscription', config) ||
@@ -193,6 +198,25 @@ export function toNewSessionConfig(
193198
config: Partial<RealtimeSessionConfig>,
194199
): Partial<RealtimeSessionConfigDefinition> {
195200
if (!isDeprecatedConfig(config)) {
201+
const inputConfig = config.audio?.input
202+
? {
203+
format: normalizeAudioFormat(config.audio.input.format),
204+
noiseReduction: config.audio.input.noiseReduction ?? null,
205+
transcription: config.audio.input.transcription,
206+
turnDetection: config.audio.input.turnDetection,
207+
}
208+
: undefined;
209+
210+
const requestedOutputVoice = config.audio?.output?.voice ?? config.voice;
211+
const outputConfig =
212+
config.audio?.output || typeof requestedOutputVoice !== 'undefined'
213+
? {
214+
format: normalizeAudioFormat(config.audio?.output?.format),
215+
voice: requestedOutputVoice,
216+
speed: config.audio?.output?.speed,
217+
}
218+
: undefined;
219+
196220
return {
197221
model: config.model,
198222
instructions: config.instructions,
@@ -202,25 +226,13 @@ export function toNewSessionConfig(
202226
providerData: config.providerData,
203227
prompt: config.prompt,
204228
outputModalities: config.outputModalities,
205-
audio: config.audio
206-
? {
207-
input: config.audio.input
208-
? {
209-
format: normalizeAudioFormat(config.audio.input.format),
210-
noiseReduction: config.audio.input.noiseReduction ?? null,
211-
transcription: config.audio.input.transcription,
212-
turnDetection: config.audio.input.turnDetection,
213-
}
214-
: undefined,
215-
output: config.audio.output
216-
? {
217-
format: normalizeAudioFormat(config.audio.output.format),
218-
voice: config.audio.output.voice,
219-
speed: config.audio.output.speed,
220-
}
221-
: undefined,
222-
}
223-
: undefined,
229+
audio:
230+
inputConfig || outputConfig
231+
? {
232+
input: inputConfig,
233+
output: outputConfig,
234+
}
235+
: undefined,
224236
};
225237
}
226238

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { describe, it, expect } from 'vitest';
2+
import { toNewSessionConfig } from '../src/clientMessages';
3+
import { RealtimeAgent } from '../src/realtimeAgent';
4+
import { RealtimeSession } from '../src/realtimeSession';
5+
import { OpenAIRealtimeBase } from '../src/openaiRealtimeBase';
6+
import type { RealtimeClientMessage } from '../src/clientMessages';
7+
8+
// 'audio/pcmu' format descriptor used as both the input and the output audio
// format by the tests in this file.
const TELEPHONY_AUDIO_FORMAT = { type: 'audio/pcmu' as const };
9+
10+
/**
 * Test double built on OpenAIRealtimeBase that never opens a connection.
 *
 * `connect` only records the merged session config and `sendEvent` only
 * collects the outgoing client messages, so a test can inspect both afterwards.
 */
class CapturingTransport extends OpenAIRealtimeBase {
  // This stub never updates the status; it keeps its initial value.
  status: 'connected' | 'disconnected' | 'connecting' | 'disconnecting' =
    'disconnected';

  // Merged config recorded by the latest connect() call; null until then.
  mergedConfig: any = null;

  // Every message handed to sendEvent(), in call order.
  events: RealtimeClientMessage[] = [];

  /**
   * Records the merged session config instead of connecting.
   *
   * @param options - Connect options; only `initialSessionConfig` is read, and
   *   an empty object is used when it is missing.
   */
  async connect(options: { initialSessionConfig?: any }) {
    const initialConfig = options.initialSessionConfig ?? {};
    // _getMergedSessionConfig is not declared in this class (presumably it is
    // inherited from OpenAIRealtimeBase - confirm), hence the `any` cast.
    this.mergedConfig = (this as any)._getMergedSessionConfig(initialConfig);
  }

  /** Stores the outgoing message rather than sending it anywhere. */
  sendEvent(event: RealtimeClientMessage) {
    this.events.push(event);
  }

  // Intentionally empty: these operations do nothing in this stub.
  mute() {}
  close() {}
  interrupt() {}

  /** Always reports the transport as not muted. */
  get muted() {
    return false;
  }
}
34+
35+
// Regression tests for #495: setting a voice must not make the session config
// lose the audio formats configured under `audio.input` / `audio.output`.
describe('Realtime session voice config regression', () => {
  // Title states what is asserted: the formats are kept and the top-level
  // voice ends up in audio.output.voice.
  it('keeps GA audio formats when top-level voice is present', () => {
    const converted = toNewSessionConfig({
      voice: 'alloy',
      audio: {
        input: { format: TELEPHONY_AUDIO_FORMAT },
        output: { format: TELEPHONY_AUDIO_FORMAT },
      },
    });

    expect(converted.audio?.input?.format).toEqual(TELEPHONY_AUDIO_FORMAT);
    expect(converted.audio?.output?.format).toEqual(TELEPHONY_AUDIO_FORMAT);
    expect(converted.audio?.output?.voice).toBe('alloy');
  });

  // The agent is created with voice 'alloy' while the session config asks for
  // 'marin'; the merged config is expected to carry the session's formats and
  // the session's output voice.
  it('preserves audio formats and the configured output voice when connecting a session for an agent with voice configured', async () => {
    const transport = new CapturingTransport();
    const agent = new RealtimeAgent({
      name: 'voice-agent',
      instructions: 'Respond cheerfully.',
      voice: 'alloy',
    });

    const session = new RealtimeSession(agent, {
      transport,
      model: 'gpt-realtime',
      config: {
        audio: {
          input: { format: TELEPHONY_AUDIO_FORMAT },
          output: {
            format: TELEPHONY_AUDIO_FORMAT,
            voice: 'marin',
          },
        },
      },
    });

    // CapturingTransport.connect only records the merged config, so the dummy
    // key is never used for a real connection.
    await session.connect({ apiKey: 'dummy-key' });

    expect(transport.mergedConfig?.audio?.input?.format).toEqual(
      TELEPHONY_AUDIO_FORMAT,
    );
    expect(transport.mergedConfig?.audio?.output?.format).toEqual(
      TELEPHONY_AUDIO_FORMAT,
    );
    expect(transport.mergedConfig?.audio?.output?.voice).toBe('marin');
  });
});

0 commit comments

Comments
 (0)