Skip to content

Commit a0b1f3b

Browse files
fix(realtime-session): preserve audio format and config fields on agent update (#318)
Co-authored-by: Kazuhiro Sera <[email protected]>
1 parent 1bb4d86 commit a0b1f3b

File tree

3 files changed

+53
-2
lines changed

3 files changed

+53
-2
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@openai/agents-realtime': patch
3+
---
4+
5+
fix(realtime-session): preserve audio format & other session config fields on agent update

packages/agents-realtime/src/realtimeSession.ts

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,13 @@ export class RealtimeSession<
183183
#shouldIncludeAudioData: boolean;
184184
#interruptedByGuardrail: Record<string, boolean> = {};
185185
#audioStarted = false;
186+
// Keeps track of the last full session config we sent (camelCase keys) so that
187+
// subsequent updates (e.g. during agent handoffs) preserve properties that are
188+
// not explicitly recalculated here (such as inputAudioFormat, outputAudioFormat,
189+
// modalities, speed, toolChoice, turnDetection, etc.). Without this, updating
190+
// the agent would drop audio format overrides (e.g. g711_ulaw) and revert to
191+
// transport defaults, causing issues for integrations like Twilio.
192+
#lastSessionConfig: Partial<RealtimeSessionConfig> | null = null;
186193

187194
constructor(
188195
public readonly initialAgent:
@@ -313,14 +320,34 @@ export class RealtimeSession<
313320
);
314321
}
315322

316-
return {
323+
// Start from any previously-sent config (so we preserve values like audio formats)
324+
// and the original options.config provided by the user. Merge order (later entries win):
325+
// 1. Last session config we sent (#lastSessionConfig)
326+
// 2. Original options.config
327+
// 3. Additional config passed into this invocation (explicit overrides)
328+
// Finally we overwrite dynamic fields (instructions, voice, model, tools, tracing)
329+
// to ensure they always reflect the current agent & runtime state.
330+
const base: Partial<RealtimeSessionConfig> = {
331+
...(this.#lastSessionConfig ?? {}),
332+
...(this.options.config ?? {}),
333+
...(additionalConfig ?? {}),
334+
};
335+
336+
// Note: Certain fields cannot be updated after the session begins, such as voice and model
337+
const fullConfig: Partial<RealtimeSessionConfig> = {
338+
...base,
317339
instructions,
318340
voice: this.#currentAgent.voice,
319341
model: this.options.model,
320342
tools: this.#currentTools,
321343
tracing: tracingConfig,
322-
...additionalConfig,
323344
};
345+
346+
// Update our cache so subsequent updates inherit the full set including any
347+
// dynamic fields we just overwrote.
348+
this.#lastSessionConfig = fullConfig;
349+
350+
return fullConfig;
324351
}
325352

326353
async updateAgent(newAgent: RealtimeAgent<TBaseContext>) {
@@ -689,6 +716,9 @@ export class RealtimeSession<
689716
url: options.url,
690717
initialSessionConfig: await this.#getSessionConfig(this.options.config),
691718
});
719+
// No extra work is needed here: the #getSessionConfig call above has already cached the
720+
// initial session config in #lastSessionConfig. If additional overrides are ever passed
721+
// directly at this point in the future, they should be merged into that cache here.
692722

693723
this.#history = [];
694724
this.emit('history_updated', this.#history);

packages/agents-realtime/test/realtimeSession.test.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,4 +300,20 @@ describe('RealtimeSession', () => {
300300
} as any);
301301
expect(startEvents).toBe(2);
302302
});
303+
304+
it('preserves custom audio formats across updateAgent', async () => {
305+
const t = new FakeTransport();
306+
const agent = new RealtimeAgent({ name: 'Orig', handoffs: [] });
307+
const s = new RealtimeSession(agent, {
308+
transport: t,
309+
config: { inputAudioFormat: 'g711_ulaw', outputAudioFormat: 'g711_ulaw' },
310+
});
311+
await s.connect({ apiKey: 'test' });
312+
const newAgent = new RealtimeAgent({ name: 'Next', handoffs: [] });
313+
await s.updateAgent(newAgent);
314+
// Find the last updateSessionConfig call
315+
const last = t.updateSessionConfigCalls.at(-1)!;
316+
expect(last.inputAudioFormat).toBe('g711_ulaw');
317+
expect(last.outputAudioFormat).toBe('g711_ulaw');
318+
});
303319
});

0 commit comments

Comments
 (0)