Skip to content

Commit ffe4c4f

Browse files
committed
fix: session config breaks tool registration - cannot use tools with audio customization
- Fix `_getMergedSessionConfig` to only include the `tools` field when tools are present
- Prevent sending an empty/undefined `tools` array, which disables tool calls
- Add comprehensive tests covering the issue scenarios
- Resolve the mutual exclusivity between session config and tool functionality

Fixes #339
1 parent 6f1677c commit ffe4c4f

File tree

3 files changed

+232
-81
lines changed

3 files changed

+232
-81
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
---
2+
'@openai/agents-realtime': patch
3+
---
4+
5+
Fix session config breaking tool registration
6+
7+
When a RealtimeSession was initialized with any config object, tool registration broke because an empty or undefined `tools` array was sent to the OpenAI API, which disabled tool calls. This fix ensures that the `tools` field is only included in session updates when tools are actually present, so providing session configuration no longer causes the API to disable tool functionality.
8+
9+
The issue occurred because the `_getMergedSessionConfig` method would include `tools: undefined` in the session data when no tools were explicitly provided in the config, which the OpenAI API interpreted as a request to disable all tools. Now, the tools field is only included when tools are actually available and non-empty.
10+
11+
This allows users to customize audio settings (voice, turn detection, noise reduction, etc.) while maintaining tool functionality, resolving the mutual exclusivity between session configuration and tool registration.

packages/agents-realtime/src/openaiRealtimeBase.ts

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ export abstract class OpenAIRealtimeBase
387387
}
388388

389389
protected _getMergedSessionConfig(config: Partial<RealtimeSessionConfig>) {
390-
const sessionData = {
390+
const sessionData: any = {
391391
instructions: config.instructions,
392392
model:
393393
config.model ??
@@ -414,15 +414,20 @@ export abstract class OpenAIRealtimeBase
414414
DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.turnDetection,
415415
tool_choice:
416416
config.toolChoice ?? DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.toolChoice,
417-
tools: config.tools?.map((tool) => ({
418-
...tool,
419-
strict: undefined,
420-
})),
421417
// We don't set tracing here to make sure that we don't try to override it on every
422418
// session.update as it might lead to errors
423419
...(config.providerData ?? {}),
424420
};
425421

422+
// Only include tools if they are explicitly provided and not empty
423+
// This prevents sending an empty tools array which would disable tool calls
424+
if (config.tools && config.tools.length > 0) {
425+
sessionData.tools = config.tools.map((tool) => ({
426+
...tool,
427+
strict: undefined,
428+
}));
429+
}
430+
426431
return sessionData;
427432
}
428433

packages/agents-realtime/test/realtimeSession.test.ts

Lines changed: 211 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -318,84 +318,219 @@ describe('RealtimeSession', () => {
318318
expect(last.outputAudioFormat).toBe('g711_ulaw');
319319
});
320320

321-
it('defaults item status to completed for done output items without status', async () => {
322-
class TestTransport extends OpenAIRealtimeBase {
323-
status: 'connected' | 'disconnected' | 'connecting' | 'disconnecting' =
324-
'connected';
325-
connect = vi.fn(async () => {});
326-
sendEvent = vi.fn();
327-
mute = vi.fn();
328-
close = vi.fn();
329-
interrupt = vi.fn();
330-
get muted() {
331-
return false;
321+
it('defaults item status to completed for done output items without status', async () => {
322+
class TestTransport extends OpenAIRealtimeBase {
323+
status: 'connected' | 'disconnected' | 'connecting' | 'disconnecting' =
324+
'connected';
325+
connect = vi.fn(async () => { });
326+
sendEvent = vi.fn();
327+
mute = vi.fn();
328+
close = vi.fn();
329+
interrupt = vi.fn();
330+
get muted() {
331+
return false;
332+
}
332333
}
333-
}
334-
const transport = new TestTransport();
335-
const agent = new RealtimeAgent({ name: 'A', handoffs: [] });
336-
const session = new RealtimeSession(agent, { transport });
337-
await session.connect({ apiKey: 'test' });
338-
const historyEvents: RealtimeItem[][] = [];
339-
session.on('history_updated', (h) => historyEvents.push([...h]));
340-
(transport as any)._onMessage({
341-
data: JSON.stringify({
342-
type: 'response.output_item.done',
343-
event_id: 'e',
344-
item: {
345-
id: 'm1',
346-
type: 'message',
347-
role: 'assistant',
348-
content: [{ type: 'text', text: 'hi' }],
349-
},
350-
output_index: 0,
351-
response_id: 'r1',
352-
}),
334+
const transport = new TestTransport();
335+
const agent = new RealtimeAgent({ name: 'A', handoffs: [] });
336+
const session = new RealtimeSession(agent, { transport });
337+
await session.connect({ apiKey: 'test' });
338+
const historyEvents: RealtimeItem[][] = [];
339+
session.on('history_updated', (h) => historyEvents.push([...h]));
340+
(transport as any)._onMessage({
341+
data: JSON.stringify({
342+
type: 'response.output_item.done',
343+
event_id: 'e',
344+
item: {
345+
id: 'm1',
346+
type: 'message',
347+
role: 'assistant',
348+
content: [{ type: 'text', text: 'hi' }],
349+
},
350+
output_index: 0,
351+
response_id: 'r1',
352+
}),
353+
});
354+
const latest = historyEvents.at(-1)!;
355+
const msg = latest.find(
356+
(i): i is Extract<RealtimeItem, { type: 'message'; role: 'assistant' }> =>
357+
i.type === 'message' && i.role === 'assistant' && (i as any).itemId === 'm1'
358+
);
359+
expect(msg).toBeDefined();
360+
expect(msg!.status).toBe('completed');
353361
});
354-
const latest = historyEvents.at(-1)!;
355-
const msg = latest.find(
356-
(i): i is Extract<RealtimeItem, { type: 'message'; role: 'assistant' }> =>
357-
i.type === 'message' && i.role === 'assistant' && (i as any).itemId === 'm1'
358-
);
359-
expect(msg).toBeDefined();
360-
expect(msg!.status).toBe('completed');
361-
});
362-
363-
it('preserves explicit completed status on done', async () => {
364-
class TestTransport extends OpenAIRealtimeBase {
365-
status: 'connected' | 'disconnected' | 'connecting' | 'disconnecting' = 'connected';
366-
connect = vi.fn(async () => {});
367-
sendEvent = vi.fn(); mute = vi.fn(); close = vi.fn(); interrupt = vi.fn();
368-
get muted() { return false; }
369-
}
370-
const transport = new TestTransport();
371-
const session = new RealtimeSession(new RealtimeAgent({ name: 'A', handoffs: [] }), { transport });
372-
await session.connect({ apiKey: 'test' });
373-
374-
const historyEvents: RealtimeItem[][] = [];
375-
session.on('history_updated', (h) => historyEvents.push([...h]));
376-
377-
(transport as any)._onMessage({
378-
data: JSON.stringify({
379-
type: 'response.output_item.done',
380-
event_id: 'e',
381-
item: {
382-
id: 'm2',
383-
type: 'message',
384-
role: 'assistant',
385-
status: 'completed',
386-
content: [{ type: 'text', text: 'hi again' }],
387-
},
388-
output_index: 0,
389-
response_id: 'r2',
390-
}),
362+
363+
it('preserves explicit completed status on done', async () => {
364+
class TestTransport extends OpenAIRealtimeBase {
365+
status: 'connected' | 'disconnected' | 'connecting' | 'disconnecting' = 'connected';
366+
connect = vi.fn(async () => { });
367+
sendEvent = vi.fn(); mute = vi.fn(); close = vi.fn(); interrupt = vi.fn();
368+
get muted() { return false; }
369+
}
370+
const transport = new TestTransport();
371+
const session = new RealtimeSession(new RealtimeAgent({ name: 'A', handoffs: [] }), { transport });
372+
await session.connect({ apiKey: 'test' });
373+
374+
const historyEvents: RealtimeItem[][] = [];
375+
session.on('history_updated', (h) => historyEvents.push([...h]));
376+
377+
(transport as any)._onMessage({
378+
data: JSON.stringify({
379+
type: 'response.output_item.done',
380+
event_id: 'e',
381+
item: {
382+
id: 'm2',
383+
type: 'message',
384+
role: 'assistant',
385+
status: 'completed',
386+
content: [{ type: 'text', text: 'hi again' }],
387+
},
388+
output_index: 0,
389+
response_id: 'r2',
390+
}),
391+
});
392+
393+
const latest = historyEvents.at(-1)!;
394+
const msg = latest.find(
395+
(i): i is Extract<RealtimeItem, { type: 'message'; role: 'assistant' }> =>
396+
i.type === 'message' && i.role === 'assistant' && (i as any).itemId === 'm2'
397+
);
398+
expect(msg).toBeDefined();
399+
expect(msg!.status).toBe('completed'); // ensure we didn't overwrite server status
400+
});
401+
402+
it('includes tools in session config when session config is provided', async () => {
403+
const transport = new FakeTransport();
404+
const agent = new RealtimeAgent({
405+
name: 'TestAgent',
406+
handoffs: [],
407+
tools: [TEST_TOOL]
408+
});
409+
410+
// Test with session config - tools should still be included
411+
const session = new RealtimeSession(agent, {
412+
transport,
413+
config: {
414+
voice: 'alloy',
415+
turnDetection: { type: 'server_vad' }
416+
}
417+
});
418+
419+
await session.connect({ apiKey: 'test' });
420+
421+
// Check that the initial session config includes tools
422+
const connectCall = transport.connectCalls[0];
423+
expect(connectCall?.initialSessionConfig?.tools).toBeDefined();
424+
expect(connectCall?.initialSessionConfig?.tools).toHaveLength(1);
425+
expect(connectCall?.initialSessionConfig?.tools?.[0]?.name).toBe('test');
426+
427+
// Check that voice config is also preserved
428+
expect(connectCall?.initialSessionConfig?.voice).toBe('alloy');
429+
expect(connectCall?.initialSessionConfig?.turnDetection).toEqual({ type: 'server_vad' });
391430
});
392431

393-
const latest = historyEvents.at(-1)!;
394-
const msg = latest.find(
395-
(i): i is Extract<RealtimeItem, { type: 'message'; role: 'assistant' }> =>
396-
i.type === 'message' && i.role === 'assistant' && (i as any).itemId === 'm2'
397-
);
398-
expect(msg).toBeDefined();
399-
expect(msg!.status).toBe('completed'); // ensure we didn't overwrite server status
400-
});
432+
it('includes tools in session config when no session config is provided', async () => {
433+
const transport = new FakeTransport();
434+
const agent = new RealtimeAgent({
435+
name: 'TestAgent',
436+
handoffs: [],
437+
tools: [TEST_TOOL]
438+
});
439+
440+
// Test without session config - tools should be included
441+
const session = new RealtimeSession(agent, { transport });
442+
443+
await session.connect({ apiKey: 'test' });
444+
445+
// Check that the initial session config includes tools
446+
const connectCall = transport.connectCalls[0];
447+
expect(connectCall?.initialSessionConfig?.tools).toBeDefined();
448+
expect(connectCall?.initialSessionConfig?.tools).toHaveLength(1);
449+
expect(connectCall?.initialSessionConfig?.tools?.[0]?.name).toBe('test');
450+
});
451+
452+
it('preserves tools when updateSessionConfig is called', async () => {
453+
const transport = new FakeTransport();
454+
const agent = new RealtimeAgent({
455+
name: 'TestAgent',
456+
handoffs: [],
457+
tools: [TEST_TOOL]
458+
});
459+
460+
const session = new RealtimeSession(agent, {
461+
transport,
462+
config: {
463+
voice: 'alloy'
464+
}
465+
});
466+
467+
await session.connect({ apiKey: 'test' });
468+
469+
// Check that updateSessionConfig calls include tools
470+
expect(transport.updateSessionConfigCalls.length).toBeGreaterThan(0);
471+
const lastUpdateCall = transport.updateSessionConfigCalls[transport.updateSessionConfigCalls.length - 1];
472+
expect(lastUpdateCall.tools).toBeDefined();
473+
expect(lastUpdateCall.tools).toHaveLength(1);
474+
expect(lastUpdateCall.tools?.[0]?.name).toBe('test');
475+
});
476+
477+
it('does not include tools field when no tools are provided', async () => {
478+
const transport = new FakeTransport();
479+
const agent = new RealtimeAgent({
480+
name: 'TestAgent',
481+
handoffs: [],
482+
tools: [] // No tools
483+
});
484+
485+
const session = new RealtimeSession(agent, {
486+
transport,
487+
config: {
488+
voice: 'alloy'
489+
}
490+
});
491+
492+
await session.connect({ apiKey: 'test' });
493+
494+
// Check that updateSessionConfig calls do not include tools field
495+
expect(transport.updateSessionConfigCalls.length).toBeGreaterThan(0);
496+
const lastUpdateCall = transport.updateSessionConfigCalls[transport.updateSessionConfigCalls.length - 1];
497+
expect(lastUpdateCall.hasOwnProperty('tools')).toBe(false);
498+
});
499+
500+
it('reproduces the original issue - tools work with config provided', async () => {
501+
const transport = new FakeTransport();
502+
const agent = new RealtimeAgent({
503+
name: 'TestAgent',
504+
handoffs: [],
505+
tools: [TEST_TOOL]
506+
});
507+
508+
// This is the scenario from the issue - session with config that includes voice setting
509+
const session = new RealtimeSession(agent, {
510+
transport,
511+
config: {
512+
voice: 'alloy', // Even just voice setting should not break tools
513+
turnDetection: { type: 'server_vad' }
514+
}
515+
});
516+
517+
await session.connect({ apiKey: 'test' });
518+
519+
// Verify that tools are included in both initial and update configs
520+
const connectCall = transport.connectCalls[0];
521+
expect(connectCall?.initialSessionConfig?.tools).toBeDefined();
522+
expect(connectCall?.initialSessionConfig?.tools).toHaveLength(1);
523+
expect(connectCall?.initialSessionConfig?.tools?.[0]?.name).toBe('test');
524+
525+
// Verify that subsequent updates also include tools
526+
expect(transport.updateSessionConfigCalls.length).toBeGreaterThan(0);
527+
const lastUpdateCall = transport.updateSessionConfigCalls[transport.updateSessionConfigCalls.length - 1];
528+
expect(lastUpdateCall.tools).toBeDefined();
529+
expect(lastUpdateCall.tools).toHaveLength(1);
530+
expect(lastUpdateCall.tools?.[0]?.name).toBe('test');
531+
532+
// Verify that voice config is preserved
533+
expect(connectCall?.initialSessionConfig?.voice).toBe('alloy');
534+
expect(lastUpdateCall.voice).toBe('alloy');
535+
});
401536
});

0 commit comments

Comments
 (0)