Skip to content

Commit 06549eb

Browse files
committed
fix: #494 Voice input transcription failing in realtime-demo
1 parent 0fd8b6e commit 06549eb

File tree

3 files changed

+34
-3
lines changed

3 files changed

+34
-3
lines changed

.changeset/wet-snakes-stare.md

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@openai/agents-realtime': patch
3+
---
4+
5+
fix: #494 Voice input transcription failing in realtime-demo

packages/agents-realtime/src/realtimeSession.ts

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,10 @@ import {
2929
RealtimeOutputGuardrailSettings,
3030
} from './guardrail';
3131
import { RealtimeItem } from './items';
32-
import { OpenAIRealtimeModels } from './openaiRealtimeBase';
32+
import {
33+
DEFAULT_OPENAI_REALTIME_SESSION_CONFIG,
34+
OpenAIRealtimeModels,
35+
} from './openaiRealtimeBase';
3336
import { OpenAIRealtimeWebRTC } from './openaiRealtimeWebRtc';
3437
import { OpenAIRealtimeWebSocket } from './openaiRealtimeWebsocket';
3538
import { RealtimeAgent } from './realtimeAgent';
@@ -147,6 +150,12 @@ export type RealtimeSessionConnectOptions = {
147150
url?: string;
148151
};
149152

153+
function cloneDefaultSessionConfig(): Partial<RealtimeSessionConfig> {
154+
return JSON.parse(
155+
JSON.stringify(DEFAULT_OPENAI_REALTIME_SESSION_CONFIG),
156+
) as Partial<RealtimeSessionConfig>;
157+
}
158+
150159
/**
151160
* A `RealtimeSession` is the cornerstone of building Voice Agents. It's the equivalent of a
152161
* Runner in text-based agents except that it automatically handles multiple turns by maintaining a
@@ -206,7 +215,8 @@ export class RealtimeSession<
206215
// modalities, speed, toolChoice, turnDetection, etc.). Without this, updating
207216
// the agent would drop audio format overrides (e.g. g711_ulaw) and revert to
208217
// transport defaults causing issues for integrations like Twilio.
209-
#lastSessionConfig: Partial<RealtimeSessionConfig> | null = null;
218+
#lastSessionConfig: Partial<RealtimeSessionConfig> | null =
219+
cloneDefaultSessionConfig();
210220
#automaticallyTriggerResponseForMcpToolCalls: boolean = true;
211221

212222
constructor(

packages/agents-realtime/test/realtimeSession.test.ts

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,10 @@ import {
1111
} from '@openai/agents-core';
1212
import * as utils from '../src/utils';
1313
import type { TransportToolCallEvent } from '../src/transportLayerEvents';
14-
import { OpenAIRealtimeBase } from '../src/openaiRealtimeBase';
14+
import {
15+
DEFAULT_OPENAI_REALTIME_SESSION_CONFIG,
16+
OpenAIRealtimeBase,
17+
} from '../src/openaiRealtimeBase';
1518

1619
function createMessage(id: string, text: string): RealtimeItem {
1720
return {
@@ -122,6 +125,19 @@ describe('RealtimeSession', () => {
122125
expect(t.connectCalls[0]?.url).toBe('ws://example');
123126
});
124127

128+
it('includes default transcription config when connecting', async () => {
129+
const t = new FakeTransport();
130+
const agent = new RealtimeAgent({ name: 'A', handoffs: [] });
131+
const s = new RealtimeSession(agent, { transport: t });
132+
await s.connect({ apiKey: 'test' });
133+
134+
expect(
135+
t.connectCalls[0]?.initialSessionConfig?.audio?.input?.transcription,
136+
).toEqual(
137+
DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.audio?.input?.transcription,
138+
);
139+
});
140+
125141
it('updateHistory accepts callback', () => {
126142
const item = createMessage('1', 'hi');
127143
session.updateHistory([item]);

0 commit comments

Comments
 (0)