Skip to content

Commit ed4b4de

Browse files
committed
Improve the types of RealtimeAgent configuration
- inputAudioTranscription - turnDetection
1 parent fc99390 commit ed4b4de

File tree

4 files changed

+80
-5
lines changed

4 files changed

+80
-5
lines changed

.changeset/dull-beans-arrive.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@openai/agents-realtime': patch
3+
---
4+
5+
Improve the types of turnDetection and inputAudioTranscription in RealtimeAgent configuration

examples/docs/voice-agents/turnDetection.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ const session = new RealtimeSession(agent, {
77
turnDetection: {
88
type: 'semantic_vad',
99
eagerness: 'medium',
10-
create_response: true,
11-
interrupt_response: true,
10+
createResponse: true,
11+
interruptResponse: true,
1212
},
1313
},
1414
});

packages/agents-realtime/src/clientMessages.ts

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,46 @@ export type RealtimeTracingConfig =
3333
}
3434
| 'auto';
3535

36+
/**
 * Typed shape for the `inputAudioTranscription` entry of `RealtimeSessionConfig`
 * (it replaces the previous `Record<string, any>`). Every field is optional.
 *
 * NOTE(review): because `model` ends with `| string`, the union collapses to
 * plain `string` for the type checker; the listed literals document known
 * model names but are not enforced.
 */
export type RealtimeInputAudioTranscriptionConfig = {
37+
language?: string;
38+
model?: 'gpt-4o-transcribe' | 'gpt-4o-mini-transcribe' | 'whisper-1' | string;
39+
prompt?: string;
40+
};
41+
42+
/**
 * Turn-detection settings written with snake_case keys. This is also the
 * shape returned by `OpenAIRealtimeBase.buildTurnDetectionConfig`, i.e. the
 * form placed under `turn_detection` in the session payload.
 * Every field is optional.
 *
 * NOTE(review): `type` currently admits only `'semantic_vad'`, while fields
 * such as `threshold`, `prefix_padding_ms` and `silence_duration_ms` are also
 * declared here — confirm whether other detection types should be accepted.
 */
export type RealtimeTurnDetectionConfigAsIs = {
43+
type?: 'semantic_vad';
44+
create_response?: boolean;
45+
eagerness?: 'auto' | 'low' | 'medium' | 'high';
46+
interrupt_response?: boolean;
47+
prefix_padding_ms?: number;
48+
silence_duration_ms?: number;
49+
threshold?: number;
50+
};
51+
52+
// The Realtime API accepts snake_case keys, so when this camelCase form is used, the SDK converts the keys to snake_case before passing them to the API
53+
/**
 * Turn-detection settings written with camelCase keys. Each multi-word key
 * mirrors a snake_case key of `RealtimeTurnDetectionConfigAsIs`
 * (`createResponse` -> `create_response`, `interruptResponse` ->
 * `interrupt_response`, `prefixPaddingMs` -> `prefix_padding_ms`,
 * `silenceDurationMs` -> `silence_duration_ms`); `type`, `eagerness` and
 * `threshold` are spelled the same in both forms.
 * The mapping is performed by `OpenAIRealtimeBase.buildTurnDetectionConfig`.
 */
export type RealtimeTurnDetectionConfigCamelCase = {
54+
type?: 'semantic_vad';
55+
createResponse?: boolean;
56+
eagerness?: 'auto' | 'low' | 'medium' | 'high';
57+
interruptResponse?: boolean;
58+
prefixPaddingMs?: number;
59+
silenceDurationMs?: number;
60+
threshold?: number;
61+
};
62+
63+
/**
 * Turn-detection configuration accepted by `RealtimeSessionConfig.turnDetection`:
 * either the snake_case form or the camelCase form.
 *
 * NOTE(review): both members consist solely of optional properties and share
 * no discriminant, so an object literal mixing keys from both forms appears to
 * type-check — confirm whether that is intended.
 */
export type RealtimeTurnDetectionConfig =
64+
| RealtimeTurnDetectionConfigAsIs
65+
| RealtimeTurnDetectionConfigCamelCase;
66+
3667
export type RealtimeSessionConfig = {
3768
model: string;
3869
instructions: string;
3970
modalities: ('text' | 'audio')[];
4071
voice: string;
4172
inputAudioFormat: RealtimeAudioFormat;
4273
outputAudioFormat: RealtimeAudioFormat;
43-
inputAudioTranscription: Record<string, any>;
44-
turnDetection: Record<string, any>;
74+
inputAudioTranscription: RealtimeInputAudioTranscriptionConfig;
75+
turnDetection: RealtimeTurnDetectionConfig;
4576
toolChoice: ModelSettingsToolChoice;
4677
tools: FunctionToolDefinition[];
4778
tracing?: RealtimeTracingConfig | null;

packages/agents-realtime/src/openaiRealtimeBase.ts

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import {
55
RealtimeClientMessage,
66
RealtimeSessionConfig,
77
RealtimeTracingConfig,
8+
RealtimeTurnDetectionConfig,
9+
RealtimeTurnDetectionConfigAsIs,
810
RealtimeUserInput,
911
} from './clientMessages';
1012
import {
@@ -390,7 +392,7 @@ export abstract class OpenAIRealtimeBase
390392
config.inputAudioTranscription ??
391393
DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.inputAudioTranscription,
392394
turn_detection:
393-
config.turnDetection ??
395+
OpenAIRealtimeBase.buildTurnDetectionConfig(config.turnDetection) ??
394396
DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.turnDetection,
395397
tool_choice:
396398
config.toolChoice ?? DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.toolChoice,
@@ -406,6 +408,43 @@ export abstract class OpenAIRealtimeBase
406408
return sessionData;
407409
}
408410

411+
/**
 * Normalizes a turn-detection config into its snake_case form.
 *
 * `type`, `eagerness` and `threshold` are copied through unchanged. For each
 * of the four multi-word settings the camelCase key is checked first, then
 * the snake_case key; if neither key exists on the object the field is set
 * to `undefined`. The returned object always carries all seven keys.
 *
 * Presence is tested with the `in` operator, not by value: when the camelCase
 * key exists on the object its value is used even if that value is
 * `undefined`, and the snake_case key is then not consulted.
 *
 * @param c - Config in either key style, or `undefined`.
 * @returns A new snake_case config object, or `undefined` when `c` is
 * `undefined` (which lets the caller's `??` fall back to its default).
 */
private static buildTurnDetectionConfig(
412+
c: RealtimeTurnDetectionConfig | undefined,
413+
): RealtimeTurnDetectionConfigAsIs | undefined {
414+
if (typeof c === 'undefined') {
415+
return undefined;
416+
}
417+
return {
418+
type: c.type,
419+
create_response:
420+
'createResponse' in c
421+
? c.createResponse
422+
: 'create_response' in c
423+
? c.create_response
424+
: undefined,
425+
eagerness: c.eagerness,
426+
interrupt_response:
427+
'interruptResponse' in c
428+
? c.interruptResponse
429+
: 'interrupt_response' in c
430+
? c.interrupt_response
431+
: undefined,
432+
prefix_padding_ms:
433+
'prefixPaddingMs' in c
434+
? c.prefixPaddingMs
435+
: 'prefix_padding_ms' in c
436+
? c.prefix_padding_ms
437+
: undefined,
438+
silence_duration_ms:
439+
'silenceDurationMs' in c
440+
? c.silenceDurationMs
441+
: 'silence_duration_ms' in c
442+
? c.silence_duration_ms
443+
: undefined,
444+
threshold: c.threshold,
445+
};
446+
}
447+
409448
/**
410449
* Sets the internal tracing config. This is used to track the tracing config that has been set
411450
* during the session.create event.

0 commit comments

Comments
 (0)