Skip to content

Commit 49bfe25

Browse files
authored
Improve the types of RealtimeSession configuration (#96)
1 parent 053cd55 commit 49bfe25

File tree

5 files changed

+92
-6
lines changed

5 files changed

+92
-6
lines changed

.changeset/dull-beans-arrive.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@openai/agents-realtime': patch
3+
---
4+
5+
Improve the types of turnDetection and inputAudioTranscription in RealtimeSession configuration

examples/docs/voice-agents/turnDetection.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ const session = new RealtimeSession(agent, {
77
turnDetection: {
88
type: 'semantic_vad',
99
eagerness: 'medium',
10-
create_response: true,
11-
interrupt_response: true,
10+
createResponse: true,
11+
interruptResponse: true,
1212
},
1313
},
1414
});

packages/agents-realtime/src/clientMessages.ts

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,52 @@ export type RealtimeTracingConfig =
3333
}
3434
| 'auto';
3535

36+
export type RealtimeInputAudioTranscriptionConfig = {
37+
language?: string;
38+
model?:
39+
| 'gpt-4o-transcribe'
40+
| 'gpt-4o-mini-transcribe'
41+
| 'whisper-1'
42+
| (string & {});
43+
prompt?: string;
44+
};
45+
46+
export type RealtimeTurnDetectionConfigAsIs = {
47+
type?: 'semantic_vad' | 'server_vad';
48+
create_response?: boolean;
49+
eagerness?: 'auto' | 'low' | 'medium' | 'high';
50+
interrupt_response?: boolean;
51+
prefix_padding_ms?: number;
52+
silence_duration_ms?: number;
53+
threshold?: number;
54+
};
55+
56+
// The Realtime API accepts snake_cased keys, so when this camelCase variant is used, the SDK converts the keys to snake_case before passing the config to the API
57+
export type RealtimeTurnDetectionConfigCamelCase = {
58+
type?: 'semantic_vad' | 'server_vad';
59+
createResponse?: boolean;
60+
eagerness?: 'auto' | 'low' | 'medium' | 'high';
61+
interruptResponse?: boolean;
62+
prefixPaddingMs?: number;
63+
silenceDurationMs?: number;
64+
threshold?: number;
65+
};
66+
67+
export type RealtimeTurnDetectionConfig = (
68+
| RealtimeTurnDetectionConfigAsIs
69+
| RealtimeTurnDetectionConfigCamelCase
70+
) &
71+
Record<string, any>;
72+
3673
export type RealtimeSessionConfig = {
3774
model: string;
3875
instructions: string;
3976
modalities: ('text' | 'audio')[];
4077
voice: string;
4178
inputAudioFormat: RealtimeAudioFormat;
4279
outputAudioFormat: RealtimeAudioFormat;
43-
inputAudioTranscription: Record<string, any>;
44-
turnDetection: Record<string, any>;
80+
inputAudioTranscription: RealtimeInputAudioTranscriptionConfig;
81+
turnDetection: RealtimeTurnDetectionConfig;
4582
toolChoice: ModelSettingsToolChoice;
4683
tools: FunctionToolDefinition[];
4784
tracing?: RealtimeTracingConfig | null;

packages/agents-realtime/src/openaiRealtimeBase.ts

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import {
55
RealtimeClientMessage,
66
RealtimeSessionConfig,
77
RealtimeTracingConfig,
8+
RealtimeTurnDetectionConfig,
9+
RealtimeTurnDetectionConfigAsIs,
810
RealtimeUserInput,
911
} from './clientMessages';
1012
import {
@@ -390,7 +392,7 @@ export abstract class OpenAIRealtimeBase
390392
config.inputAudioTranscription ??
391393
DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.inputAudioTranscription,
392394
turn_detection:
393-
config.turnDetection ??
395+
OpenAIRealtimeBase.buildTurnDetectionConfig(config.turnDetection) ??
394396
DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.turnDetection,
395397
tool_choice:
396398
config.toolChoice ?? DEFAULT_OPENAI_REALTIME_SESSION_CONFIG.toolChoice,
@@ -406,6 +408,48 @@ export abstract class OpenAIRealtimeBase
406408
return sessionData;
407409
}
408410

411+
/**
 * Normalizes a turn detection config into a snake_case keyed object.
 *
 * Each option may be given in camelCase or snake_case. When both spellings of
 * the same option are present, the camelCase value is used unless it is
 * `undefined` or `null`, in which case the snake_case value is used instead.
 * Keys that are not recognized here are passed through unchanged.
 *
 * @param c - The turn detection config to normalize, or `undefined`.
 * @returns The normalized config with all `undefined` entries removed, or
 * `undefined` when `c` is `undefined` or no entries remain.
 */
private static buildTurnDetectionConfig(
  c: RealtimeTurnDetectionConfig | undefined,
): RealtimeTurnDetectionConfigAsIs | undefined {
  if (typeof c === 'undefined') {
    return undefined;
  }
  const {
    type,
    createResponse,
    create_response,
    eagerness,
    interruptResponse,
    interrupt_response,
    prefixPaddingMs,
    prefix_padding_ms,
    silenceDurationMs,
    silence_duration_ms,
    threshold,
    ...rest
  } = c;

  // Use `??` rather than a truthiness check: `false` and `0` are meaningful
  // values here and must not be discarded in favor of the other spelling.
  const config: RealtimeTurnDetectionConfigAsIs & Record<string, any> = {
    type,
    create_response: createResponse ?? create_response,
    eagerness,
    interrupt_response: interruptResponse ?? interrupt_response,
    prefix_padding_ms: prefixPaddingMs ?? prefix_padding_ms,
    silence_duration_ms: silenceDurationMs ?? silence_duration_ms,
    threshold,
    ...rest,
  };
  // Remove undefined values from the config
  Object.keys(config).forEach((key) => {
    if (config[key] === undefined) delete config[key];
  });
  return Object.keys(config).length > 0 ? config : undefined;
}
452+
409453
/**
410454
* Sets the internal tracing config. This is used to track the tracing config that has been set
411455
* during the session.create event.

packages/agents-realtime/src/realtimeSession.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ export class RealtimeSession<
519519
this.#transport.on('turn_done', (event) => {
520520
const item = event.response.output[event.response.output.length - 1];
521521
const textOutput = getLastTextFromAudioOutputMessage(item) ?? '';
522-
const itemId = item.id ?? '';
522+
const itemId = item?.id ?? '';
523523
this.emit('agent_end', this.#context, this.#currentAgent, textOutput);
524524
this.#currentAgent.emit('agent_end', this.#context, textOutput);
525525

0 commit comments

Comments
 (0)