@@ -33,15 +33,52 @@ export type RealtimeTracingConfig =
3333 }
3434 | 'auto' ;
3535
/**
 * Settings for input audio transcription. Every field is optional.
 */
36+ export type RealtimeInputAudioTranscriptionConfig = {
// NOTE(review): the expected format (e.g. a language code) is not shown here; confirm against the Realtime API docs.
37+ language ?: string ;
// Known transcription model names. The trailing `string & {}` member also admits any other
// string, so model names not listed here still type-check.
38+ model ?:
39+ | 'gpt-4o-transcribe'
40+ | 'gpt-4o-mini-transcribe'
41+ | 'whisper-1'
42+ | ( string & { } ) ;
// Free-form string. NOTE(review): how the prompt is consumed is not visible here; confirm against the API docs.
43+ prompt ?: string ;
44+ } ;
45+
/**
 * Turn detection settings written with snake_case keys. Every field is optional.
 *
 * NOTE(review): the `AsIs` suffix suggests this object is forwarded to the API without key
 * conversion; confirm against the code that builds the session payload.
 * NOTE(review): some fields probably apply to only one `type` value; this type does not
 * enforce any such pairing.
 */
46+ export type RealtimeTurnDetectionConfigAsIs = {
47+ type ?: 'semantic_vad' | 'server_vad' ;
48+ create_response ?: boolean ;
49+ eagerness ?: 'auto' | 'low' | 'medium' | 'high' ;
50+ interrupt_response ?: boolean ;
// Presumably milliseconds, per the `_ms` suffix; confirm.
51+ prefix_padding_ms ?: number ;
// Presumably milliseconds, per the `_ms` suffix; confirm.
52+ silence_duration_ms ?: number ;
// NOTE(review): valid range is not shown here; confirm against the API docs.
53+ threshold ?: number ;
54+ } ;
55+
56+ // The Realtime API accepts snake_case keys, so when this camelCase form is used, the SDK converts the keys to snake_case before passing them to the API.
// Each field mirrors the same-named snake_case field of RealtimeTurnDetectionConfigAsIs
// (createResponse <-> create_response, prefixPaddingMs <-> prefix_padding_ms, ...); `type`,
// `eagerness` and `threshold` are spelled identically in both forms. Every field is optional.
57+ export type RealtimeTurnDetectionConfigCamelCase = {
58+ type ?: 'semantic_vad' | 'server_vad' ;
59+ createResponse ?: boolean ;
60+ eagerness ?: 'auto' | 'low' | 'medium' | 'high' ;
61+ interruptResponse ?: boolean ;
62+ prefixPaddingMs ?: number ;
63+ silenceDurationMs ?: number ;
64+ threshold ?: number ;
65+ } ;
66+
/**
 * Turn detection settings in either key style: the snake_case shape or the camelCase shape.
 *
 * The intersection with `Record<string, any>` additionally allows arbitrary extra string keys
 * with values of any type, so keys not declared in either shape still type-check.
 */
67+ export type RealtimeTurnDetectionConfig = (
68+ | RealtimeTurnDetectionConfigAsIs
69+ | RealtimeTurnDetectionConfigCamelCase
70+ ) &
71+ Record < string , any > ;
72+
3673export type RealtimeSessionConfig = {
3774 model : string ;
3875 instructions : string ;
3976 modalities : ( 'text' | 'audio' ) [ ] ;
4077 voice : string ;
4178 inputAudioFormat : RealtimeAudioFormat ;
4279 outputAudioFormat : RealtimeAudioFormat ;
43- inputAudioTranscription : Record < string , any > ;
44- turnDetection : Record < string , any > ;
80+ inputAudioTranscription : RealtimeInputAudioTranscriptionConfig ;
81+ turnDetection : RealtimeTurnDetectionConfig ;
4582 toolChoice : ModelSettingsToolChoice ;
4683 tools : FunctionToolDefinition [ ] ;
4784 tracing ?: RealtimeTracingConfig | null ;
0 commit comments