1
1
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
3
3
import { APIResource } from '../../core/resource' ;
4
+ import * as ClientSecretsAPI from './client-secrets' ;
4
5
import * as RealtimeAPI from './realtime' ;
5
6
import * as ResponsesAPI from '../responses/responses' ;
6
7
import { APIPromise } from '../../core/api-promise' ;
@@ -39,14 +40,19 @@ export interface RealtimeSessionClientSecret {
39
40
*/
40
41
export interface RealtimeSessionCreateResponse {
41
42
/**
42
- * Configuration for input and output audio.
43
+ * Ephemeral key returned by the API.
43
44
*/
44
- audio ?: RealtimeSessionCreateResponse . Audio ;
45
+ client_secret : RealtimeSessionClientSecret ;
45
46
46
47
/**
47
- * Ephemeral key returned by the API.
48
+ * The type of session to create. Always `realtime` for the Realtime API.
49
+ */
50
+ type : 'realtime' ;
51
+
52
+ /**
53
+ * Configuration for input and output audio.
48
54
*/
49
- client_secret ?: RealtimeSessionClientSecret ;
55
+ audio ?: RealtimeSessionCreateResponse . Audio ;
50
56
51
57
/**
52
58
* Additional fields to include in server outputs.
@@ -115,7 +121,7 @@ export interface RealtimeSessionCreateResponse {
115
121
/**
116
122
* Tools available to the model.
117
123
*/
118
- tools ?: Array < RealtimeAPI . Models | RealtimeSessionCreateResponse . McpTool > ;
124
+ tools ?: Array < RealtimeAPI . RealtimeFunctionTool | RealtimeSessionCreateResponse . McpTool > ;
119
125
120
126
/**
121
127
* Realtime API can write session traces to the
@@ -132,11 +138,6 @@ export interface RealtimeSessionCreateResponse {
132
138
* The default is `auto`.
133
139
*/
134
140
truncation ?: RealtimeAPI . RealtimeTruncation ;
135
-
136
- /**
137
- * The type of session to create. Always `realtime` for the Realtime API.
138
- */
139
- type ?: 'realtime' ;
140
141
}
141
142
142
143
export namespace RealtimeSessionCreateResponse {
@@ -238,7 +239,7 @@ export namespace RealtimeSessionCreateResponse {
238
239
239
240
/**
240
241
* Optional idle timeout after which turn detection will auto-timeout when no
241
- * additional audio is received.
242
+ * additional audio is received and emits a `timeout_triggered` event.
242
243
*/
243
244
idle_timeout_ms ?: number | null ;
244
245
@@ -491,87 +492,90 @@ export namespace RealtimeSessionCreateResponse {
491
492
}
492
493
493
494
/**
494
- * Ephemeral key returned by the API. Only present when the session is created on
495
- * the server via REST API.
495
+ * A Realtime transcription session configuration object.
496
496
*/
497
- export interface RealtimeTranscriptionSessionClientSecret {
498
- /**
499
- * Timestamp for when the token expires. Currently, all tokens expire after one
500
- * minute.
501
- */
502
- expires_at : number ;
503
-
497
+ export interface RealtimeTranscriptionSessionCreateResponse {
504
498
/**
505
- * Ephemeral key usable in client environments to authenticate connections to the
506
- * Realtime API. Use this in client-side environments rather than a standard API
507
- * token, which should only be used server-side.
499
+ * Unique identifier for the session that looks like `sess_1234567890abcdef`.
508
500
*/
509
- value : string ;
510
- }
501
+ id : string ;
511
502
512
- /**
513
- * A new Realtime transcription session configuration.
514
- *
515
- * When a session is created on the server via REST API, the session object also
516
- * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
517
- * not present when a session is updated via the WebSocket API.
518
- */
519
- export interface RealtimeTranscriptionSessionCreateResponse {
520
503
/**
521
- * Ephemeral key returned by the API. Only present when the session is created on
522
- * the server via REST API.
504
+ * The object type. Always `realtime.transcription_session`.
523
505
*/
524
- client_secret : RealtimeTranscriptionSessionClientSecret ;
506
+ object : string ;
525
507
526
508
/**
527
- * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
509
+ * The type of session. Always `transcription` for transcription sessions.
528
510
*/
529
- input_audio_format ?: string ;
511
+ type : 'transcription' ;
530
512
531
513
/**
532
- * Configuration of the transcription model.
514
+ * Configuration for input audio for the session.
533
515
*/
534
- input_audio_transcription ?: RealtimeTranscriptionSessionInputAudioTranscription ;
516
+ audio ?: RealtimeTranscriptionSessionCreateResponse . Audio ;
535
517
536
518
/**
537
- * The set of modalities the model can respond with. To disable audio, set this to
538
- * ["text"].
519
+ * Expiration timestamp for the session, in seconds since epoch.
539
520
*/
540
- modalities ?: Array < 'text' | 'audio' > ;
521
+ expires_at ?: number ;
541
522
542
523
/**
543
- * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
544
- * means that the model will detect the start and end of speech based on audio
545
- * volume and respond at the end of user speech.
524
+ * Additional fields to include in server outputs.
525
+ *
526
+ * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
527
+ * transcription.
546
528
*/
547
- turn_detection ?: RealtimeTranscriptionSessionTurnDetection ;
529
+ include ?: Array < 'item.input_audio_transcription.logprobs' > ;
548
530
}
549
531
550
- /**
551
- * Configuration of the transcription model.
552
- */
553
- export interface RealtimeTranscriptionSessionInputAudioTranscription {
532
+ export namespace RealtimeTranscriptionSessionCreateResponse {
554
533
/**
555
- * The language of the input audio. Supplying the input language in
556
- * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
557
- * format will improve accuracy and latency.
534
+ * Configuration for input audio for the session.
558
535
*/
559
- language ?: string ;
536
+ export interface Audio {
537
+ input ?: Audio . Input ;
538
+ }
560
539
561
- /**
562
- * The model to use for transcription. Current options are `whisper-1`,
563
- * `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
564
- */
565
- model ?: 'whisper-1' | 'gpt-4o-transcribe-latest' | 'gpt-4o-mini-transcribe' | 'gpt-4o-transcribe' ;
540
+ export namespace Audio {
541
+ export interface Input {
542
+ /**
543
+ * The PCM audio format. Only a 24kHz sample rate is supported.
544
+ */
545
+ format ?: RealtimeAPI . RealtimeAudioFormats ;
566
546
567
- /**
568
- * An optional text to guide the model's style or continue a previous audio
569
- * segment. For `whisper-1`, the
570
- * [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
571
- * For `gpt-4o-transcribe` models, the prompt is a free text string, for example
572
- * "expect words related to technology".
573
- */
574
- prompt ?: string ;
547
+ /**
548
+ * Configuration for input audio noise reduction.
549
+ */
550
+ noise_reduction ?: Input . NoiseReduction ;
551
+
552
+ /**
553
+ * Configuration of the transcription model.
554
+ */
555
+ transcription ?: RealtimeAPI . AudioTranscription ;
556
+
557
+ /**
558
+ * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
559
+ * means that the model will detect the start and end of speech based on audio
560
+ * volume and respond at the end of user speech.
561
+ */
562
+ turn_detection ?: ClientSecretsAPI . RealtimeTranscriptionSessionTurnDetection ;
563
+ }
564
+
565
+ export namespace Input {
566
+ /**
567
+ * Configuration for input audio noise reduction.
568
+ */
569
+ export interface NoiseReduction {
570
+ /**
571
+ * Type of noise reduction. `near_field` is for close-talking microphones such as
572
+ * headphones, `far_field` is for far-field microphones such as laptop or
573
+ * conference room microphones.
574
+ */
575
+ type ?: RealtimeAPI . NoiseReductionType ;
576
+ }
577
+ }
578
+ }
575
579
}
576
580
577
581
/**
@@ -670,9 +674,7 @@ export declare namespace ClientSecrets {
670
674
export {
671
675
type RealtimeSessionClientSecret as RealtimeSessionClientSecret ,
672
676
type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse ,
673
- type RealtimeTranscriptionSessionClientSecret as RealtimeTranscriptionSessionClientSecret ,
674
677
type RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse ,
675
- type RealtimeTranscriptionSessionInputAudioTranscription as RealtimeTranscriptionSessionInputAudioTranscription ,
676
678
type RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection ,
677
679
type ClientSecretCreateResponse as ClientSecretCreateResponse ,
678
680
type ClientSecretCreateParams as ClientSecretCreateParams ,
0 commit comments