@@ -140,9 +140,25 @@ export interface ChatCompletion {
   object: 'chat.completion';
 
   /**
-   * The service tier used for processing the request.
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', and the Project is Scale tier enabled, the system will
+   *   utilize scale tier credits until they are exhausted.
+   * - If set to 'auto', and the Project is not Scale tier enabled, the request will
+   *   be processed using the default service tier with a lower uptime SLA and no
+   *   latency guarantee.
+   * - If set to 'default', the request will be processed using the default service
+   *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+   * - When not set, the default behavior is 'auto'.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
    */
-  service_tier?: 'scale' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;
 
   /**
    * This fingerprint represents the backend configuration that the model runs with.
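On the response side, the widened `service_tier` union can be inspected directly on a completion object. A minimal sketch, assuming the standard `openai` client with an API key in the environment (the model and prompt are illustrative, not part of the diff):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const completion = await client.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: 'Hello!' }],
  });

  // With this change the field is typed as 'auto' | 'default' | 'flex' | null
  // rather than the old 'scale' | 'default' | null.
  console.log(completion.service_tier);
}

main();
```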
@@ -319,11 +335,11 @@ export interface ChatCompletionAudioParam {
    * Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`,
    * or `pcm16`.
    */
-  format: 'wav' | 'mp3' | 'flac' | 'opus' | 'pcm16';
+  format: 'wav' | 'aac' | 'mp3' | 'flac' | 'opus' | 'pcm16';
 
   /**
    * The voice the model uses to respond. Supported voices are `alloy`, `ash`,
-   * `ballad`, `coral`, `echo`, `sage`, and `shimmer`.
+   * `ballad`, `coral`, `echo`, `fable`, `nova`, `onyx`, `sage`, and `shimmer`.
    */
   voice:
     | (string & {})
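A hedged usage sketch for the added audio options; `gpt-4o-audio-preview` is assumed here as an audio-capable model and is not part of the diff:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const completion = await client.chat.completions.create({
    model: 'gpt-4o-audio-preview', // assumed audio-capable model
    modalities: ['text', 'audio'],
    // 'aac' and 'nova' are among the newly listed format and voice options.
    audio: { format: 'aac', voice: 'nova' },
    messages: [{ role: 'user', content: 'Say hello in one sentence.' }],
  });

  console.log(completion.choices[0].message.audio?.transcript);
}

main();
```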
@@ -375,9 +391,25 @@ export interface ChatCompletionChunk {
   object: 'chat.completion.chunk';
 
   /**
-   * The service tier used for processing the request.
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', and the Project is Scale tier enabled, the system will
+   *   utilize scale tier credits until they are exhausted.
+   * - If set to 'auto', and the Project is not Scale tier enabled, the request will
+   *   be processed using the default service tier with a lower uptime SLA and no
+   *   latency guarantee.
+   * - If set to 'default', the request will be processed using the default service
+   *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+   * - When not set, the default behavior is 'auto'.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
    */
-  service_tier?: 'scale' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;
 
   /**
    * This fingerprint represents the backend configuration that the model runs with.
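The same field is mirrored on streaming chunks. A sketch of reading it while streaming, under the same client assumptions as the earlier example:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const stream = await client.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: 'Stream a short greeting.' }],
    stream: true,
  });

  for await (const chunk of stream) {
    // Each chunk now carries 'auto' | 'default' | 'flex' | null here.
    if (chunk.service_tier) console.error(`service_tier: ${chunk.service_tier}`);
    process.stdout.write(chunk.choices[0]?.delta?.content ?? '');
  }
}

main();
```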
@@ -1114,7 +1146,7 @@ export interface ChatCompletionCreateParamsBase {
   messages: Array<ChatCompletionMessageParam>;
 
   /**
-   * Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+   * Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
    * wide range of models with different capabilities, performance characteristics,
    * and price points. Refer to the
    * [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1194,7 +1226,7 @@ export interface ChatCompletionCreateParamsBase {
    *
    * This value is now deprecated in favor of `max_completion_tokens`, and is not
    * compatible with
-   * [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+   * [o-series models](https://platform.openai.com/docs/guides/reasoning).
    */
   max_tokens?: number | null;
 
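Because `max_tokens` is deprecated for the reasoning models referenced above, a request sketch using the preferred `max_completion_tokens` parameter (access to `o3` is assumed):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const completion = await client.chat.completions.create({
    model: 'o3', // reasoning model; prefer max_completion_tokens over max_tokens
    messages: [{ role: 'user', content: 'Explain flex processing in one sentence.' }],
    max_completion_tokens: 256,
  });

  console.log(completion.choices[0].message.content);
}

main();
```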
@@ -1296,14 +1328,19 @@ export interface ChatCompletionCreateParamsBase {
    *   latency guarantee.
    * - If set to 'default', the request will be processed using the default service
    *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
    * - When not set, the default behavior is 'auto'.
    *
    * When this parameter is set, the response body will include the `service_tier`
    * utilized.
    */
-  service_tier?: 'auto' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;
 
   /**
+   * Not supported with latest reasoning models `o3` and `o4-mini`.
+   *
    * Up to 4 sequences where the API will stop generating further tokens. The
    * returned text will not contain the stop sequence.
    */
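Putting the request-side additions together, a sketch of a Flex Processing call; whether a given account can use `flex` with `o3` is an assumption, and `stop` is omitted per the new caveat:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const completion = await client.chat.completions.create({
    model: 'o3', // flex access for this model is assumed
    messages: [{ role: 'user', content: 'List three prime numbers.' }],
    service_tier: 'flex',
    // `stop` is intentionally omitted: not supported with o3 / o4-mini.
  });

  console.log(completion.service_tier, completion.choices[0].message.content);
}

main();
```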