@@ -140,9 +140,25 @@ export interface ChatCompletion {
   object: 'chat.completion';
 
   /**
-   * The service tier used for processing the request.
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', and the Project is Scale tier enabled, the system will
+   *   utilize scale tier credits until they are exhausted.
+   * - If set to 'auto', and the Project is not Scale tier enabled, the request will
+   *   be processed using the default service tier with a lower uptime SLA and no
+   *   latency guarantee.
+   * - If set to 'default', the request will be processed using the default service
+   *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+   * - When not set, the default behavior is 'auto'.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
    */
-  service_tier?: 'scale' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;
 
   /**
    * This fingerprint represents the backend configuration that the model runs with.
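On the response side, the widened `service_tier` union can be inspected directly on a completion object. A minimal sketch, assuming the standard `openai` client with an API key in the environment (the model and prompt are illustrative, not part of the diff):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const completion = await client.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: 'Hello!' }],
  });

  // With this change the field is typed as 'auto' | 'default' | 'flex' | null
  // rather than the old 'scale' | 'default' | null.
  console.log(completion.service_tier);
}

main();
```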
@@ -319,11 +335,11 @@ export interface ChatCompletionAudioParam {
    * Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`,
    * or `pcm16`.
    */
-  format: 'wav' | 'mp3' | 'flac' | 'opus' | 'pcm16';
+  format: 'wav' | 'aac' | 'mp3' | 'flac' | 'opus' | 'pcm16';
 
   /**
    * The voice the model uses to respond. Supported voices are `alloy`, `ash`,
-   * `ballad`, `coral`, `echo`, `sage`, and `shimmer`.
+   * `ballad`, `coral`, `echo`, `fable`, `nova`, `onyx`, `sage`, and `shimmer`.
    */
   voice:
     | (string & {})
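A hedged usage sketch for the added audio options; `gpt-4o-audio-preview` is assumed here as an audio-capable model and is not part of the diff:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const completion = await client.chat.completions.create({
    model: 'gpt-4o-audio-preview', // assumed audio-capable model
    modalities: ['text', 'audio'],
    // 'aac' and 'nova' are among the newly listed format and voice options.
    audio: { format: 'aac', voice: 'nova' },
    messages: [{ role: 'user', content: 'Say hello in one sentence.' }],
  });

  console.log(completion.choices[0].message.audio?.transcript);
}

main();
```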
@@ -375,9 +391,25 @@ export interface ChatCompletionChunk {
   object: 'chat.completion.chunk';
 
   /**
-   * The service tier used for processing the request.
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', and the Project is Scale tier enabled, the system will
+   *   utilize scale tier credits until they are exhausted.
+   * - If set to 'auto', and the Project is not Scale tier enabled, the request will
+   *   be processed using the default service tier with a lower uptime SLA and no
+   *   latency guarantee.
+   * - If set to 'default', the request will be processed using the default service
+   *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+   * - When not set, the default behavior is 'auto'.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
    */
-  service_tier?: 'scale' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;
 
   /**
    * This fingerprint represents the backend configuration that the model runs with.
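The same field is mirrored on streaming chunks. A sketch of reading it while streaming, under the same client assumptions as the earlier example:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const stream = await client.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: 'Stream a short greeting.' }],
    stream: true,
  });

  for await (const chunk of stream) {
    // Each chunk now carries 'auto' | 'default' | 'flex' | null here.
    if (chunk.service_tier) console.error(`service_tier: ${chunk.service_tier}`);
    process.stdout.write(chunk.choices[0]?.delta?.content ?? '');
  }
}

main();
```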
@@ -1114,7 +1146,7 @@ export interface ChatCompletionCreateParamsBase {
   messages: Array<ChatCompletionMessageParam>;
 
   /**
-   * Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+   * Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
    * wide range of models with different capabilities, performance characteristics,
    * and price points. Refer to the
    * [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1194,7 +1226,7 @@ export interface ChatCompletionCreateParamsBase {
    *
    * This value is now deprecated in favor of `max_completion_tokens`, and is not
    * compatible with
-   * [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+   * [o-series models](https://platform.openai.com/docs/guides/reasoning).
    */
   max_tokens?: number | null;
 
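Because `max_tokens` is deprecated for the reasoning models referenced above, a request sketch using the preferred `max_completion_tokens` parameter (access to `o3` is assumed):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const completion = await client.chat.completions.create({
    model: 'o3', // reasoning model; prefer max_completion_tokens over max_tokens
    messages: [{ role: 'user', content: 'Explain flex processing in one sentence.' }],
    max_completion_tokens: 256,
  });

  console.log(completion.choices[0].message.content);
}

main();
```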
@@ -1296,14 +1328,19 @@ export interface ChatCompletionCreateParamsBase {
    *   latency guarantee.
    * - If set to 'default', the request will be processed using the default service
    *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
    * - When not set, the default behavior is 'auto'.
    *
    * When this parameter is set, the response body will include the `service_tier`
    * utilized.
    */
-  service_tier?: 'auto' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;
 
   /**
+   * Not supported with latest reasoning models `o3` and `o4-mini`.
+   *
    * Up to 4 sequences where the API will stop generating further tokens. The
    * returned text will not contain the stop sequence.
    */
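Putting the request-side additions together, a sketch of a Flex Processing call; whether a given account can use `flex` with `o3` is an assumption, and `stop` is omitted per the new caveat:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const completion = await client.chat.completions.create({
    model: 'o3', // flex access for this model is assumed
    messages: [{ role: 'user', content: 'List three prime numbers.' }],
    service_tier: 'flex',
    // `stop` is intentionally omitted: not supported with o3 / o4-mini.
  });

  console.log(completion.service_tier, completion.choices[0].message.content);
}

main();
```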