2 changes: 1 addition & 1 deletion .release-please-manifest.json
@@ -1,3 +1,3 @@
{
".": "6.7.0"
".": "6.8.0"
}
6 changes: 3 additions & 3 deletions .stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 135
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a3c45d9bd3bb25bf4eaa49b7fb473a00038293dec659ffaa44f624ded884abf4.yml
openapi_spec_hash: 9c20aaf786a0700dabd13d9865481c9e
config_hash: 50ee3382a63c021a9f821a935950e926
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml
openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b
config_hash: 032995825500a503a76da119f5354905
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,19 @@
# Changelog

## 6.8.0 (2025-11-03)

Full Changelog: [v6.7.0...v6.8.0](https://github.com/openai/openai-node/compare/v6.7.0...v6.8.0)

### Features

* **api:** Realtime API token_limits, hybrid search ranking options ([6a5b48c](https://github.com/openai/openai-node/commit/6a5b48c9c8961bc7409c20546d9d845a4f2674ce))
* **api:** remove InputAudio from ResponseInputContent ([9909fef](https://github.com/openai/openai-node/commit/9909fef596280fc16174679d97c3e81543c68646))


### Chores

* **internal:** codegen related update ([3ad52aa](https://github.com/openai/openai-node/commit/3ad52aaa94cdbe1926fee1e5f8eacebefdfc399d))

## 6.7.0 (2025-10-24)

Full Changelog: [v6.6.0...v6.7.0](https://github.com/openai/openai-node/compare/v6.6.0...v6.7.0)
2 changes: 1 addition & 1 deletion jsr.json
@@ -1,6 +1,6 @@
{
"name": "@openai/openai",
"version": "6.7.0",
"version": "6.8.0",
"exports": {
".": "./index.ts",
"./helpers/zod": "./helpers/zod.ts",
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "openai",
"version": "6.7.0",
"version": "6.8.0",
"description": "The official TypeScript library for the OpenAI API",
"author": "OpenAI <[email protected]>",
"types": "dist/index.d.ts",
5 changes: 4 additions & 1 deletion src/resources/images.ts
@@ -545,7 +545,10 @@ export interface ImageEditParamsBase {
background?: 'transparent' | 'opaque' | 'auto' | null;

/**
* Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
* Control how much effort the model will exert to match the style and features,
* especially facial features, of input images. This parameter is only supported
* for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
* `low`. Defaults to `low`.
*/
input_fidelity?: 'high' | 'low' | null;

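As a usage sketch of the `input_fidelity` parameter documented above: the call below assumes the standard `client.images.edit` surface, and the file name and prompt are illustrative only.

```ts
import fs from 'fs';
import OpenAI from 'openai';

const client = new OpenAI();

// `input_fidelity: 'high'` asks gpt-image-1 to match the input image's
// features (especially faces) more closely. Only `gpt-image-1` supports it.
const edited = await client.images.edit({
  model: 'gpt-image-1',
  image: fs.createReadStream('portrait.png'), // hypothetical input file
  prompt: 'Add a red scarf without changing the face',
  input_fidelity: 'high',
});
```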
15 changes: 13 additions & 2 deletions src/resources/realtime/calls.ts
@@ -177,8 +177,19 @@ export interface CallAcceptParams {
tracing?: RealtimeAPI.RealtimeTracingConfig | null;

/**
* Controls how the realtime conversation is truncated prior to model inference.
* The default is `auto`.
* When the number of tokens in a conversation exceeds the model's input token
* limit, the conversation will be truncated, meaning messages (starting from the
* oldest) will not be included in the model's context. A 32k context model with
* 4,096 max output tokens can only include 28,224 tokens in the context before
* truncation occurs. Clients can configure truncation behavior to truncate with a
* lower max token limit, which is an effective way to control token usage and
* cost. Truncation will reduce the number of cached tokens on the next turn
* (busting the cache), since messages are dropped from the beginning of the
* context. However, clients can also configure truncation to retain messages up to
* a fraction of the maximum context size, which will reduce the need for future
* truncations and thus improve the cache rate. Truncation can be disabled
* entirely, which means the server will never truncate but will instead return an
* error if the conversation exceeds the model's input token limit.
*/
truncation?: RealtimeAPI.RealtimeTruncation;
}
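A hedged sketch of how a client might pass `truncation` when accepting a call; the call ID is hypothetical, and the `type`/`model` fields are assumed from the session-style shape of `CallAcceptParams`.

```ts
import OpenAI from 'openai';

const client = new OpenAI();

// With truncation 'disabled' the server never drops messages; it returns
// an error once the conversation exceeds the model's input token limit.
await client.realtime.calls.accept('rtc_call_123', {
  type: 'realtime',
  model: 'gpt-realtime',
  truncation: 'disabled',
});
```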
15 changes: 13 additions & 2 deletions src/resources/realtime/client-secrets.ts
@@ -144,8 +144,19 @@ export interface RealtimeSessionCreateResponse {
tracing?: 'auto' | RealtimeSessionCreateResponse.TracingConfiguration | null;

/**
* Controls how the realtime conversation is truncated prior to model inference.
* The default is `auto`.
* When the number of tokens in a conversation exceeds the model's input token
* limit, the conversation will be truncated, meaning messages (starting from the
* oldest) will not be included in the model's context. A 32k context model with
* 4,096 max output tokens can only include 28,224 tokens in the context before
* truncation occurs. Clients can configure truncation behavior to truncate with a
* lower max token limit, which is an effective way to control token usage and
* cost. Truncation will reduce the number of cached tokens on the next turn
* (busting the cache), since messages are dropped from the beginning of the
* context. However, clients can also configure truncation to retain messages up to
* a fraction of the maximum context size, which will reduce the need for future
* truncations and thus improve the cache rate. Truncation can be disabled
* entirely, which means the server will never truncate but will instead return an
* error if the conversation exceeds the model's input token limit.
*/
truncation?: RealtimeAPI.RealtimeTruncation;
}
59 changes: 53 additions & 6 deletions src/resources/realtime/realtime.ts
@@ -3050,8 +3050,19 @@ export interface RealtimeSessionCreateRequest {
tracing?: RealtimeTracingConfig | null;

/**
* Controls how the realtime conversation is truncated prior to model inference.
* The default is `auto`.
* When the number of tokens in a conversation exceeds the model's input token
* limit, the conversation will be truncated, meaning messages (starting from the
* oldest) will not be included in the model's context. A 32k context model with
* 4,096 max output tokens can only include 28,224 tokens in the context before
* truncation occurs. Clients can configure truncation behavior to truncate with a
* lower max token limit, which is an effective way to control token usage and
* cost. Truncation will reduce the number of cached tokens on the next turn
* (busting the cache), since messages are dropped from the beginning of the
* context. However, clients can also configure truncation to retain messages up to
* a fraction of the maximum context size, which will reduce the need for future
* truncations and thus improve the cache rate. Truncation can be disabled
* entirely, which means the server will never truncate but will instead return an
* error if the conversation exceeds the model's input token limit.
*/
truncation?: RealtimeTruncation;
}
@@ -3474,8 +3485,19 @@ export interface RealtimeTranscriptionSessionCreateRequest {
}

/**
* Controls how the realtime conversation is truncated prior to model inference.
* The default is `auto`.
* When the number of tokens in a conversation exceeds the model's input token
* limit, the conversation will be truncated, meaning messages (starting from the
* oldest) will not be included in the model's context. A 32k context model with
* 4,096 max output tokens can only include 28,224 tokens in the context before
* truncation occurs. Clients can configure truncation behavior to truncate with a
* lower max token limit, which is an effective way to control token usage and
* cost. Truncation will reduce the number of cached tokens on the next turn
* (busting the cache), since messages are dropped from the beginning of the
* context. However, clients can also configure truncation to retain messages up to
* a fraction of the maximum context size, which will reduce the need for future
* truncations and thus improve the cache rate. Truncation can be disabled
* entirely, which means the server will never truncate but will instead return an
* error if the conversation exceeds the model's input token limit.
*/
export type RealtimeTruncation = 'auto' | 'disabled' | RealtimeTruncationRetentionRatio;

@@ -3486,15 +3508,40 @@ export type RealtimeTruncation = 'auto' | 'disabled' | RealtimeTruncationRetenti
*/
export interface RealtimeTruncationRetentionRatio {
/**
* Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
* conversation exceeds the input token limit.
* Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
* the conversation exceeds the input token limit. Setting this to `0.8` means that
* messages will be dropped until 80% of the maximum allowed tokens are used. This
* helps reduce the frequency of truncations and improve cache rates.
*/
retention_ratio: number;

/**
* Use retention ratio truncation.
*/
type: 'retention_ratio';

/**
* Optional custom token limits for this truncation strategy. If not provided, the
* model's default token limits will be used.
*/
token_limits?: RealtimeTruncationRetentionRatio.TokenLimits;
}

export namespace RealtimeTruncationRetentionRatio {
/**
* Optional custom token limits for this truncation strategy. If not provided, the
* model's default token limits will be used.
*/
export interface TokenLimits {
/**
* Maximum tokens allowed in the conversation after instructions (which include
* tool definitions). For example, setting this to 5,000 would mean that truncation
* would occur when the conversation exceeds 5,000 tokens after instructions. This
* cannot be higher than the model's context window size minus the maximum output
* tokens.
*/
post_instructions?: number;
}
}

/**
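Putting the definitions above together, a retention-ratio strategy with custom token limits could look like the following sketch; the `0.8` ratio and 5,000-token limit reuse the figures from the doc comments, and the import path mirrors this file's location.

```ts
import type { RealtimeTruncation } from 'openai/resources/realtime/realtime';

// Drop the oldest messages until 80% of the allowed tokens are in use, and
// treat 5,000 post-instruction tokens as the ceiling that triggers
// truncation (it must stay below context window minus max output tokens).
const truncation: RealtimeTruncation = {
  type: 'retention_ratio',
  retention_ratio: 0.8,
  token_limits: { post_instructions: 5_000 },
};
```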
49 changes: 40 additions & 9 deletions src/resources/responses/responses.ts
@@ -232,6 +232,10 @@ export interface ComputerTool {
type: 'computer_use_preview';
}

/**
* A custom tool that processes input using a specified format. Learn more about
* [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
*/
export interface CustomTool {
/**
* The name of the custom tool, used to identify it in tool calls.
@@ -318,6 +322,12 @@ export namespace FileSearchTool {
* Ranking options for search.
*/
export interface RankingOptions {
/**
* Weights that control how reciprocal rank fusion balances semantic embedding
* matches versus sparse keyword matches when hybrid search is enabled.
*/
hybrid_search?: RankingOptions.HybridSearch;

/**
* The ranker to use for the file search.
*/
@@ -330,6 +340,24 @@
*/
score_threshold?: number;
}

export namespace RankingOptions {
/**
* Weights that control how reciprocal rank fusion balances semantic embedding
* matches versus sparse keyword matches when hybrid search is enabled.
*/
export interface HybridSearch {
/**
* The weight of the embedding in the reciprocal ranking fusion.
*/
embedding_weight: number;

/**
* The weight of the text in the reciprocal ranking fusion.
*/
text_weight: number;
}
}
}
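To illustrate the new ranking options, a file-search tool weighting semantic matches 3:1 over keyword matches might be configured as below; the vector store ID and the 0.75/0.25 split are hypothetical.

```ts
import type { FileSearchTool } from 'openai/resources/responses/responses';

// `embedding_weight` and `text_weight` control how reciprocal rank fusion
// balances semantic-embedding matches against sparse keyword matches.
const fileSearch: FileSearchTool = {
  type: 'file_search',
  vector_store_ids: ['vs_abc123'], // hypothetical vector store
  ranking_options: {
    hybrid_search: { embedding_weight: 0.75, text_weight: 0.25 },
  },
};
```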

/**
@@ -1278,7 +1306,6 @@ export type ResponseContent =
| ResponseInputText
| ResponseInputImage
| ResponseInputFile
| ResponseInputAudio
| ResponseOutputText
| ResponseOutputRefusal
| ResponseContent.ReasoningTextContent;
@@ -2301,11 +2328,7 @@ export namespace ResponseInputAudio {
/**
* A text input to the model.
*/
export type ResponseInputContent =
| ResponseInputText
| ResponseInputImage
| ResponseInputFile
| ResponseInputAudio;
export type ResponseInputContent = ResponseInputText | ResponseInputImage | ResponseInputFile;

/**
* A file input to the model.
@@ -3847,6 +3870,8 @@ export interface ResponseOutputText {
| ResponseOutputText.FilePath
>;

logprobs: Array<ResponseOutputText.Logprob>;

/**
* The text output from the model.
*/
@@ -3856,8 +3881,6 @@
* The type of the output text. Always `output_text`.
*/
type: 'output_text';

logprobs?: Array<ResponseOutputText.Logprob>;
}

export namespace ResponseOutputText {
@@ -5048,6 +5071,8 @@ export namespace Tool {
* An optional list of uploaded files to make available to your code.
*/
file_ids?: Array<string>;

/**
 * The memory limit for the code interpreter container.
 */
memory_limit?: '1g' | '4g' | '16g' | '64g' | null;
}
}

@@ -5067,7 +5092,10 @@
background?: 'transparent' | 'opaque' | 'auto';

/**
* Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
* Control how much effort the model will exert to match the style and features,
* especially facial features, of input images. This parameter is only supported
* for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
* `low`. Defaults to `low`.
*/
input_fidelity?: 'high' | 'low' | null;

@@ -5135,6 +5163,9 @@ export namespace Tool {
}
}

/**
* A tool that allows the model to execute shell commands in a local environment.
*/
export interface LocalShell {
/**
* The type of the local shell tool. Always `local_shell`.
6 changes: 6 additions & 0 deletions src/resources/shared.ts
@@ -135,13 +135,19 @@ export interface CompoundFilter {
export type CustomToolInputFormat = CustomToolInputFormat.Text | CustomToolInputFormat.Grammar;

export namespace CustomToolInputFormat {
/**
* Unconstrained free-form text.
*/
export interface Text {
/**
* Unconstrained text format. Always `text`.
*/
type: 'text';
}

/**
* A grammar defined by the user.
*/
export interface Grammar {
/**
* The grammar definition.
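As a sketch of the `Grammar` format in use: the custom tool below constrains the model's tool input with a small Lark grammar. The tool name and grammar are hypothetical, and the `syntax` field is assumed from the SDK's grammar format.

```ts
import type { CustomTool } from 'openai/resources/responses/responses';

// Input to this tool must match the grammar, unlike the unconstrained
// free-form `text` format.
const addTool: CustomTool = {
  type: 'custom',
  name: 'add_numbers',
  description: 'Adds two integers.',
  format: {
    type: 'grammar',
    syntax: 'lark',
    definition: 'start: INT "+" INT\n%import common.INT',
  },
};
```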
49 changes: 42 additions & 7 deletions src/resources/vector-stores/file-batches.ts
@@ -255,13 +255,6 @@ export namespace VectorStoreFileBatch {
}

export interface FileBatchCreateParams {
/**
* A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
* the vector store should use. Useful for tools like `file_search` that can access
* files.
*/
file_ids: Array<string>;

/**
* Set of 16 key-value pairs that can be attached to an object. This can be useful
* for storing additional information about the object in a structured format, and
@@ -276,6 +269,48 @@ export interface FileBatchCreateParams {
* strategy. Only applicable if `file_ids` is non-empty.
*/
chunking_strategy?: VectorStoresAPI.FileChunkingStrategyParam;

/**
* A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
* the vector store should use. Useful for tools like `file_search` that can access
* files. If `attributes` or `chunking_strategy` are provided, they will be applied
* to all files in the batch. Mutually exclusive with `files`.
*/
file_ids?: Array<string>;

/**
* A list of objects that each include a `file_id` plus optional `attributes` or
* `chunking_strategy`. Use this when you need to override metadata for specific
* files. The global `attributes` or `chunking_strategy` will be ignored and must
* be specified for each file. Mutually exclusive with `file_ids`.
*/
files?: Array<FileBatchCreateParams.File>;
}

export namespace FileBatchCreateParams {
export interface File {
/**
* A [File](https://platform.openai.com/docs/api-reference/files) ID that the
* vector store should use. Useful for tools like `file_search` that can access
* files.
*/
file_id: string;

/**
* Set of 16 key-value pairs that can be attached to an object. This can be useful
* for storing additional information about the object in a structured format, and
* querying for objects via API or the dashboard. Keys are strings with a maximum
* length of 64 characters. Values are strings with a maximum length of 512
* characters, booleans, or numbers.
*/
attributes?: { [key: string]: string | number | boolean } | null;

/**
* The chunking strategy used to chunk the file. If not set, will use the `auto`
* strategy.
*/
chunking_strategy?: VectorStoresAPI.FileChunkingStrategyParam;
}
}
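
Given the now mutually exclusive `file_ids` and `files` fields, a per-file override might look like this sketch; the IDs are hypothetical, and the static chunking strategy uses the SDK's existing `FileChunkingStrategyParam` shape.

```ts
import OpenAI from 'openai';

const client = new OpenAI();

// With `files`, batch-level `attributes`/`chunking_strategy` are ignored,
// so each file carries its own overrides.
const batch = await client.vectorStores.fileBatches.create('vs_abc123', {
  files: [
    { file_id: 'file-111', attributes: { team: 'docs' } },
    {
      file_id: 'file-222',
      chunking_strategy: {
        type: 'static',
        static: { max_chunk_size_tokens: 800, chunk_overlap_tokens: 400 },
      },
    },
  ],
});
```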

export interface FileBatchRetrieveParams {