diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2ed1eead8..b0aac41b2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,6 +10,7 @@ on:
jobs:
lint:
+ timeout-minutes: 10
name: lint
runs-on: ubuntu-latest
steps:
@@ -27,6 +28,7 @@ jobs:
run: ./scripts/lint
build:
+ timeout-minutes: 5
name: build
runs-on: ubuntu-latest
permissions:
@@ -61,6 +63,7 @@ jobs:
SHA: ${{ github.sha }}
run: ./scripts/utils/upload-artifact.sh
test:
+ timeout-minutes: 10
name: test
runs-on: ubuntu-latest
steps:
@@ -78,6 +81,7 @@ jobs:
run: ./scripts/test
examples:
+ timeout-minutes: 10
name: examples
runs-on: ubuntu-latest
if: github.repository == 'openai/openai-node'
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 2f61d58b0..5b0015f5b 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "4.95.1"
+ ".": "4.96.0"
}
diff --git a/.stats.yml b/.stats.yml
index 848c5b5ad..d92408173 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 97
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5633633cc38734869cf7d993f7b549bb8e4d10e0ec45381ec2cd91507cd8eb8f.yml
-openapi_spec_hash: c855121b2b2324b99499c9244c21d24d
-config_hash: d20837393b73efdb19cd08e04c1cc9a1
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-8b68ae6b807dca92e914da1dd9e835a20f69b075e79102a264367fd7fddddb33.yml
+openapi_spec_hash: b6ade5b1a6327339e6669e1134de2d03
+config_hash: b597cd9a31e9e5ec709e2eefb4c54122
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1f864e203..47717a4eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,23 @@
# Changelog
+## 4.96.0 (2025-04-23)
+
+Full Changelog: [v4.95.1...v4.96.0](https://github.com/openai/openai-node/compare/v4.95.1...v4.96.0)
+
+### Features
+
+* **api:** adding new image model support ([a00d331](https://github.com/openai/openai-node/commit/a00d33190edd08df7d9c088c00ab7b77673f88ba))
+
+
+### Bug Fixes
+
+* **types:** export AssistantStream ([#1472](https://github.com/openai/openai-node/issues/1472)) ([626c844](https://github.com/openai/openai-node/commit/626c844a758a68ffbff48873d4773be2e3868952))
+
+
+### Chores
+
+* **ci:** add timeout thresholds for CI jobs ([e465063](https://github.com/openai/openai-node/commit/e46506351097f1de39c866c28b6ec20fa724fc36))
+
## 4.95.1 (2025-04-18)
Full Changelog: [v4.95.0...v4.95.1](https://github.com/openai/openai-node/compare/v4.95.0...v4.95.1)
diff --git a/api.md b/api.md
index 2eb54b34a..49e6548a8 100644
--- a/api.md
+++ b/api.md
@@ -249,7 +249,7 @@ Methods:
- client.fineTuning.checkpoints.permissions.create(fineTunedModelCheckpoint, { ...params }) -> PermissionCreateResponsesPage
- client.fineTuning.checkpoints.permissions.retrieve(fineTunedModelCheckpoint, { ...params }) -> PermissionRetrieveResponse
-- client.fineTuning.checkpoints.permissions.del(fineTunedModelCheckpoint) -> PermissionDeleteResponse
+- client.fineTuning.checkpoints.permissions.del(fineTunedModelCheckpoint, permissionId) -> PermissionDeleteResponse
# VectorStores
@@ -626,6 +626,10 @@ Types:
- ResponseOutputRefusal
- ResponseOutputText
- ResponseReasoningItem
+- ResponseReasoningSummaryPartAddedEvent
+- ResponseReasoningSummaryPartDoneEvent
+- ResponseReasoningSummaryTextDeltaEvent
+- ResponseReasoningSummaryTextDoneEvent
- ResponseRefusalDeltaEvent
- ResponseRefusalDoneEvent
- ResponseStatus
diff --git a/jsr.json b/jsr.json
index 8271c8522..6b574ce15 100644
--- a/jsr.json
+++ b/jsr.json
@@ -1,6 +1,6 @@
{
"name": "@openai/openai",
- "version": "4.95.1",
+ "version": "4.96.0",
"exports": {
".": "./index.ts",
"./helpers/zod": "./helpers/zod.ts",
diff --git a/package.json b/package.json
index 76fe7d4d0..7b4e86f8e 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "openai",
- "version": "4.95.1",
+ "version": "4.96.0",
"description": "The official TypeScript library for the OpenAI API",
"author": "OpenAI ",
"types": "dist/index.d.ts",
diff --git a/src/resources/beta/assistants.ts b/src/resources/beta/assistants.ts
index bf957db95..00a6ff2cf 100644
--- a/src/resources/beta/assistants.ts
+++ b/src/resources/beta/assistants.ts
@@ -9,6 +9,7 @@ import * as ThreadsAPI from './threads/threads';
import * as RunsAPI from './threads/runs/runs';
import * as StepsAPI from './threads/runs/steps';
import { CursorPage, type CursorPageParams } from '../../pagination';
+import { AssistantStream } from '../../lib/AssistantStream';
export class Assistants extends APIResource {
/**
@@ -1517,4 +1518,6 @@ export declare namespace Assistants {
type AssistantUpdateParams as AssistantUpdateParams,
type AssistantListParams as AssistantListParams,
};
+
+ export { AssistantStream };
}
diff --git a/src/resources/beta/realtime/realtime.ts b/src/resources/beta/realtime/realtime.ts
index 1c02fdd1a..5012b1edd 100644
--- a/src/resources/beta/realtime/realtime.ts
+++ b/src/resources/beta/realtime/realtime.ts
@@ -915,12 +915,34 @@ export type RealtimeClientEvent =
| ConversationItemTruncateEvent
| InputAudioBufferAppendEvent
| InputAudioBufferClearEvent
+ | RealtimeClientEvent.OutputAudioBufferClear
| InputAudioBufferCommitEvent
| ResponseCancelEvent
| ResponseCreateEvent
| SessionUpdateEvent
| TranscriptionSessionUpdate;
+export namespace RealtimeClientEvent {
+ /**
+ * **WebRTC Only:** Emit to cut off the current audio response. This will trigger
+ * the server to stop generating audio and emit a `output_audio_buffer.cleared`
+ * event. This event should be preceded by a `response.cancel` client event to stop
+ * the generation of the current response.
+ * [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc).
+ */
+ export interface OutputAudioBufferClear {
+ /**
+ * The event type, must be `output_audio_buffer.clear`.
+ */
+ type: 'output_audio_buffer.clear';
+
+ /**
+ * The unique ID of the client event used for error handling.
+ */
+ event_id?: string;
+ }
+}
+
/**
* The response resource.
*/
@@ -1174,7 +1196,10 @@ export type RealtimeServerEvent =
| ResponseTextDoneEvent
| SessionCreatedEvent
| SessionUpdatedEvent
- | TranscriptionSessionUpdatedEvent;
+ | TranscriptionSessionUpdatedEvent
+ | RealtimeServerEvent.OutputAudioBufferStarted
+ | RealtimeServerEvent.OutputAudioBufferStopped
+ | RealtimeServerEvent.OutputAudioBufferCleared;
export namespace RealtimeServerEvent {
/**
@@ -1197,6 +1222,77 @@ export namespace RealtimeServerEvent {
*/
type: 'conversation.item.retrieved';
}
+
+ /**
+ * **WebRTC Only:** Emitted when the server begins streaming audio to the client.
+ * This event is emitted after an audio content part has been added
+ * (`response.content_part.added`) to the response.
+ * [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc).
+ */
+ export interface OutputAudioBufferStarted {
+ /**
+ * The unique ID of the server event.
+ */
+ event_id: string;
+
+ /**
+ * The unique ID of the response that produced the audio.
+ */
+ response_id: string;
+
+ /**
+ * The event type, must be `output_audio_buffer.started`.
+ */
+ type: 'output_audio_buffer.started';
+ }
+
+ /**
+ * **WebRTC Only:** Emitted when the output audio buffer has been completely
+ * drained on the server, and no more audio is forthcoming. This event is emitted
+ * after the full response data has been sent to the client (`response.done`).
+ * [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc).
+ */
+ export interface OutputAudioBufferStopped {
+ /**
+ * The unique ID of the server event.
+ */
+ event_id: string;
+
+ /**
+ * The unique ID of the response that produced the audio.
+ */
+ response_id: string;
+
+ /**
+ * The event type, must be `output_audio_buffer.stopped`.
+ */
+ type: 'output_audio_buffer.stopped';
+ }
+
+ /**
+ * **WebRTC Only:** Emitted when the output audio buffer is cleared. This happens
+ * either in VAD mode when the user has interrupted
+ * (`input_audio_buffer.speech_started`), or when the client has emitted the
+ * `output_audio_buffer.clear` event to manually cut off the current audio
+ * response.
+ * [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc).
+ */
+ export interface OutputAudioBufferCleared {
+ /**
+ * The unique ID of the server event.
+ */
+ event_id: string;
+
+ /**
+ * The unique ID of the response that produced the audio.
+ */
+ response_id: string;
+
+ /**
+ * The event type, must be `output_audio_buffer.cleared`.
+ */
+ type: 'output_audio_buffer.cleared';
+ }
}
/**
diff --git a/src/resources/beta/threads/threads.ts b/src/resources/beta/threads/threads.ts
index 8075ba0ac..2e5ab1cc8 100644
--- a/src/resources/beta/threads/threads.ts
+++ b/src/resources/beta/threads/threads.ts
@@ -686,9 +686,7 @@ export interface ThreadCreateAndRunParamsBase {
* Override the tools the assistant can use for this run. This is useful for
* modifying the behavior on a per-run basis.
*/
- tools?: Array<
- AssistantsAPI.CodeInterpreterTool | AssistantsAPI.FileSearchTool | AssistantsAPI.FunctionTool
- > | null;
+ tools?: Array<AssistantsAPI.AssistantTool> | null;
/**
* An alternative to sampling with temperature, called nucleus sampling, where the
@@ -1718,4 +1716,6 @@ export declare namespace Threads {
type MessageUpdateParams as MessageUpdateParams,
type MessageListParams as MessageListParams,
};
+
+ export { AssistantStream };
}
diff --git a/src/resources/evals/evals.ts b/src/resources/evals/evals.ts
index 84ff6d1bb..caef7acc0 100644
--- a/src/resources/evals/evals.ts
+++ b/src/resources/evals/evals.ts
@@ -4,6 +4,7 @@ import { APIResource } from '../../resource';
import { isRequestOptions } from '../../core';
import * as Core from '../../core';
import * as Shared from '../shared';
+import * as ResponsesAPI from '../responses/responses';
import * as RunsAPI from './runs/runs';
import {
CreateEvalCompletionsRunDataSource,
@@ -107,7 +108,7 @@ export interface EvalCustomDataSourceConfig {
* the evaluation.
*/
export interface EvalLabelModelGrader {
- input: Array<EvalLabelModelGrader.InputMessage | EvalLabelModelGrader.Assistant>;
+ input: Array;
/**
* The labels to assign to each item in the evaluation.
@@ -136,57 +137,43 @@ export interface EvalLabelModelGrader {
}
export namespace EvalLabelModelGrader {
- export interface InputMessage {
- content: InputMessage.Content;
-
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface Input {
/**
- * The role of the message. One of `user`, `system`, or `developer`.
+ * Text inputs to the model - can contain template strings.
*/
- role: 'user' | 'system' | 'developer';
+ content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
/**
- * The type of item, which is always `message`.
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
*/
- type: 'message';
- }
-
- export namespace InputMessage {
- export interface Content {
- /**
- * The text content.
- */
- text: string;
-
- /**
- * The type of content, which is always `input_text`.
- */
- type: 'input_text';
- }
- }
-
- export interface Assistant {
- content: Assistant.Content;
+ role: 'user' | 'assistant' | 'system' | 'developer';
/**
- * The role of the message. Must be `assistant` for output.
+ * The type of the message input. Always `message`.
*/
- role: 'assistant';
+ type?: 'message';
+ }
+ export namespace Input {
/**
- * The type of item, which is always `message`.
+ * A text output from the model.
*/
- type: 'message';
- }
-
- export namespace Assistant {
- export interface Content {
+ export interface OutputText {
/**
- * The text content.
+ * The text output from the model.
*/
text: string;
/**
- * The type of content, which is always `output_text`.
+ * The type of the output text. Always `output_text`.
*/
type: 'output_text';
}
@@ -259,8 +246,8 @@ export interface EvalStringCheckGrader {
*/
export interface EvalTextSimilarityGrader {
/**
- * The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, `gleu`,
- * `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
+ * The evaluation metric to use. One of `fuzzy_match`, `bleu`, `gleu`, `meteor`,
+ * `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
*/
evaluation_metric:
| 'fuzzy_match'
@@ -272,8 +259,7 @@ export interface EvalTextSimilarityGrader {
| 'rouge_3'
| 'rouge_4'
| 'rouge_5'
- | 'rouge_l'
- | 'cosine';
+ | 'rouge_l';
/**
* The text being graded.
@@ -346,14 +332,131 @@ export interface EvalCreateResponse {
object: 'eval';
/**
- * Indicates whether the evaluation is shared with OpenAI.
+ * A list of testing criteria.
*/
- share_with_openai: boolean;
+ testing_criteria: Array<
+ | EvalLabelModelGrader
+ | EvalStringCheckGrader
+ | EvalTextSimilarityGrader
+ | EvalCreateResponse.Python
+ | EvalCreateResponse.ScoreModel
+ >;
+}
+export namespace EvalCreateResponse {
/**
- * A list of testing criteria.
+ * A PythonGrader object that runs a python script on the input.
*/
- testing_criteria: Array<EvalLabelModelGrader | EvalStringCheckGrader | EvalTextSimilarityGrader>;
+ export interface Python {
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The source code of the python script.
+ */
+ source: string;
+
+ /**
+ * The object type, which is always `python`.
+ */
+ type: 'python';
+
+ /**
+ * The image tag to use for the python script.
+ */
+ image_tag?: string;
+
+ /**
+ * The threshold for the score.
+ */
+ pass_threshold?: number;
+ }
+
+ /**
+ * A ScoreModelGrader object that uses a model to assign a score to the input.
+ */
+ export interface ScoreModel {
+ /**
+ * The input text. This may include template strings.
+ */
+ input: Array<ScoreModel.Input>;
+
+ /**
+ * The model to use for the evaluation.
+ */
+ model: string;
+
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The object type, which is always `score_model`.
+ */
+ type: 'score_model';
+
+ /**
+ * The threshold for the score.
+ */
+ pass_threshold?: number;
+
+ /**
+ * The range of the score. Defaults to `[0, 1]`.
+ */
+ range?: Array<number>;
+
+ /**
+ * The sampling parameters for the model.
+ */
+ sampling_params?: unknown;
+ }
+
+ export namespace ScoreModel {
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface Input {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
+
+ /**
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
+ */
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
+ }
+
+ export namespace Input {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
+ /**
+ * The text output from the model.
+ */
+ text: string;
+
+ /**
+ * The type of the output text. Always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+ }
}
/**
@@ -401,14 +504,131 @@ export interface EvalRetrieveResponse {
object: 'eval';
/**
- * Indicates whether the evaluation is shared with OpenAI.
+ * A list of testing criteria.
*/
- share_with_openai: boolean;
+ testing_criteria: Array<
+ | EvalLabelModelGrader
+ | EvalStringCheckGrader
+ | EvalTextSimilarityGrader
+ | EvalRetrieveResponse.Python
+ | EvalRetrieveResponse.ScoreModel
+ >;
+}
+export namespace EvalRetrieveResponse {
/**
- * A list of testing criteria.
+ * A PythonGrader object that runs a python script on the input.
+ */
+ export interface Python {
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The source code of the python script.
+ */
+ source: string;
+
+ /**
+ * The object type, which is always `python`.
+ */
+ type: 'python';
+
+ /**
+ * The image tag to use for the python script.
+ */
+ image_tag?: string;
+
+ /**
+ * The threshold for the score.
+ */
+ pass_threshold?: number;
+ }
+
+ /**
+ * A ScoreModelGrader object that uses a model to assign a score to the input.
*/
- testing_criteria: Array<EvalLabelModelGrader | EvalStringCheckGrader | EvalTextSimilarityGrader>;
+ export interface ScoreModel {
+ /**
+ * The input text. This may include template strings.
+ */
+ input: Array<ScoreModel.Input>;
+
+ /**
+ * The model to use for the evaluation.
+ */
+ model: string;
+
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The object type, which is always `score_model`.
+ */
+ type: 'score_model';
+
+ /**
+ * The threshold for the score.
+ */
+ pass_threshold?: number;
+
+ /**
+ * The range of the score. Defaults to `[0, 1]`.
+ */
+ range?: Array<number>;
+
+ /**
+ * The sampling parameters for the model.
+ */
+ sampling_params?: unknown;
+ }
+
+ export namespace ScoreModel {
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface Input {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
+
+ /**
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
+ */
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
+ }
+
+ export namespace Input {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
+ /**
+ * The text output from the model.
+ */
+ text: string;
+
+ /**
+ * The type of the output text. Always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+ }
}
/**
@@ -456,14 +676,131 @@ export interface EvalUpdateResponse {
object: 'eval';
/**
- * Indicates whether the evaluation is shared with OpenAI.
+ * A list of testing criteria.
+ */
+ testing_criteria: Array<
+ | EvalLabelModelGrader
+ | EvalStringCheckGrader
+ | EvalTextSimilarityGrader
+ | EvalUpdateResponse.Python
+ | EvalUpdateResponse.ScoreModel
+ >;
+}
+
+export namespace EvalUpdateResponse {
+ /**
+ * A PythonGrader object that runs a python script on the input.
*/
- share_with_openai: boolean;
+ export interface Python {
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The source code of the python script.
+ */
+ source: string;
+
+ /**
+ * The object type, which is always `python`.
+ */
+ type: 'python';
+
+ /**
+ * The image tag to use for the python script.
+ */
+ image_tag?: string;
+
+ /**
+ * The threshold for the score.
+ */
+ pass_threshold?: number;
+ }
/**
- * A list of testing criteria.
+ * A ScoreModelGrader object that uses a model to assign a score to the input.
*/
- testing_criteria: Array<EvalLabelModelGrader | EvalStringCheckGrader | EvalTextSimilarityGrader>;
+ export interface ScoreModel {
+ /**
+ * The input text. This may include template strings.
+ */
+ input: Array<ScoreModel.Input>;
+
+ /**
+ * The model to use for the evaluation.
+ */
+ model: string;
+
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The object type, which is always `score_model`.
+ */
+ type: 'score_model';
+
+ /**
+ * The threshold for the score.
+ */
+ pass_threshold?: number;
+
+ /**
+ * The range of the score. Defaults to `[0, 1]`.
+ */
+ range?: Array<number>;
+
+ /**
+ * The sampling parameters for the model.
+ */
+ sampling_params?: unknown;
+ }
+
+ export namespace ScoreModel {
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface Input {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
+
+ /**
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
+ */
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
+ }
+
+ export namespace Input {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
+ /**
+ * The text output from the model.
+ */
+ text: string;
+
+ /**
+ * The type of the output text. Always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+ }
}
/**
@@ -511,14 +848,131 @@ export interface EvalListResponse {
object: 'eval';
/**
- * Indicates whether the evaluation is shared with OpenAI.
+ * A list of testing criteria.
*/
- share_with_openai: boolean;
+ testing_criteria: Array<
+ | EvalLabelModelGrader
+ | EvalStringCheckGrader
+ | EvalTextSimilarityGrader
+ | EvalListResponse.Python
+ | EvalListResponse.ScoreModel
+ >;
+}
+export namespace EvalListResponse {
/**
- * A list of testing criteria.
+ * A PythonGrader object that runs a python script on the input.
+ */
+ export interface Python {
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The source code of the python script.
+ */
+ source: string;
+
+ /**
+ * The object type, which is always `python`.
+ */
+ type: 'python';
+
+ /**
+ * The image tag to use for the python script.
+ */
+ image_tag?: string;
+
+ /**
+ * The threshold for the score.
+ */
+ pass_threshold?: number;
+ }
+
+ /**
+ * A ScoreModelGrader object that uses a model to assign a score to the input.
*/
- testing_criteria: Array<EvalLabelModelGrader | EvalStringCheckGrader | EvalTextSimilarityGrader>;
+ export interface ScoreModel {
+ /**
+ * The input text. This may include template strings.
+ */
+ input: Array<ScoreModel.Input>;
+
+ /**
+ * The model to use for the evaluation.
+ */
+ model: string;
+
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The object type, which is always `score_model`.
+ */
+ type: 'score_model';
+
+ /**
+ * The threshold for the score.
+ */
+ pass_threshold?: number;
+
+ /**
+ * The range of the score. Defaults to `[0, 1]`.
+ */
+ range?: Array<number>;
+
+ /**
+ * The sampling parameters for the model.
+ */
+ sampling_params?: unknown;
+ }
+
+ export namespace ScoreModel {
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface Input {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
+
+ /**
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
+ */
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
+ }
+
+ export namespace Input {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
+ /**
+ * The text output from the model.
+ */
+ text: string;
+
+ /**
+ * The type of the output text. Always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+ }
}
export interface EvalDeleteResponse {
@@ -533,12 +987,18 @@ export interface EvalCreateParams {
/**
* The configuration for the data source used for the evaluation runs.
*/
- data_source_config: EvalCreateParams.Custom | EvalCreateParams.StoredCompletions;
+ data_source_config: EvalCreateParams.Custom | EvalCreateParams.Logs;
/**
* A list of graders for all eval runs in this group.
*/
- testing_criteria: Array<EvalCreateParams.LabelModel | EvalStringCheckGrader | EvalTextSimilarityGrader>;
+ testing_criteria: Array<
+ | EvalCreateParams.LabelModel
+ | EvalStringCheckGrader
+ | EvalTextSimilarityGrader
+ | EvalCreateParams.Python
+ | EvalCreateParams.ScoreModel
+ >;
/**
* Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -554,11 +1014,6 @@ export interface EvalCreateParams {
* The name of the evaluation.
*/
name?: string;
-
- /**
- * Indicates whether the evaluation is shared with OpenAI.
- */
- share_with_openai?: boolean;
}
export namespace EvalCreateParams {
@@ -572,7 +1027,7 @@ export namespace EvalCreateParams {
*/
export interface Custom {
/**
- * The json schema for the run data source items.
+ * The json schema for each row in the data source.
*/
item_schema: Record<string, unknown>;
@@ -582,7 +1037,8 @@ export namespace EvalCreateParams {
type: 'custom';
/**
- * Whether to include the sample schema in the data source.
+ * Whether the eval should expect you to populate the sample namespace (ie, by
+ * generating responses off of your data source)
*/
include_sample_schema?: boolean;
}
@@ -592,21 +1048,16 @@ export namespace EvalCreateParams {
* completions query. This is usually metadata like `usecase=chatbot` or
* `prompt-version=v2`, etc.
*/
- export interface StoredCompletions {
+ export interface Logs {
/**
- * The type of data source. Always `stored_completions`.
+ * The type of data source. Always `logs`.
*/
- type: 'stored_completions';
+ type: 'logs';
/**
- * Set of 16 key-value pairs that can be attached to an object. This can be useful
- * for storing additional information about the object in a structured format, and
- * querying for objects via API or the dashboard.
- *
- * Keys are strings with a maximum length of 64 characters. Values are strings with
- * a maximum length of 512 characters.
+ * Metadata filters for the logs data source.
*/
- metadata?: Shared.Metadata | null;
+ metadata?: Record<string, unknown>;
}
/**
@@ -614,7 +1065,11 @@ export namespace EvalCreateParams {
* the evaluation.
*/
export interface LabelModel {
- input: Array<LabelModel.SimpleInputMessage | LabelModel.InputMessage | LabelModel.OutputMessage>;
+ /**
+ * A list of chat messages forming the prompt or context. May include variable
+ * references to the "item" namespace, ie {{item.name}}.
+ */
+ input: Array<LabelModel.SimpleInputMessage | LabelModel.EvalItem>;
/**
* The labels to classify to each item in the evaluation.
@@ -655,57 +1110,157 @@ export namespace EvalCreateParams {
role: string;
}
- export interface InputMessage {
- content: InputMessage.Content;
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface EvalItem {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
/**
- * The role of the message. One of `user`, `system`, or `developer`.
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
*/
- role: 'user' | 'system' | 'developer';
+ role: 'user' | 'assistant' | 'system' | 'developer';
/**
- * The type of item, which is always `message`.
+ * The type of the message input. Always `message`.
*/
- type: 'message';
+ type?: 'message';
}
- export namespace InputMessage {
- export interface Content {
+ export namespace EvalItem {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
/**
- * The text content.
+ * The text output from the model.
*/
text: string;
/**
- * The type of content, which is always `input_text`.
+ * The type of the output text. Always `output_text`.
*/
- type: 'input_text';
+ type: 'output_text';
}
}
+ }
+
+ /**
+ * A PythonGrader object that runs a python script on the input.
+ */
+ export interface Python {
+ /**
+ * The name of the grader.
+ */
+ name: string;
- export interface OutputMessage {
- content: OutputMessage.Content;
+ /**
+ * The source code of the python script.
+ */
+ source: string;
+
+ /**
+ * The object type, which is always `python`.
+ */
+ type: 'python';
+
+ /**
+ * The image tag to use for the python script.
+ */
+ image_tag?: string;
+
+ /**
+ * The threshold for the score.
+ */
+ pass_threshold?: number;
+ }
+
+ /**
+ * A ScoreModelGrader object that uses a model to assign a score to the input.
+ */
+ export interface ScoreModel {
+ /**
+ * The input text. This may include template strings.
+ */
+ input: Array<ScoreModel.Input>;
+
+ /**
+ * The model to use for the evaluation.
+ */
+ model: string;
+
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The object type, which is always `score_model`.
+ */
+ type: 'score_model';
+
+ /**
+ * The threshold for the score.
+ */
+ pass_threshold?: number;
+
+ /**
+ * The range of the score. Defaults to `[0, 1]`.
+ */
+ range?: Array<number>;
+ /**
+ * The sampling parameters for the model.
+ */
+ sampling_params?: unknown;
+ }
+
+ export namespace ScoreModel {
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface Input {
/**
- * The role of the message. Must be `assistant` for output.
+ * Text inputs to the model - can contain template strings.
*/
- role: 'assistant';
+ content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
/**
- * The type of item, which is always `message`.
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
*/
- type: 'message';
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
}
- export namespace OutputMessage {
- export interface Content {
+ export namespace Input {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
/**
- * The text content.
+ * The text output from the model.
*/
text: string;
/**
- * The type of content, which is always `output_text`.
+ * The type of the output text. Always `output_text`.
*/
type: 'output_text';
}
diff --git a/src/resources/evals/runs/runs.ts b/src/resources/evals/runs/runs.ts
index ca2b7f424..50c07a514 100644
--- a/src/resources/evals/runs/runs.ts
+++ b/src/resources/evals/runs/runs.ts
@@ -4,6 +4,7 @@ import { APIResource } from '../../../resource';
import { isRequestOptions } from '../../../core';
import * as Core from '../../../core';
import * as Shared from '../../shared';
+import * as ResponsesAPI from '../../responses/responses';
import * as OutputItemsAPI from './output-items';
import {
OutputItemListParams,
@@ -83,15 +84,6 @@ export class RunListResponsesPage extends CursorPage<RunListResponse> {}
* A CompletionsRunDataSource object describing a model sampling configuration.
*/
export interface CreateEvalCompletionsRunDataSource {
- input_messages:
- | CreateEvalCompletionsRunDataSource.Template
- | CreateEvalCompletionsRunDataSource.ItemReference;
-
- /**
- * The name of the model to use for generating completions (e.g. "o3-mini").
- */
- model: string;
-
/**
* A StoredCompletionsRunDataSource configuration describing a set of filters
*/
@@ -105,105 +97,19 @@ export interface CreateEvalCompletionsRunDataSource {
*/
type: 'completions';
+ input_messages?:
+ | CreateEvalCompletionsRunDataSource.Template
+ | CreateEvalCompletionsRunDataSource.ItemReference;
+
+ /**
+ * The name of the model to use for generating completions (e.g. "o3-mini").
+ */
+ model?: string;
+
sampling_params?: CreateEvalCompletionsRunDataSource.SamplingParams;
}
export namespace CreateEvalCompletionsRunDataSource {
- export interface Template {
- /**
- * A list of chat messages forming the prompt or context. May include variable
- * references to the "item" namespace, ie {{item.name}}.
- */
- template: Array<Template.ChatMessage | Template.InputMessage | Template.OutputMessage>;
-
- /**
- * The type of input messages. Always `template`.
- */
- type: 'template';
- }
-
- export namespace Template {
- export interface ChatMessage {
- /**
- * The content of the message.
- */
- content: string;
-
- /**
- * The role of the message (e.g. "system", "assistant", "user").
- */
- role: string;
- }
-
- export interface InputMessage {
- content: InputMessage.Content;
-
- /**
- * The role of the message. One of `user`, `system`, or `developer`.
- */
- role: 'user' | 'system' | 'developer';
-
- /**
- * The type of item, which is always `message`.
- */
- type: 'message';
- }
-
- export namespace InputMessage {
- export interface Content {
- /**
- * The text content.
- */
- text: string;
-
- /**
- * The type of content, which is always `input_text`.
- */
- type: 'input_text';
- }
- }
-
- export interface OutputMessage {
- content: OutputMessage.Content;
-
- /**
- * The role of the message. Must be `assistant` for output.
- */
- role: 'assistant';
-
- /**
- * The type of item, which is always `message`.
- */
- type: 'message';
- }
-
- export namespace OutputMessage {
- export interface Content {
- /**
- * The text content.
- */
- text: string;
-
- /**
- * The type of content, which is always `output_text`.
- */
- type: 'output_text';
- }
- }
- }
-
- export interface ItemReference {
- /**
- * A reference to a variable in the "item" namespace. Ie, "item.name"
- */
- item_reference: string;
-
- /**
- * The type of input messages. Always `item_reference`.
- */
- type: 'item_reference';
- }
-
export interface FileContent {
/**
* The content of the jsonl file.
@@ -240,20 +146,25 @@ export namespace CreateEvalCompletionsRunDataSource {
* A StoredCompletionsRunDataSource configuration describing a set of filters
*/
export interface StoredCompletions {
+ /**
+ * The type of source. Always `stored_completions`.
+ */
+ type: 'stored_completions';
+
/**
* An optional Unix timestamp to filter items created after this time.
*/
- created_after: number | null;
+ created_after?: number | null;
/**
* An optional Unix timestamp to filter items created before this time.
*/
- created_before: number | null;
+ created_before?: number | null;
/**
* An optional maximum number of items to return.
*/
- limit: number | null;
+ limit?: number | null;
/**
* Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -263,17 +174,81 @@ export namespace CreateEvalCompletionsRunDataSource {
* Keys are strings with a maximum length of 64 characters. Values are strings with
* a maximum length of 512 characters.
*/
- metadata: Shared.Metadata | null;
+ metadata?: Shared.Metadata | null;
/**
* An optional model to filter by (e.g., 'gpt-4o').
*/
- model: string | null;
+ model?: string | null;
+ }
+ export interface Template {
/**
- * The type of source. Always `stored_completions`.
+ * A list of chat messages forming the prompt or context. May include variable
+ * references to the "item" namespace, ie {{item.name}}.
*/
- type: 'stored_completions';
+ template: Array;
+
+ /**
+ * The type of input messages. Always `template`.
+ */
+ type: 'template';
+ }
+
+ export namespace Template {
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface Message {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | Message.OutputText;
+
+ /**
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
+ */
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
+ }
+
+ export namespace Message {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
+ /**
+ * The text output from the model.
+ */
+ text: string;
+
+ /**
+ * The type of the output text. Always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+ }
+
+ export interface ItemReference {
+ /**
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
+ */
+ item_reference: string;
+
+ /**
+ * The type of input messages. Always `item_reference`.
+ */
+ type: 'item_reference';
}
export interface SamplingParams {
@@ -378,7 +353,10 @@ export interface RunCreateResponse {
/**
* Information about the run's data source.
*/
- data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+ data_source:
+ | CreateEvalJSONLRunDataSource
+ | CreateEvalCompletionsRunDataSource
+ | RunCreateResponse.Completions;
/**
* An object representing an error response from the Eval API.
@@ -442,162 +420,240 @@ export interface RunCreateResponse {
}
export namespace RunCreateResponse {
- export interface PerModelUsage {
- /**
- * The number of tokens retrieved from cache.
- */
- cached_tokens: number;
-
+ /**
+ * A ResponsesRunDataSource object describing a model sampling configuration.
+ */
+ export interface Completions {
/**
- * The number of completion tokens generated.
+ * A EvalResponsesSource object describing a run data source configuration.
*/
- completion_tokens: number;
+ source: Completions.FileContent | Completions.FileID | Completions.Responses;
/**
- * The number of invocations.
+ * The type of run data source. Always `completions`.
*/
- invocation_count: number;
+ type: 'completions';
- /**
- * The name of the model.
- */
- model_name: string;
+ input_messages?: Completions.Template | Completions.ItemReference;
/**
- * The number of prompt tokens used.
+ * The name of the model to use for generating completions (e.g. "o3-mini").
*/
- prompt_tokens: number;
+ model?: string;
- /**
- * The total number of tokens used.
- */
- total_tokens: number;
+ sampling_params?: Completions.SamplingParams;
}
- export interface PerTestingCriteriaResult {
- /**
- * Number of tests failed for this criteria.
- */
- failed: number;
+ export namespace Completions {
+ export interface FileContent {
+ /**
+ * The content of the jsonl file.
+ */
+ content: Array<FileContent.Content>;
- /**
- * Number of tests passed for this criteria.
- */
- passed: number;
+ /**
+ * The type of jsonl source. Always `file_content`.
+ */
+ type: 'file_content';
+ }
- /**
- * A description of the testing criteria.
- */
- testing_criteria: string;
- }
+ export namespace FileContent {
+ export interface Content {
+ item: Record<string, unknown>;
- /**
- * Counters summarizing the outcomes of the evaluation run.
- */
- export interface ResultCounts {
- /**
- * Number of output items that resulted in an error.
- */
- errored: number;
+ sample?: Record<string, unknown>;
+ }
+ }
- /**
- * Number of output items that failed to pass the evaluation.
- */
- failed: number;
+ export interface FileID {
+ /**
+ * The identifier of the file.
+ */
+ id: string;
- /**
- * Number of output items that passed the evaluation.
- */
- passed: number;
+ /**
+ * The type of jsonl source. Always `file_id`.
+ */
+ type: 'file_id';
+ }
/**
- * Total number of executed output items.
+ * A EvalResponsesSource object describing a run data source configuration.
*/
- total: number;
- }
-}
+ export interface Responses {
+ /**
+ * The type of run data source. Always `responses`.
+ */
+ type: 'responses';
-/**
- * A schema representing an evaluation run.
- */
-export interface RunRetrieveResponse {
- /**
- * Unique identifier for the evaluation run.
- */
- id: string;
+ /**
+ * Whether to allow parallel tool calls. This is a query parameter used to select
+ * responses.
+ */
+ allow_parallel_tool_calls?: boolean | null;
- /**
- * Unix timestamp (in seconds) when the evaluation run was created.
- */
- created_at: number;
+ /**
+ * Only include items created after this timestamp (inclusive). This is a query
+ * parameter used to select responses.
+ */
+ created_after?: number | null;
- /**
- * Information about the run's data source.
- */
- data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+ /**
+ * Only include items created before this timestamp (inclusive). This is a query
+ * parameter used to select responses.
+ */
+ created_before?: number | null;
- /**
- * An object representing an error response from the Eval API.
- */
- error: EvalAPIError;
+ /**
+ * Whether the response has tool calls. This is a query parameter used to select
+ * responses.
+ */
+ has_tool_calls?: boolean | null;
- /**
- * The identifier of the associated evaluation.
- */
- eval_id: string;
+ /**
+ * Optional search string for instructions. This is a query parameter used to
+ * select responses.
+ */
+ instructions_search?: string | null;
- /**
- * Set of 16 key-value pairs that can be attached to an object. This can be useful
- * for storing additional information about the object in a structured format, and
- * querying for objects via API or the dashboard.
- *
- * Keys are strings with a maximum length of 64 characters. Values are strings with
- * a maximum length of 512 characters.
- */
- metadata: Shared.Metadata | null;
+ /**
+ * Metadata filter for the responses. This is a query parameter used to select
+ * responses.
+ */
+ metadata?: unknown | null;
- /**
- * The model that is evaluated, if applicable.
- */
- model: string;
+ /**
+ * The name of the model to find responses for. This is a query parameter used to
+ * select responses.
+ */
+ model?: string | null;
- /**
- * The name of the evaluation run.
- */
- name: string;
+ /**
+ * Optional reasoning effort parameter. This is a query parameter used to select
+ * responses.
+ */
+ reasoning_effort?: Shared.ReasoningEffort | null;
- /**
- * The type of the object. Always "eval.run".
- */
- object: 'eval.run';
+ /**
+ * Sampling temperature. This is a query parameter used to select responses.
+ */
+ temperature?: number | null;
- /**
- * Usage statistics for each model during the evaluation run.
- */
- per_model_usage: Array<RunRetrieveResponse.PerModelUsage>;
+ /**
+ * Nucleus sampling parameter. This is a query parameter used to select responses.
+ */
+ top_p?: number | null;
- /**
- * Results per testing criteria applied during the evaluation run.
- */
- per_testing_criteria_results: Array<RunRetrieveResponse.PerTestingCriteriaResult>;
+ /**
+ * List of user identifiers. This is a query parameter used to select responses.
+ */
+ users?: Array<string> | null;
+ }
- /**
- * The URL to the rendered evaluation run report on the UI dashboard.
- */
- report_url: string;
+ export interface Template {
+ /**
+ * A list of chat messages forming the prompt or context. May include variable
+ * references to the "item" namespace, ie {{item.name}}.
+ */
+ template: Array<Template.ChatMessage | Template.EvalItem>;
- /**
- * Counters summarizing the outcomes of the evaluation run.
- */
- result_counts: RunRetrieveResponse.ResultCounts;
+ /**
+ * The type of input messages. Always `template`.
+ */
+ type: 'template';
+ }
- /**
- * The status of the evaluation run.
- */
- status: string;
-}
+ export namespace Template {
+ export interface ChatMessage {
+ /**
+ * The content of the message.
+ */
+ content: string;
+
+ /**
+ * The role of the message (e.g. "system", "assistant", "user").
+ */
+ role: string;
+ }
+
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface EvalItem {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
+
+ /**
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
+ */
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
+ }
+
+ export namespace EvalItem {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
+ /**
+ * The text output from the model.
+ */
+ text: string;
+
+ /**
+ * The type of the output text. Always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+ }
+
+ export interface ItemReference {
+ /**
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
+ */
+ item_reference: string;
+
+ /**
+ * The type of input messages. Always `item_reference`.
+ */
+ type: 'item_reference';
+ }
+
+ export interface SamplingParams {
+ /**
+ * The maximum number of tokens in the generated output.
+ */
+ max_completion_tokens?: number;
+
+ /**
+ * A seed value to initialize the randomness, during sampling.
+ */
+ seed?: number;
+
+ /**
+ * A higher temperature increases randomness in the outputs.
+ */
+ temperature?: number;
+
+ /**
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+ */
+ top_p?: number;
+ }
+ }
-export namespace RunRetrieveResponse {
export interface PerModelUsage {
/**
* The number of tokens retrieved from cache.
@@ -676,7 +732,7 @@ export namespace RunRetrieveResponse {
/**
* A schema representing an evaluation run.
*/
-export interface RunListResponse {
+export interface RunRetrieveResponse {
/**
* Unique identifier for the evaluation run.
*/
@@ -690,7 +746,10 @@ export interface RunListResponse {
/**
* Information about the run's data source.
*/
- data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+ data_source:
+ | CreateEvalJSONLRunDataSource
+ | CreateEvalCompletionsRunDataSource
+ | RunRetrieveResponse.Completions;
/**
* An object representing an error response from the Eval API.
@@ -730,12 +789,12 @@ export interface RunListResponse {
/**
* Usage statistics for each model during the evaluation run.
*/
- per_model_usage: Array<RunListResponse.PerModelUsage>;
+ per_model_usage: Array<RunRetrieveResponse.PerModelUsage>;
/**
* Results per testing criteria applied during the evaluation run.
*/
- per_testing_criteria_results: Array<RunListResponse.PerTestingCriteriaResult>;
+ per_testing_criteria_results: Array<RunRetrieveResponse.PerTestingCriteriaResult>;
/**
* The URL to the rendered evaluation run report on the UI dashboard.
@@ -745,7 +804,7 @@ export interface RunListResponse {
/**
* Counters summarizing the outcomes of the evaluation run.
*/
- result_counts: RunListResponse.ResultCounts;
+ result_counts: RunRetrieveResponse.ResultCounts;
/**
* The status of the evaluation run.
@@ -753,7 +812,241 @@ export interface RunListResponse {
status: string;
}
-export namespace RunListResponse {
+export namespace RunRetrieveResponse {
+ /**
+ * A ResponsesRunDataSource object describing a model sampling configuration.
+ */
+ export interface Completions {
+ /**
+ * A EvalResponsesSource object describing a run data source configuration.
+ */
+ source: Completions.FileContent | Completions.FileID | Completions.Responses;
+
+ /**
+ * The type of run data source. Always `completions`.
+ */
+ type: 'completions';
+
+ input_messages?: Completions.Template | Completions.ItemReference;
+
+ /**
+ * The name of the model to use for generating completions (e.g. "o3-mini").
+ */
+ model?: string;
+
+ sampling_params?: Completions.SamplingParams;
+ }
+
+ export namespace Completions {
+ export interface FileContent {
+ /**
+ * The content of the jsonl file.
+ */
+ content: Array<FileContent.Content>;
+
+ /**
+ * The type of jsonl source. Always `file_content`.
+ */
+ type: 'file_content';
+ }
+
+ export namespace FileContent {
+ export interface Content {
+ item: Record<string, unknown>;
+
+ sample?: Record<string, unknown>;
+ }
+ }
+
+ export interface FileID {
+ /**
+ * The identifier of the file.
+ */
+ id: string;
+
+ /**
+ * The type of jsonl source. Always `file_id`.
+ */
+ type: 'file_id';
+ }
+
+ /**
+ * A EvalResponsesSource object describing a run data source configuration.
+ */
+ export interface Responses {
+ /**
+ * The type of run data source. Always `responses`.
+ */
+ type: 'responses';
+
+ /**
+ * Whether to allow parallel tool calls. This is a query parameter used to select
+ * responses.
+ */
+ allow_parallel_tool_calls?: boolean | null;
+
+ /**
+ * Only include items created after this timestamp (inclusive). This is a query
+ * parameter used to select responses.
+ */
+ created_after?: number | null;
+
+ /**
+ * Only include items created before this timestamp (inclusive). This is a query
+ * parameter used to select responses.
+ */
+ created_before?: number | null;
+
+ /**
+ * Whether the response has tool calls. This is a query parameter used to select
+ * responses.
+ */
+ has_tool_calls?: boolean | null;
+
+ /**
+ * Optional search string for instructions. This is a query parameter used to
+ * select responses.
+ */
+ instructions_search?: string | null;
+
+ /**
+ * Metadata filter for the responses. This is a query parameter used to select
+ * responses.
+ */
+ metadata?: unknown | null;
+
+ /**
+ * The name of the model to find responses for. This is a query parameter used to
+ * select responses.
+ */
+ model?: string | null;
+
+ /**
+ * Optional reasoning effort parameter. This is a query parameter used to select
+ * responses.
+ */
+ reasoning_effort?: Shared.ReasoningEffort | null;
+
+ /**
+ * Sampling temperature. This is a query parameter used to select responses.
+ */
+ temperature?: number | null;
+
+ /**
+ * Nucleus sampling parameter. This is a query parameter used to select responses.
+ */
+ top_p?: number | null;
+
+ /**
+ * List of user identifiers. This is a query parameter used to select responses.
+ */
+ users?: Array<string> | null;
+ }
+
+ export interface Template {
+ /**
+ * A list of chat messages forming the prompt or context. May include variable
+ * references to the "item" namespace, ie {{item.name}}.
+ */
+ template: Array<Template.ChatMessage | Template.EvalItem>;
+
+ /**
+ * The type of input messages. Always `template`.
+ */
+ type: 'template';
+ }
+
+ export namespace Template {
+ export interface ChatMessage {
+ /**
+ * The content of the message.
+ */
+ content: string;
+
+ /**
+ * The role of the message (e.g. "system", "assistant", "user").
+ */
+ role: string;
+ }
+
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface EvalItem {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
+
+ /**
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
+ */
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
+ }
+
+ export namespace EvalItem {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
+ /**
+ * The text output from the model.
+ */
+ text: string;
+
+ /**
+ * The type of the output text. Always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+ }
+
+ export interface ItemReference {
+ /**
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
+ */
+ item_reference: string;
+
+ /**
+ * The type of input messages. Always `item_reference`.
+ */
+ type: 'item_reference';
+ }
+
+ export interface SamplingParams {
+ /**
+ * The maximum number of tokens in the generated output.
+ */
+ max_completion_tokens?: number;
+
+ /**
+ * A seed value to initialize the randomness, during sampling.
+ */
+ seed?: number;
+
+ /**
+ * A higher temperature increases randomness in the outputs.
+ */
+ temperature?: number;
+
+ /**
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+ */
+ top_p?: number;
+ }
+ }
+
export interface PerModelUsage {
/**
* The number of tokens retrieved from cache.
@@ -829,18 +1122,10 @@ export namespace RunListResponse {
}
}
-export interface RunDeleteResponse {
- deleted?: boolean;
-
- object?: string;
-
- run_id?: string;
-}
-
/**
* A schema representing an evaluation run.
*/
-export interface RunCancelResponse {
+export interface RunListResponse {
/**
* Unique identifier for the evaluation run.
*/
@@ -854,7 +1139,10 @@ export interface RunCancelResponse {
/**
* Information about the run's data source.
*/
- data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+ data_source:
+ | CreateEvalJSONLRunDataSource
+ | CreateEvalCompletionsRunDataSource
+ | RunListResponse.Completions;
/**
* An object representing an error response from the Eval API.
@@ -894,12 +1182,12 @@ export interface RunCancelResponse {
/**
* Usage statistics for each model during the evaluation run.
*/
- per_model_usage: Array<RunCancelResponse.PerModelUsage>;
+ per_model_usage: Array<RunListResponse.PerModelUsage>;
/**
* Results per testing criteria applied during the evaluation run.
*/
- per_testing_criteria_results: Array<RunCancelResponse.PerTestingCriteriaResult>;
+ per_testing_criteria_results: Array<RunListResponse.PerTestingCriteriaResult>;
/**
* The URL to the rendered evaluation run report on the UI dashboard.
@@ -909,7 +1197,7 @@ export interface RunCancelResponse {
/**
* Counters summarizing the outcomes of the evaluation run.
*/
- result_counts: RunCancelResponse.ResultCounts;
+ result_counts: RunListResponse.ResultCounts;
/**
* The status of the evaluation run.
@@ -917,25 +1205,660 @@ export interface RunCancelResponse {
status: string;
}
-export namespace RunCancelResponse {
- export interface PerModelUsage {
+export namespace RunListResponse {
+ /**
+ * A ResponsesRunDataSource object describing a model sampling configuration.
+ */
+ export interface Completions {
/**
- * The number of tokens retrieved from cache.
+ * A EvalResponsesSource object describing a run data source configuration.
*/
- cached_tokens: number;
+ source: Completions.FileContent | Completions.FileID | Completions.Responses;
/**
- * The number of completion tokens generated.
+ * The type of run data source. Always `completions`.
*/
- completion_tokens: number;
+ type: 'completions';
- /**
- * The number of invocations.
- */
- invocation_count: number;
+ input_messages?: Completions.Template | Completions.ItemReference;
/**
- * The name of the model.
+ * The name of the model to use for generating completions (e.g. "o3-mini").
+ */
+ model?: string;
+
+ sampling_params?: Completions.SamplingParams;
+ }
+
+ export namespace Completions {
+ export interface FileContent {
+ /**
+ * The content of the jsonl file.
+ */
+ content: Array<FileContent.Content>;
+
+ /**
+ * The type of jsonl source. Always `file_content`.
+ */
+ type: 'file_content';
+ }
+
+ export namespace FileContent {
+ export interface Content {
+ item: Record<string, unknown>;
+
+ sample?: Record<string, unknown>;
+ }
+ }
+
+ export interface FileID {
+ /**
+ * The identifier of the file.
+ */
+ id: string;
+
+ /**
+ * The type of jsonl source. Always `file_id`.
+ */
+ type: 'file_id';
+ }
+
+ /**
+ * A EvalResponsesSource object describing a run data source configuration.
+ */
+ export interface Responses {
+ /**
+ * The type of run data source. Always `responses`.
+ */
+ type: 'responses';
+
+ /**
+ * Whether to allow parallel tool calls. This is a query parameter used to select
+ * responses.
+ */
+ allow_parallel_tool_calls?: boolean | null;
+
+ /**
+ * Only include items created after this timestamp (inclusive). This is a query
+ * parameter used to select responses.
+ */
+ created_after?: number | null;
+
+ /**
+ * Only include items created before this timestamp (inclusive). This is a query
+ * parameter used to select responses.
+ */
+ created_before?: number | null;
+
+ /**
+ * Whether the response has tool calls. This is a query parameter used to select
+ * responses.
+ */
+ has_tool_calls?: boolean | null;
+
+ /**
+ * Optional search string for instructions. This is a query parameter used to
+ * select responses.
+ */
+ instructions_search?: string | null;
+
+ /**
+ * Metadata filter for the responses. This is a query parameter used to select
+ * responses.
+ */
+ metadata?: unknown | null;
+
+ /**
+ * The name of the model to find responses for. This is a query parameter used to
+ * select responses.
+ */
+ model?: string | null;
+
+ /**
+ * Optional reasoning effort parameter. This is a query parameter used to select
+ * responses.
+ */
+ reasoning_effort?: Shared.ReasoningEffort | null;
+
+ /**
+ * Sampling temperature. This is a query parameter used to select responses.
+ */
+ temperature?: number | null;
+
+ /**
+ * Nucleus sampling parameter. This is a query parameter used to select responses.
+ */
+ top_p?: number | null;
+
+ /**
+ * List of user identifiers. This is a query parameter used to select responses.
+ */
+ users?: Array<string> | null;
+ }
+
+ export interface Template {
+ /**
+ * A list of chat messages forming the prompt or context. May include variable
+ * references to the "item" namespace, ie {{item.name}}.
+ */
+ template: Array<Template.ChatMessage | Template.EvalItem>;
+
+ /**
+ * The type of input messages. Always `template`.
+ */
+ type: 'template';
+ }
+
+ export namespace Template {
+ export interface ChatMessage {
+ /**
+ * The content of the message.
+ */
+ content: string;
+
+ /**
+ * The role of the message (e.g. "system", "assistant", "user").
+ */
+ role: string;
+ }
+
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface EvalItem {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
+
+ /**
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
+ */
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
+ }
+
+ export namespace EvalItem {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
+ /**
+ * The text output from the model.
+ */
+ text: string;
+
+ /**
+ * The type of the output text. Always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+ }
+
+ export interface ItemReference {
+ /**
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
+ */
+ item_reference: string;
+
+ /**
+ * The type of input messages. Always `item_reference`.
+ */
+ type: 'item_reference';
+ }
+
+ export interface SamplingParams {
+ /**
+ * The maximum number of tokens in the generated output.
+ */
+ max_completion_tokens?: number;
+
+ /**
+ * A seed value to initialize the randomness, during sampling.
+ */
+ seed?: number;
+
+ /**
+ * A higher temperature increases randomness in the outputs.
+ */
+ temperature?: number;
+
+ /**
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+ */
+ top_p?: number;
+ }
+ }
+
+ export interface PerModelUsage {
+ /**
+ * The number of tokens retrieved from cache.
+ */
+ cached_tokens: number;
+
+ /**
+ * The number of completion tokens generated.
+ */
+ completion_tokens: number;
+
+ /**
+ * The number of invocations.
+ */
+ invocation_count: number;
+
+ /**
+ * The name of the model.
+ */
+ model_name: string;
+
+ /**
+ * The number of prompt tokens used.
+ */
+ prompt_tokens: number;
+
+ /**
+ * The total number of tokens used.
+ */
+ total_tokens: number;
+ }
+
+ export interface PerTestingCriteriaResult {
+ /**
+ * Number of tests failed for this criteria.
+ */
+ failed: number;
+
+ /**
+ * Number of tests passed for this criteria.
+ */
+ passed: number;
+
+ /**
+ * A description of the testing criteria.
+ */
+ testing_criteria: string;
+ }
+
+ /**
+ * Counters summarizing the outcomes of the evaluation run.
+ */
+ export interface ResultCounts {
+ /**
+ * Number of output items that resulted in an error.
+ */
+ errored: number;
+
+ /**
+ * Number of output items that failed to pass the evaluation.
+ */
+ failed: number;
+
+ /**
+ * Number of output items that passed the evaluation.
+ */
+ passed: number;
+
+ /**
+ * Total number of executed output items.
+ */
+ total: number;
+ }
+}
+
+export interface RunDeleteResponse {
+ deleted?: boolean;
+
+ object?: string;
+
+ run_id?: string;
+}
+
+/**
+ * A schema representing an evaluation run.
+ */
+export interface RunCancelResponse {
+ /**
+ * Unique identifier for the evaluation run.
+ */
+ id: string;
+
+ /**
+ * Unix timestamp (in seconds) when the evaluation run was created.
+ */
+ created_at: number;
+
+ /**
+ * Information about the run's data source.
+ */
+ data_source:
+ | CreateEvalJSONLRunDataSource
+ | CreateEvalCompletionsRunDataSource
+ | RunCancelResponse.Completions;
+
+ /**
+ * An object representing an error response from the Eval API.
+ */
+ error: EvalAPIError;
+
+ /**
+ * The identifier of the associated evaluation.
+ */
+ eval_id: string;
+
+ /**
+ * Set of 16 key-value pairs that can be attached to an object. This can be useful
+ * for storing additional information about the object in a structured format, and
+ * querying for objects via API or the dashboard.
+ *
+ * Keys are strings with a maximum length of 64 characters. Values are strings with
+ * a maximum length of 512 characters.
+ */
+ metadata: Shared.Metadata | null;
+
+ /**
+ * The model that is evaluated, if applicable.
+ */
+ model: string;
+
+ /**
+ * The name of the evaluation run.
+ */
+ name: string;
+
+ /**
+ * The type of the object. Always "eval.run".
+ */
+ object: 'eval.run';
+
+ /**
+ * Usage statistics for each model during the evaluation run.
+ */
+ per_model_usage: Array<RunCancelResponse.PerModelUsage>;
+
+ /**
+ * Results per testing criteria applied during the evaluation run.
+ */
+ per_testing_criteria_results: Array<RunCancelResponse.PerTestingCriteriaResult>;
+
+ /**
+ * The URL to the rendered evaluation run report on the UI dashboard.
+ */
+ report_url: string;
+
+ /**
+ * Counters summarizing the outcomes of the evaluation run.
+ */
+ result_counts: RunCancelResponse.ResultCounts;
+
+ /**
+ * The status of the evaluation run.
+ */
+ status: string;
+}
+
+export namespace RunCancelResponse {
+ /**
+ * A ResponsesRunDataSource object describing a model sampling configuration.
+ */
+ export interface Completions {
+ /**
+ * A EvalResponsesSource object describing a run data source configuration.
+ */
+ source: Completions.FileContent | Completions.FileID | Completions.Responses;
+
+ /**
+ * The type of run data source. Always `completions`.
+ */
+ type: 'completions';
+
+ input_messages?: Completions.Template | Completions.ItemReference;
+
+ /**
+ * The name of the model to use for generating completions (e.g. "o3-mini").
+ */
+ model?: string;
+
+ sampling_params?: Completions.SamplingParams;
+ }
+
+ export namespace Completions {
+ export interface FileContent {
+ /**
+ * The content of the jsonl file.
+ */
+ content: Array<FileContent.Content>;
+
+ /**
+ * The type of jsonl source. Always `file_content`.
+ */
+ type: 'file_content';
+ }
+
+ export namespace FileContent {
+ export interface Content {
+ item: Record<string, unknown>;
+
+ sample?: Record<string, unknown>;
+ }
+ }
+
+ export interface FileID {
+ /**
+ * The identifier of the file.
+ */
+ id: string;
+
+ /**
+ * The type of jsonl source. Always `file_id`.
+ */
+ type: 'file_id';
+ }
+
+ /**
+ * A EvalResponsesSource object describing a run data source configuration.
+ */
+ export interface Responses {
+ /**
+ * The type of run data source. Always `responses`.
+ */
+ type: 'responses';
+
+ /**
+ * Whether to allow parallel tool calls. This is a query parameter used to select
+ * responses.
+ */
+ allow_parallel_tool_calls?: boolean | null;
+
+ /**
+ * Only include items created after this timestamp (inclusive). This is a query
+ * parameter used to select responses.
+ */
+ created_after?: number | null;
+
+ /**
+ * Only include items created before this timestamp (inclusive). This is a query
+ * parameter used to select responses.
+ */
+ created_before?: number | null;
+
+ /**
+ * Whether the response has tool calls. This is a query parameter used to select
+ * responses.
+ */
+ has_tool_calls?: boolean | null;
+
+ /**
+ * Optional search string for instructions. This is a query parameter used to
+ * select responses.
+ */
+ instructions_search?: string | null;
+
+ /**
+ * Metadata filter for the responses. This is a query parameter used to select
+ * responses.
+ */
+ metadata?: unknown | null;
+
+ /**
+ * The name of the model to find responses for. This is a query parameter used to
+ * select responses.
+ */
+ model?: string | null;
+
+ /**
+ * Optional reasoning effort parameter. This is a query parameter used to select
+ * responses.
+ */
+ reasoning_effort?: Shared.ReasoningEffort | null;
+
+ /**
+ * Sampling temperature. This is a query parameter used to select responses.
+ */
+ temperature?: number | null;
+
+ /**
+ * Nucleus sampling parameter. This is a query parameter used to select responses.
+ */
+ top_p?: number | null;
+
+ /**
+ * List of user identifiers. This is a query parameter used to select responses.
+ */
+ users?: Array<string> | null;
+ }
+
+ export interface Template {
+ /**
+ * A list of chat messages forming the prompt or context. May include variable
+ * references to the "item" namespace, ie {{item.name}}.
+ */
+ template: Array<Template.ChatMessage | Template.EvalItem>;
+
+ /**
+ * The type of input messages. Always `template`.
+ */
+ type: 'template';
+ }
+
+ export namespace Template {
+ export interface ChatMessage {
+ /**
+ * The content of the message.
+ */
+ content: string;
+
+ /**
+ * The role of the message (e.g. "system", "assistant", "user").
+ */
+ role: string;
+ }
+
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface EvalItem {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
+
+ /**
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
+ */
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
+ }
+
+ export namespace EvalItem {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
+ /**
+ * The text output from the model.
+ */
+ text: string;
+
+ /**
+ * The type of the output text. Always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+ }
+
+ export interface ItemReference {
+ /**
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
+ */
+ item_reference: string;
+
+ /**
+ * The type of input messages. Always `item_reference`.
+ */
+ type: 'item_reference';
+ }
+
+ export interface SamplingParams {
+ /**
+ * The maximum number of tokens in the generated output.
+ */
+ max_completion_tokens?: number;
+
+ /**
+ * A seed value to initialize the randomness, during sampling.
+ */
+ seed?: number;
+
+ /**
+ * A higher temperature increases randomness in the outputs.
+ */
+ temperature?: number;
+
+ /**
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+ */
+ top_p?: number;
+ }
+ }
+
+ export interface PerModelUsage {
+ /**
+ * The number of tokens retrieved from cache.
+ */
+ cached_tokens: number;
+
+ /**
+ * The number of completion tokens generated.
+ */
+ completion_tokens: number;
+
+ /**
+ * The number of invocations.
+ */
+ invocation_count: number;
+
+ /**
+ * The name of the model.
*/
model_name: string;
@@ -997,7 +1920,10 @@ export interface RunCreateParams {
/**
* Details about the run's data source.
*/
- data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+ data_source:
+ | CreateEvalJSONLRunDataSource
+ | CreateEvalCompletionsRunDataSource
+ | RunCreateParams.CreateEvalResponsesRunDataSource;
/**
* Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -1015,6 +1941,247 @@ export interface RunCreateParams {
name?: string;
}
+export namespace RunCreateParams {
+ /**
+ * A ResponsesRunDataSource object describing a model sampling configuration.
+ */
+ export interface CreateEvalResponsesRunDataSource {
+ /**
+ * A EvalResponsesSource object describing a run data source configuration.
+ */
+ source:
+ | CreateEvalResponsesRunDataSource.FileContent
+ | CreateEvalResponsesRunDataSource.FileID
+ | CreateEvalResponsesRunDataSource.Responses;
+
+ /**
+ * The type of run data source. Always `completions`.
+ */
+ type: 'completions';
+
+ input_messages?:
+ | CreateEvalResponsesRunDataSource.Template
+ | CreateEvalResponsesRunDataSource.ItemReference;
+
+ /**
+ * The name of the model to use for generating completions (e.g. "o3-mini").
+ */
+ model?: string;
+
+ sampling_params?: CreateEvalResponsesRunDataSource.SamplingParams;
+ }
+
+ export namespace CreateEvalResponsesRunDataSource {
+ export interface FileContent {
+ /**
+ * The content of the jsonl file.
+ */
+ content: Array<FileContent.Content>;
+
+ /**
+ * The type of jsonl source. Always `file_content`.
+ */
+ type: 'file_content';
+ }
+
+ export namespace FileContent {
+ export interface Content {
+ item: Record<string, unknown>;
+
+ sample?: Record<string, unknown>;
+ }
+ }
+
+ export interface FileID {
+ /**
+ * The identifier of the file.
+ */
+ id: string;
+
+ /**
+ * The type of jsonl source. Always `file_id`.
+ */
+ type: 'file_id';
+ }
+
+ /**
+ * A EvalResponsesSource object describing a run data source configuration.
+ */
+ export interface Responses {
+ /**
+ * The type of run data source. Always `responses`.
+ */
+ type: 'responses';
+
+ /**
+ * Whether to allow parallel tool calls. This is a query parameter used to select
+ * responses.
+ */
+ allow_parallel_tool_calls?: boolean | null;
+
+ /**
+ * Only include items created after this timestamp (inclusive). This is a query
+ * parameter used to select responses.
+ */
+ created_after?: number | null;
+
+ /**
+ * Only include items created before this timestamp (inclusive). This is a query
+ * parameter used to select responses.
+ */
+ created_before?: number | null;
+
+ /**
+ * Whether the response has tool calls. This is a query parameter used to select
+ * responses.
+ */
+ has_tool_calls?: boolean | null;
+
+ /**
+ * Optional search string for instructions. This is a query parameter used to
+ * select responses.
+ */
+ instructions_search?: string | null;
+
+ /**
+ * Metadata filter for the responses. This is a query parameter used to select
+ * responses.
+ */
+ metadata?: unknown | null;
+
+ /**
+ * The name of the model to find responses for. This is a query parameter used to
+ * select responses.
+ */
+ model?: string | null;
+
+ /**
+ * Optional reasoning effort parameter. This is a query parameter used to select
+ * responses.
+ */
+ reasoning_effort?: Shared.ReasoningEffort | null;
+
+ /**
+ * Sampling temperature. This is a query parameter used to select responses.
+ */
+ temperature?: number | null;
+
+ /**
+ * Nucleus sampling parameter. This is a query parameter used to select responses.
+ */
+ top_p?: number | null;
+
+ /**
+ * List of user identifiers. This is a query parameter used to select responses.
+ */
+ users?: Array<string> | null;
+ }
+
+ export interface Template {
+ /**
+ * A list of chat messages forming the prompt or context. May include variable
+ * references to the "item" namespace, ie {{item.name}}.
+ */
+ template: Array<Template.ChatMessage | Template.EvalItem>;
+
+ /**
+ * The type of input messages. Always `template`.
+ */
+ type: 'template';
+ }
+
+ export namespace Template {
+ export interface ChatMessage {
+ /**
+ * The content of the message.
+ */
+ content: string;
+
+ /**
+ * The role of the message (e.g. "system", "assistant", "user").
+ */
+ role: string;
+ }
+
+ /**
+ * A message input to the model with a role indicating instruction following
+ * hierarchy. Instructions given with the `developer` or `system` role take
+ * precedence over instructions given with the `user` role. Messages with the
+ * `assistant` role are presumed to have been generated by the model in previous
+ * interactions.
+ */
+ export interface EvalItem {
+ /**
+ * Text inputs to the model - can contain template strings.
+ */
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
+
+ /**
+ * The role of the message input. One of `user`, `assistant`, `system`, or
+ * `developer`.
+ */
+ role: 'user' | 'assistant' | 'system' | 'developer';
+
+ /**
+ * The type of the message input. Always `message`.
+ */
+ type?: 'message';
+ }
+
+ export namespace EvalItem {
+ /**
+ * A text output from the model.
+ */
+ export interface OutputText {
+ /**
+ * The text output from the model.
+ */
+ text: string;
+
+ /**
+ * The type of the output text. Always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+ }
+
+ export interface ItemReference {
+ /**
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
+ */
+ item_reference: string;
+
+ /**
+ * The type of input messages. Always `item_reference`.
+ */
+ type: 'item_reference';
+ }
+
+ export interface SamplingParams {
+ /**
+ * The maximum number of tokens in the generated output.
+ */
+ max_completion_tokens?: number;
+
+ /**
+ * A seed value to initialize the randomness, during sampling.
+ */
+ seed?: number;
+
+ /**
+ * A higher temperature increases randomness in the outputs.
+ */
+ temperature?: number;
+
+ /**
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+ */
+ top_p?: number;
+ }
+ }
+}
+
export interface RunListParams extends CursorPageParams {
/**
* Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for
@@ -1023,8 +2190,8 @@ export interface RunListParams extends CursorPageParams {
order?: 'asc' | 'desc';
/**
- * Filter runs by status. Use "queued" | "in_progress" | "failed" | "completed" |
- * "canceled".
+ * Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed`
+ * | `canceled`.
*/
status?: 'queued' | 'in_progress' | 'completed' | 'canceled' | 'failed';
}
diff --git a/src/resources/fine-tuning/checkpoints/permissions.ts b/src/resources/fine-tuning/checkpoints/permissions.ts
index 500c3de81..e808b2001 100644
--- a/src/resources/fine-tuning/checkpoints/permissions.ts
+++ b/src/resources/fine-tuning/checkpoints/permissions.ts
@@ -61,9 +61,13 @@ export class Permissions extends APIResource {
*/
del(
fineTunedModelCheckpoint: string,
+ permissionId: string,
options?: Core.RequestOptions,
): Core.APIPromise<PermissionDeleteResponse> {
- return this._client.delete(`/fine_tuning/checkpoints/${fineTunedModelCheckpoint}/permissions`, options);
+ return this._client.delete(
+ `/fine_tuning/checkpoints/${fineTunedModelCheckpoint}/permissions/${permissionId}`,
+ options,
+ );
}
}
diff --git a/src/resources/images.ts b/src/resources/images.ts
index 8e1c6d92e..de1882d30 100644
--- a/src/resources/images.ts
+++ b/src/resources/images.ts
@@ -5,7 +5,7 @@ import * as Core from '../core';
export class Images extends APIResource {
/**
- * Creates a variation of a given image.
+ * Creates a variation of a given image. This endpoint only supports `dall-e-2`.
*/
createVariation(
body: ImageCreateVariationParams,
@@ -15,7 +15,8 @@ export class Images extends APIResource {
}
/**
- * Creates an edited or extended image given an original image and a prompt.
+ * Creates an edited or extended image given one or more source images and a
+ * prompt. This endpoint only supports `gpt-image-1` and `dall-e-2`.
*/
edit(body: ImageEditParams, options?: Core.RequestOptions): Core.APIPromise<ImagesResponse> {
return this._client.post('/images/edits', Core.multipartFormRequestOptions({ body, ...options }));
@@ -23,6 +24,7 @@ export class Images extends APIResource {
/**
* Creates an image given a prompt.
+ * [Learn more](https://platform.openai.com/docs/guides/images).
*/
generate(body: ImageGenerateParams, options?: Core.RequestOptions): Core.APIPromise<ImagesResponse> {
return this._client.post('/images/generations', { body, ...options });
@@ -30,33 +32,93 @@ export class Images extends APIResource {
}
/**
- * Represents the url or the content of an image generated by the OpenAI API.
+ * Represents the content or the URL of an image generated by the OpenAI API.
*/
export interface Image {
/**
- * The base64-encoded JSON of the generated image, if `response_format` is
- * `b64_json`.
+ * The base64-encoded JSON of the generated image. Default value for `gpt-image-1`,
+ * and only present if `response_format` is set to `b64_json` for `dall-e-2` and
+ * `dall-e-3`.
*/
b64_json?: string;
/**
- * The prompt that was used to generate the image, if there was any revision to the
- * prompt.
+ * For `dall-e-3` only, the revised prompt that was used to generate the image.
*/
revised_prompt?: string;
/**
- * The URL of the generated image, if `response_format` is `url` (default).
+ * When using `dall-e-2` or `dall-e-3`, the URL of the generated image if
+ * `response_format` is set to `url` (default value). Unsupported for
+ * `gpt-image-1`.
*/
url?: string;
}
-export type ImageModel = 'dall-e-2' | 'dall-e-3';
+export type ImageModel = 'dall-e-2' | 'dall-e-3' | 'gpt-image-1';
+/**
+ * The response from the image generation endpoint.
+ */
export interface ImagesResponse {
+ /**
+ * The Unix timestamp (in seconds) of when the image was created.
+ */
created: number;
- data: Array<Image>;
+ /**
+ * The list of generated images.
+ */
+ data?: Array<Image>;
+
+ /**
+ * For `gpt-image-1` only, the token usage information for the image generation.
+ */
+ usage?: ImagesResponse.Usage;
+}
+
+export namespace ImagesResponse {
+ /**
+ * For `gpt-image-1` only, the token usage information for the image generation.
+ */
+ export interface Usage {
+ /**
+ * The number of tokens (images and text) in the input prompt.
+ */
+ input_tokens: number;
+
+ /**
+ * The input tokens detailed information for the image generation.
+ */
+ input_tokens_details: Usage.InputTokensDetails;
+
+ /**
+ * The number of image tokens in the output image.
+ */
+ output_tokens: number;
+
+ /**
+ * The total number of tokens (images and text) used for the image generation.
+ */
+ total_tokens: number;
+ }
+
+ export namespace Usage {
+ /**
+ * The input tokens detailed information for the image generation.
+ */
+ export interface InputTokensDetails {
+ /**
+ * The number of image tokens in the input prompt.
+ */
+ image_tokens: number;
+
+ /**
+ * The number of text tokens in the input prompt.
+ */
+ text_tokens: number;
+ }
+ }
}
export interface ImageCreateVariationParams {
@@ -73,8 +135,7 @@ export interface ImageCreateVariationParams {
model?: (string & {}) | ImageModel | null;
/**
- * The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
- * `n=1` is supported.
+ * The number of images to generate. Must be between 1 and 10.
*/
n?: number | null;
@@ -101,27 +162,31 @@ export interface ImageCreateVariationParams {
export interface ImageEditParams {
/**
- * The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask
- * is not provided, image must have transparency, which will be used as the mask.
+ * The image(s) to edit. Must be a supported image file or an array of images. For
+ * `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than
+ * 25MB. For `dall-e-2`, you can only provide one image, and it should be a square
+ * `png` file less than 4MB.
*/
- image: Core.Uploadable;
+ image: Core.Uploadable | Array<Core.Uploadable>;
/**
* A text description of the desired image(s). The maximum length is 1000
- * characters.
+ * characters for `dall-e-2`, and 32000 characters for `gpt-image-1`.
*/
prompt: string;
/**
* An additional image whose fully transparent areas (e.g. where alpha is zero)
- * indicate where `image` should be edited. Must be a valid PNG file, less than
+ * indicate where `image` should be edited. If there are multiple images provided,
+ * the mask will be applied on the first image. Must be a valid PNG file, less than
* 4MB, and have the same dimensions as `image`.
*/
mask?: Core.Uploadable;
/**
- * The model to use for image generation. Only `dall-e-2` is supported at this
- * time.
+ * The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are
+ * supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1`
+ * is used.
*/
model?: (string & {}) | ImageModel | null;
@@ -130,16 +195,25 @@ export interface ImageEditParams {
*/
n?: number | null;
+ /**
+ * The quality of the image that will be generated. `high`, `medium` and `low` are
+ * only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality.
+ * Defaults to `auto`.
+ */
+ quality?: 'standard' | 'low' | 'medium' | 'high' | 'auto' | null;
+
/**
* The format in which the generated images are returned. Must be one of `url` or
* `b64_json`. URLs are only valid for 60 minutes after the image has been
- * generated.
+ * generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1`
+ * will always return base64-encoded images.
*/
response_format?: 'url' | 'b64_json' | null;
/**
- * The size of the generated images. Must be one of `256x256`, `512x512`, or
- * `1024x1024`.
+ * The size of the generated images. Must be one of `1024x1024`, `1536x1024`
+ * (landscape), `1024x1536` (portrait), or `auto` (default value) for
+ * `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`.
*/
size?: '256x256' | '512x512' | '1024x1024' | null;
@@ -153,16 +227,36 @@ export interface ImageEditParams {
export interface ImageGenerateParams {
/**
- * A text description of the desired image(s). The maximum length is 1000
- * characters for `dall-e-2` and 4000 characters for `dall-e-3`.
+ * A text description of the desired image(s). The maximum length is 32000
+ * characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters
+ * for `dall-e-3`.
*/
prompt: string;
/**
- * The model to use for image generation.
+ * Allows to set transparency for the background of the generated image(s). This
+ * parameter is only supported for `gpt-image-1`. Must be one of `transparent`,
+ * `opaque` or `auto` (default value). When `auto` is used, the model will
+ * automatically determine the best background for the image.
+ *
+ * If `transparent`, the output format needs to support transparency, so it should
+ * be set to either `png` (default value) or `webp`.
+ */
+ background?: 'transparent' | 'opaque' | 'auto' | null;
+
+ /**
+ * The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or
+ * `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to
+ * `gpt-image-1` is used.
*/
model?: (string & {}) | ImageModel | null;
+ /**
+ * Control the content-moderation level for images generated by `gpt-image-1`. Must
+ * be either `low` for less restrictive filtering or `auto` (default value).
+ */
+ moderation?: 'low' | 'auto' | null;
+
/**
* The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
* `n=1` is supported.
@@ -170,31 +264,59 @@ export interface ImageGenerateParams {
n?: number | null;
/**
- * The quality of the image that will be generated. `hd` creates images with finer
- * details and greater consistency across the image. This param is only supported
- * for `dall-e-3`.
+ * The compression level (0-100%) for the generated images. This parameter is only
+ * supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and
+ * defaults to 100.
*/
- quality?: 'standard' | 'hd';
+ output_compression?: number | null;
/**
- * The format in which the generated images are returned. Must be one of `url` or
- * `b64_json`. URLs are only valid for 60 minutes after the image has been
- * generated.
+ * The format in which the generated images are returned. This parameter is only
+ * supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`.
+ */
+ output_format?: 'png' | 'jpeg' | 'webp' | null;
+
+ /**
+ * The quality of the image that will be generated.
+ *
+ * - `auto` (default value) will automatically select the best quality for the
+ * given model.
+ * - `high`, `medium` and `low` are supported for `gpt-image-1`.
+ * - `hd` and `standard` are supported for `dall-e-3`.
+ * - `standard` is the only option for `dall-e-2`.
+ */
+ quality?: 'standard' | 'hd' | 'low' | 'medium' | 'high' | 'auto' | null;
+
+ /**
+ * The format in which generated images with `dall-e-2` and `dall-e-3` are
+ * returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes
+ * after the image has been generated. This parameter isn't supported for
+ * `gpt-image-1` which will always return base64-encoded images.
*/
response_format?: 'url' | 'b64_json' | null;
/**
- * The size of the generated images. Must be one of `256x256`, `512x512`, or
- * `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or
- * `1024x1792` for `dall-e-3` models.
+ * The size of the generated images. Must be one of `1024x1024`, `1536x1024`
+ * (landscape), `1024x1536` (portrait), or `auto` (default value) for
+ * `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and
+ * one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.
*/
- size?: '256x256' | '512x512' | '1024x1024' | '1792x1024' | '1024x1792' | null;
+ size?:
+ | 'auto'
+ | '1024x1024'
+ | '1536x1024'
+ | '1024x1536'
+ | '256x256'
+ | '512x512'
+ | '1792x1024'
+ | '1024x1792'
+ | null;
/**
- * The style of the generated images. Must be one of `vivid` or `natural`. Vivid
- * causes the model to lean towards generating hyper-real and dramatic images.
- * Natural causes the model to produce more natural, less hyper-real looking
- * images. This param is only supported for `dall-e-3`.
+ * The style of the generated images. This parameter is only supported for
+ * `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean
+ * towards generating hyper-real and dramatic images. Natural causes the model to
+ * produce more natural, less hyper-real looking images.
*/
style?: 'vivid' | 'natural' | null;
diff --git a/src/resources/responses/responses.ts b/src/resources/responses/responses.ts
index 52dd079fc..771b8daf2 100644
--- a/src/resources/responses/responses.ts
+++ b/src/resources/responses/responses.ts
@@ -2158,6 +2158,160 @@ export namespace ResponseReasoningItem {
}
}
+/**
+ * Emitted when a new reasoning summary part is added.
+ */
+export interface ResponseReasoningSummaryPartAddedEvent {
+ /**
+ * The ID of the item this summary part is associated with.
+ */
+ item_id: string;
+
+ /**
+ * The index of the output item this summary part is associated with.
+ */
+ output_index: number;
+
+ /**
+ * The summary part that was added.
+ */
+ part: ResponseReasoningSummaryPartAddedEvent.Part;
+
+ /**
+ * The index of the summary part within the reasoning summary.
+ */
+ summary_index: number;
+
+ /**
+ * The type of the event. Always `response.reasoning_summary_part.added`.
+ */
+ type: 'response.reasoning_summary_part.added';
+}
+
+export namespace ResponseReasoningSummaryPartAddedEvent {
+ /**
+ * The summary part that was added.
+ */
+ export interface Part {
+ /**
+ * The text of the summary part.
+ */
+ text: string;
+
+ /**
+ * The type of the summary part. Always `summary_text`.
+ */
+ type: 'summary_text';
+ }
+}
+
+/**
+ * Emitted when a reasoning summary part is completed.
+ */
+export interface ResponseReasoningSummaryPartDoneEvent {
+ /**
+ * The ID of the item this summary part is associated with.
+ */
+ item_id: string;
+
+ /**
+ * The index of the output item this summary part is associated with.
+ */
+ output_index: number;
+
+ /**
+ * The completed summary part.
+ */
+ part: ResponseReasoningSummaryPartDoneEvent.Part;
+
+ /**
+ * The index of the summary part within the reasoning summary.
+ */
+ summary_index: number;
+
+ /**
+ * The type of the event. Always `response.reasoning_summary_part.done`.
+ */
+ type: 'response.reasoning_summary_part.done';
+}
+
+export namespace ResponseReasoningSummaryPartDoneEvent {
+ /**
+ * The completed summary part.
+ */
+ export interface Part {
+ /**
+ * The text of the summary part.
+ */
+ text: string;
+
+ /**
+ * The type of the summary part. Always `summary_text`.
+ */
+ type: 'summary_text';
+ }
+}
+
+/**
+ * Emitted when a delta is added to a reasoning summary text.
+ */
+export interface ResponseReasoningSummaryTextDeltaEvent {
+ /**
+ * The text delta that was added to the summary.
+ */
+ delta: string;
+
+ /**
+ * The ID of the item this summary text delta is associated with.
+ */
+ item_id: string;
+
+ /**
+ * The index of the output item this summary text delta is associated with.
+ */
+ output_index: number;
+
+ /**
+ * The index of the summary part within the reasoning summary.
+ */
+ summary_index: number;
+
+ /**
+ * The type of the event. Always `response.reasoning_summary_text.delta`.
+ */
+ type: 'response.reasoning_summary_text.delta';
+}
+
+/**
+ * Emitted when a reasoning summary text is completed.
+ */
+export interface ResponseReasoningSummaryTextDoneEvent {
+ /**
+ * The ID of the item this summary text is associated with.
+ */
+ item_id: string;
+
+ /**
+ * The index of the output item this summary text is associated with.
+ */
+ output_index: number;
+
+ /**
+ * The index of the summary part within the reasoning summary.
+ */
+ summary_index: number;
+
+ /**
+ * The full text of the completed reasoning summary.
+ */
+ text: string;
+
+ /**
+ * The type of the event. Always `response.reasoning_summary_text.done`.
+ */
+ type: 'response.reasoning_summary_text.done';
+}
+
/**
* Emitted when there is a partial refusal text.
*/
@@ -2252,6 +2406,10 @@ export type ResponseStreamEvent =
| ResponseIncompleteEvent
| ResponseOutputItemAddedEvent
| ResponseOutputItemDoneEvent
+ | ResponseReasoningSummaryPartAddedEvent
+ | ResponseReasoningSummaryPartDoneEvent
+ | ResponseReasoningSummaryTextDeltaEvent
+ | ResponseReasoningSummaryTextDoneEvent
| ResponseRefusalDeltaEvent
| ResponseRefusalDoneEvent
| ResponseTextAnnotationDeltaEvent
@@ -2967,6 +3125,10 @@ export declare namespace Responses {
type ResponseOutputRefusal as ResponseOutputRefusal,
type ResponseOutputText as ResponseOutputText,
type ResponseReasoningItem as ResponseReasoningItem,
+ type ResponseReasoningSummaryPartAddedEvent as ResponseReasoningSummaryPartAddedEvent,
+ type ResponseReasoningSummaryPartDoneEvent as ResponseReasoningSummaryPartDoneEvent,
+ type ResponseReasoningSummaryTextDeltaEvent as ResponseReasoningSummaryTextDeltaEvent,
+ type ResponseReasoningSummaryTextDoneEvent as ResponseReasoningSummaryTextDoneEvent,
type ResponseRefusalDeltaEvent as ResponseRefusalDeltaEvent,
type ResponseRefusalDoneEvent as ResponseRefusalDoneEvent,
type ResponseStatus as ResponseStatus,
diff --git a/src/version.ts b/src/version.ts
index cd1995322..1215a5e79 100644
--- a/src/version.ts
+++ b/src/version.ts
@@ -1 +1 @@
-export const VERSION = '4.95.1'; // x-release-please-version
+export const VERSION = '4.96.0'; // x-release-please-version
diff --git a/tests/api-resources/evals/evals.test.ts b/tests/api-resources/evals/evals.test.ts
index fabc2602a..45d1c4f9b 100644
--- a/tests/api-resources/evals/evals.test.ts
+++ b/tests/api-resources/evals/evals.test.ts
@@ -47,7 +47,6 @@ describe('resource evals', () => {
],
metadata: { foo: 'string' },
name: 'name',
- share_with_openai: true,
});
});
diff --git a/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts b/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts
index e7aceae3e..1e4b40a94 100644
--- a/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts
+++ b/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts
@@ -61,10 +61,10 @@ describe('resource permissions', () => {
).rejects.toThrow(OpenAI.NotFoundError);
});
- // OpenAPI spec is slightly incorrect
- test.skip('del', async () => {
+ test('del', async () => {
const responsePromise = client.fineTuning.checkpoints.permissions.del(
'ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd',
+ 'cp_zc4Q7MP6XxulcVzj4MZdwsAB',
);
const rawResponse = await responsePromise.asResponse();
expect(rawResponse).toBeInstanceOf(Response);
@@ -75,13 +75,14 @@ describe('resource permissions', () => {
expect(dataAndResponse.response).toBe(rawResponse);
});
- // OpenAPI spec is slightly incorrect
- test.skip('del: request options instead of params are passed correctly', async () => {
+ test('del: request options instead of params are passed correctly', async () => {
// ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error
await expect(
- client.fineTuning.checkpoints.permissions.del('ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd', {
- path: '/_stainless_unknown_path',
- }),
+ client.fineTuning.checkpoints.permissions.del(
+ 'ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd',
+ 'cp_zc4Q7MP6XxulcVzj4MZdwsAB',
+ { path: '/_stainless_unknown_path' },
+ ),
).rejects.toThrow(OpenAI.NotFoundError);
});
});
diff --git a/tests/api-resources/images.test.ts b/tests/api-resources/images.test.ts
index 4f15e20ac..e9b460254 100644
--- a/tests/api-resources/images.test.ts
+++ b/tests/api-resources/images.test.ts
@@ -54,6 +54,7 @@ describe('resource images', () => {
mask: await toFile(Buffer.from('# my file contents'), 'README.md'),
model: 'string',
n: 1,
+ quality: 'high',
response_format: 'url',
size: '1024x1024',
user: 'user-1234',
@@ -74,9 +75,13 @@ describe('resource images', () => {
test('generate: required and optional params', async () => {
const response = await client.images.generate({
prompt: 'A cute baby sea otter',
+ background: 'transparent',
model: 'string',
+ moderation: 'low',
n: 1,
- quality: 'standard',
+ output_compression: 100,
+ output_format: 'png',
+ quality: 'medium',
response_format: 'url',
size: '1024x1024',
style: 'vivid',