Skip to content

Commit 992a9d8

Browse files
fix(api): Fix evals and code interpreter interfaces
1 parent a65428f commit 992a9d8

File tree

10 files changed

+80
-40
lines changed

10 files changed

+80
-40
lines changed

.stats.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 111
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-d4bcffecf0cdadf746faa6708ed1ec81fac451f9b857deabbab26f0a343b9314.yml
3-
openapi_spec_hash: 7c54a18b4381248bda7cc34c52142615
4-
config_hash: e618aa8ff61aea826540916336de65a6
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-2bcc845d8635bf93ddcf9ee723af4d7928248412a417bee5fc10d863a1e13867.yml
3+
openapi_spec_hash: 865230cb3abeb01bd85de05891af23c4
4+
config_hash: ed1e6b3c5f93d12b80d31167f55c557c

api.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ Methods:
709709
- <code title="post /responses">client.responses.<a href="./src/resources/responses/responses.ts">create</a>({ ...params }) -> Response</code>
710710
- <code title="get /responses/{response_id}">client.responses.<a href="./src/resources/responses/responses.ts">retrieve</a>(responseID, { ...params }) -> Response</code>
711711
- <code title="delete /responses/{response_id}">client.responses.<a href="./src/resources/responses/responses.ts">delete</a>(responseID) -> void</code>
712-
- <code title="post /responses/{response_id}/cancel">client.responses.<a href="./src/resources/responses/responses.ts">cancel</a>(responseID) -> void</code>
712+
- <code title="post /responses/{response_id}/cancel">client.responses.<a href="./src/resources/responses/responses.ts">cancel</a>(responseID) -> Response</code>
713713

714714
## InputItems
715715

src/resources/audio/transcriptions.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ export namespace TranscriptionTextDeltaEvent {
199199
/**
200200
* The bytes that were used to generate the log probability.
201201
*/
202-
bytes?: Array<unknown>;
202+
bytes?: Array<number>;
203203

204204
/**
205205
* The log probability of the token.
@@ -244,7 +244,7 @@ export namespace TranscriptionTextDoneEvent {
244244
/**
245245
* The bytes that were used to generate the log probability.
246246
*/
247-
bytes?: Array<unknown>;
247+
bytes?: Array<number>;
248248

249249
/**
250250
* The log probability of the token.

src/resources/chat/completions/completions.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -280,9 +280,9 @@ export interface ChatCompletion {
280280
* utilize scale tier credits until they are exhausted.
281281
* - If set to 'auto', and the Project is not Scale tier enabled, the request will
282282
* be processed using the default service tier with a lower uptime SLA and no
283-
* latency guarentee.
283+
* latency guarantee.
284284
* - If set to 'default', the request will be processed using the default service
285-
* tier with a lower uptime SLA and no latency guarentee.
285+
* tier with a lower uptime SLA and no latency guarantee.
286286
* - If set to 'flex', the request will be processed with the Flex Processing
287287
* service tier.
288288
* [Learn more](https://platform.openai.com/docs/guides/flex-processing).
@@ -531,9 +531,9 @@ export interface ChatCompletionChunk {
531531
* utilize scale tier credits until they are exhausted.
532532
* - If set to 'auto', and the Project is not Scale tier enabled, the request will
533533
* be processed using the default service tier with a lower uptime SLA and no
534-
* latency guarentee.
534+
* latency guarantee.
535535
* - If set to 'default', the request will be processed using the default service
536-
* tier with a lower uptime SLA and no latency guarentee.
536+
* tier with a lower uptime SLA and no latency guarantee.
537537
* - If set to 'flex', the request will be processed with the Flex Processing
538538
* service tier.
539539
* [Learn more](https://platform.openai.com/docs/guides/flex-processing).
@@ -1453,9 +1453,9 @@ export interface ChatCompletionCreateParamsBase {
14531453
* utilize scale tier credits until they are exhausted.
14541454
* - If set to 'auto', and the Project is not Scale tier enabled, the request will
14551455
* be processed using the default service tier with a lower uptime SLA and no
1456-
* latency guarentee.
1456+
* latency guarantee.
14571457
* - If set to 'default', the request will be processed using the default service
1458-
* tier with a lower uptime SLA and no latency guarentee.
1458+
* tier with a lower uptime SLA and no latency guarantee.
14591459
* - If set to 'flex', the request will be processed with the Flex Processing
14601460
* service tier.
14611461
* [Learn more](https://platform.openai.com/docs/guides/flex-processing).

src/resources/fine-tuning/alpha/graders.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ export class Graders extends APIResource {
2020
* type: 'string_check',
2121
* },
2222
* model_sample: 'model_sample',
23-
* reference_answer: 'string',
2423
* });
2524
* ```
2625
*/
@@ -134,14 +133,20 @@ export interface GraderRunParams {
134133
| GraderModelsAPI.MultiGrader;
135134

136135
/**
137-
* The model sample to be evaluated.
136+
* The model sample to be evaluated. This value will be used to populate the
137+
* `sample` namespace. See
138+
* [the guide](https://platform.openai.com/docs/guides/graders) for more details.
139+
* The `output_json` variable will be populated if the model sample is a valid JSON
140+
* string.
138141
*/
139142
model_sample: string;
140143

141144
/**
142-
* The reference answer for the evaluation.
145+
* The dataset item provided to the grader. This will be used to populate the
146+
* `item` namespace. See
147+
* [the guide](https://platform.openai.com/docs/guides/graders) for more details.
143148
*/
144-
reference_answer: string | unknown | Array<unknown> | number;
149+
item?: unknown;
145150
}
146151

147152
export interface GraderValidateParams {

src/resources/fine-tuning/jobs/jobs.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ export namespace FineTuningJob {
295295
* Number of examples in each batch. A larger batch size means that model
296296
* parameters are updated less frequently, but with lower variance.
297297
*/
298-
batch_size?: unknown | 'auto' | number | null;
298+
batch_size?: 'auto' | number | null;
299299

300300
/**
301301
* Scaling factor for the learning rate. A smaller learning rate may be useful to

src/resources/graders/grader-models.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,11 @@ export interface MultiGrader {
9292
*/
9393
calculate_output: string;
9494

95-
graders: Record<
96-
string,
97-
StringCheckGrader | TextSimilarityGrader | PythonGrader | ScoreModelGrader | LabelModelGrader
98-
>;
95+
/**
96+
 * A grader object (StringCheckGrader, TextSimilarityGrader, PythonGrader,
97+
 * ScoreModelGrader, or LabelModelGrader) used to compute the score.
98+
*/
99+
graders: StringCheckGrader | TextSimilarityGrader | PythonGrader | ScoreModelGrader | LabelModelGrader;
99100

100101
/**
101102
* The name of the grader.

src/resources/images.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ export interface ImageEditParams {
193193
* The image(s) to edit. Must be a supported image file or an array of images.
194194
*
195195
* For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than
196-
* 25MB. You can provide up to 16 images.
196+
* 50MB. You can provide up to 16 images.
197197
*
198198
* For `dall-e-2`, you can only provide one image, and it should be a square `png`
199199
* file less than 4MB.

src/resources/responses/responses.ts

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -185,16 +185,13 @@ export class Responses extends APIResource {
185185
*
186186
* @example
187187
* ```ts
188-
* await client.responses.cancel(
188+
* const response = await client.responses.cancel(
189189
* 'resp_677efb5139a88190b512bc3fef8e535d',
190190
* );
191191
* ```
192192
*/
193-
cancel(responseID: string, options?: RequestOptions): APIPromise<void> {
194-
return this._client.post(path`/responses/${responseID}/cancel`, {
195-
...options,
196-
headers: buildHeaders([{ Accept: '*/*' }, options?.headers]),
197-
});
193+
cancel(responseID: string, options?: RequestOptions): APIPromise<Response> {
194+
return this._client.post(path`/responses/${responseID}/cancel`, options);
198195
}
199196
}
200197

@@ -488,9 +485,9 @@ export interface Response {
488485
* utilize scale tier credits until they are exhausted.
489486
* - If set to 'auto', and the Project is not Scale tier enabled, the request will
490487
* be processed using the default service tier with a lower uptime SLA and no
491-
* latency guarentee.
488+
* latency guarantee.
492489
* - If set to 'default', the request will be processed using the default service
493-
* tier with a lower uptime SLA and no latency guarentee.
490+
* tier with a lower uptime SLA and no latency guarantee.
494491
* - If set to 'flex', the request will be processed with the Flex Processing
495492
* service tier.
496493
* [Learn more](https://platform.openai.com/docs/guides/flex-processing).
@@ -643,9 +640,9 @@ export interface ResponseCodeInterpreterCallCodeDeltaEvent {
643640
sequence_number: number;
644641

645642
/**
646-
* The type of the event. Always `response.code_interpreter_call.code.delta`.
643+
* The type of the event. Always `response.code_interpreter_call_code.delta`.
647644
*/
648-
type: 'response.code_interpreter_call.code.delta';
645+
type: 'response.code_interpreter_call_code.delta';
649646
}
650647

651648
/**
@@ -668,9 +665,9 @@ export interface ResponseCodeInterpreterCallCodeDoneEvent {
668665
sequence_number: number;
669666

670667
/**
671-
* The type of the event. Always `response.code_interpreter_call.code.done`.
668+
* The type of the event. Always `response.code_interpreter_call_code.done`.
672669
*/
673-
type: 'response.code_interpreter_call.code.done';
670+
type: 'response.code_interpreter_call_code.done';
674671
}
675672

676673
/**
@@ -1875,12 +1872,15 @@ export interface ResponseInProgressEvent {
18751872
* multi-turn conversations when using the Responses API statelessly (like when
18761873
* the `store` parameter is set to `false`, or when an organization is enrolled
18771874
* in the zero data retention program).
1875+
* - `code_interpreter_call.outputs`: Includes the outputs of python code execution
1876+
* in code interpreter tool call items.
18781877
*/
18791878
export type ResponseIncludable =
18801879
| 'file_search_call.results'
18811880
| 'message.input_image.image_url'
18821881
| 'computer_call_output.output.image_url'
1883-
| 'reasoning.encrypted_content';
1882+
| 'reasoning.encrypted_content'
1883+
| 'code_interpreter_call.outputs';
18841884

18851885
/**
18861886
* An event that is emitted when a response finishes as incomplete.
@@ -3296,7 +3296,10 @@ export interface ResponseOutputText {
32963296
* The annotations of the text output.
32973297
*/
32983298
annotations: Array<
3299-
ResponseOutputText.FileCitation | ResponseOutputText.URLCitation | ResponseOutputText.FilePath
3299+
| ResponseOutputText.FileCitation
3300+
| ResponseOutputText.URLCitation
3301+
| ResponseOutputText.ContainerFileCitation
3302+
| ResponseOutputText.FilePath
33003303
>;
33013304

33023305
/**
@@ -3363,6 +3366,36 @@ export namespace ResponseOutputText {
33633366
url: string;
33643367
}
33653368

3369+
/**
3370+
* A citation for a container file used to generate a model response.
3371+
*/
3372+
export interface ContainerFileCitation {
3373+
/**
3374+
* The ID of the container file.
3375+
*/
3376+
container_id: string;
3377+
3378+
/**
3379+
* The index of the last character of the container file citation in the message.
3380+
*/
3381+
end_index: number;
3382+
3383+
/**
3384+
* The ID of the file.
3385+
*/
3386+
file_id: string;
3387+
3388+
/**
3389+
* The index of the first character of the container file citation in the message.
3390+
*/
3391+
start_index: number;
3392+
3393+
/**
3394+
* The type of the container file citation. Always `container_file_citation`.
3395+
*/
3396+
type: 'container_file_citation';
3397+
}
3398+
33663399
/**
33673400
* A path to a file.
33683401
*/
@@ -4573,6 +4606,8 @@ export interface ResponseCreateParamsBase {
45734606
* multi-turn conversations when using the Responses API statelessly (like when
45744607
* the `store` parameter is set to `false`, or when an organization is enrolled
45754608
* in the zero data retention program).
4609+
* - `code_interpreter_call.outputs`: Includes the outputs of python code execution
4610+
* in code interpreter tool call items.
45764611
*/
45774612
include?: Array<ResponseIncludable> | null;
45784613

@@ -4631,9 +4666,9 @@ export interface ResponseCreateParamsBase {
46314666
* utilize scale tier credits until they are exhausted.
46324667
* - If set to 'auto', and the Project is not Scale tier enabled, the request will
46334668
* be processed using the default service tier with a lower uptime SLA and no
4634-
* latency guarentee.
4669+
* latency guarantee.
46354670
* - If set to 'default', the request will be processed using the default service
4636-
* tier with a lower uptime SLA and no latency guarentee.
4671+
* tier with a lower uptime SLA and no latency guarantee.
46374672
* - If set to 'flex', the request will be processed with the Flex Processing
46384673
* service tier.
46394674
* [Learn more](https://platform.openai.com/docs/guides/flex-processing).

tests/api-resources/fine-tuning/alpha/graders.test.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ describe('resource graders', () => {
1212
const responsePromise = client.fineTuning.alpha.graders.run({
1313
grader: { input: 'input', name: 'name', operation: 'eq', reference: 'reference', type: 'string_check' },
1414
model_sample: 'model_sample',
15-
reference_answer: 'string',
1615
});
1716
const rawResponse = await responsePromise.asResponse();
1817
expect(rawResponse).toBeInstanceOf(Response);
@@ -27,7 +26,7 @@ describe('resource graders', () => {
2726
const response = await client.fineTuning.alpha.graders.run({
2827
grader: { input: 'input', name: 'name', operation: 'eq', reference: 'reference', type: 'string_check' },
2928
model_sample: 'model_sample',
30-
reference_answer: 'string',
29+
item: {},
3130
});
3231
});
3332

0 commit comments

Comments
 (0)