diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2ed1eead8..b0aac41b2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,7 @@ on: jobs: lint: + timeout-minutes: 10 name: lint runs-on: ubuntu-latest steps: @@ -27,6 +28,7 @@ jobs: run: ./scripts/lint build: + timeout-minutes: 5 name: build runs-on: ubuntu-latest permissions: @@ -61,6 +63,7 @@ jobs: SHA: ${{ github.sha }} run: ./scripts/utils/upload-artifact.sh test: + timeout-minutes: 10 name: test runs-on: ubuntu-latest steps: @@ -78,6 +81,7 @@ jobs: run: ./scripts/test examples: + timeout-minutes: 10 name: examples runs-on: ubuntu-latest if: github.repository == 'openai/openai-node' diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 2f61d58b0..5b0015f5b 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "4.95.1" + ".": "4.96.0" } diff --git a/.stats.yml b/.stats.yml index 848c5b5ad..d92408173 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 97 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5633633cc38734869cf7d993f7b549bb8e4d10e0ec45381ec2cd91507cd8eb8f.yml -openapi_spec_hash: c855121b2b2324b99499c9244c21d24d -config_hash: d20837393b73efdb19cd08e04c1cc9a1 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-8b68ae6b807dca92e914da1dd9e835a20f69b075e79102a264367fd7fddddb33.yml +openapi_spec_hash: b6ade5b1a6327339e6669e1134de2d03 +config_hash: b597cd9a31e9e5ec709e2eefb4c54122 diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f864e203..47717a4eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## 4.96.0 (2025-04-23) + +Full Changelog: [v4.95.1...v4.96.0](https://github.com/openai/openai-node/compare/v4.95.1...v4.96.0) + +### Features + +* **api:** adding new image model support 
([a00d331](https://github.com/openai/openai-node/commit/a00d33190edd08df7d9c088c00ab7b77673f88ba)) + + +### Bug Fixes + +* **types:** export AssistantStream ([#1472](https://github.com/openai/openai-node/issues/1472)) ([626c844](https://github.com/openai/openai-node/commit/626c844a758a68ffbff48873d4773be2e3868952)) + + +### Chores + +* **ci:** add timeout thresholds for CI jobs ([e465063](https://github.com/openai/openai-node/commit/e46506351097f1de39c866c28b6ec20fa724fc36)) + ## 4.95.1 (2025-04-18) Full Changelog: [v4.95.0...v4.95.1](https://github.com/openai/openai-node/compare/v4.95.0...v4.95.1) diff --git a/api.md b/api.md index 2eb54b34a..49e6548a8 100644 --- a/api.md +++ b/api.md @@ -249,7 +249,7 @@ Methods: - client.fineTuning.checkpoints.permissions.create(fineTunedModelCheckpoint, { ...params }) -> PermissionCreateResponsesPage - client.fineTuning.checkpoints.permissions.retrieve(fineTunedModelCheckpoint, { ...params }) -> PermissionRetrieveResponse -- client.fineTuning.checkpoints.permissions.del(fineTunedModelCheckpoint) -> PermissionDeleteResponse +- client.fineTuning.checkpoints.permissions.del(fineTunedModelCheckpoint, permissionId) -> PermissionDeleteResponse # VectorStores @@ -626,6 +626,10 @@ Types: - ResponseOutputRefusal - ResponseOutputText - ResponseReasoningItem +- ResponseReasoningSummaryPartAddedEvent +- ResponseReasoningSummaryPartDoneEvent +- ResponseReasoningSummaryTextDeltaEvent +- ResponseReasoningSummaryTextDoneEvent - ResponseRefusalDeltaEvent - ResponseRefusalDoneEvent - ResponseStatus diff --git a/jsr.json b/jsr.json index 8271c8522..6b574ce15 100644 --- a/jsr.json +++ b/jsr.json @@ -1,6 +1,6 @@ { "name": "@openai/openai", - "version": "4.95.1", + "version": "4.96.0", "exports": { ".": "./index.ts", "./helpers/zod": "./helpers/zod.ts", diff --git a/package.json b/package.json index 76fe7d4d0..7b4e86f8e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "openai", - "version": "4.95.1", + "version": "4.96.0", 
"description": "The official TypeScript library for the OpenAI API", "author": "OpenAI ", "types": "dist/index.d.ts", diff --git a/src/resources/beta/assistants.ts b/src/resources/beta/assistants.ts index bf957db95..00a6ff2cf 100644 --- a/src/resources/beta/assistants.ts +++ b/src/resources/beta/assistants.ts @@ -9,6 +9,7 @@ import * as ThreadsAPI from './threads/threads'; import * as RunsAPI from './threads/runs/runs'; import * as StepsAPI from './threads/runs/steps'; import { CursorPage, type CursorPageParams } from '../../pagination'; +import { AssistantStream } from '../../lib/AssistantStream'; export class Assistants extends APIResource { /** @@ -1517,4 +1518,6 @@ export declare namespace Assistants { type AssistantUpdateParams as AssistantUpdateParams, type AssistantListParams as AssistantListParams, }; + + export { AssistantStream }; } diff --git a/src/resources/beta/realtime/realtime.ts b/src/resources/beta/realtime/realtime.ts index 1c02fdd1a..5012b1edd 100644 --- a/src/resources/beta/realtime/realtime.ts +++ b/src/resources/beta/realtime/realtime.ts @@ -915,12 +915,34 @@ export type RealtimeClientEvent = | ConversationItemTruncateEvent | InputAudioBufferAppendEvent | InputAudioBufferClearEvent + | RealtimeClientEvent.OutputAudioBufferClear | InputAudioBufferCommitEvent | ResponseCancelEvent | ResponseCreateEvent | SessionUpdateEvent | TranscriptionSessionUpdate; +export namespace RealtimeClientEvent { + /** + * **WebRTC Only:** Emit to cut off the current audio response. This will trigger + * the server to stop generating audio and emit an `output_audio_buffer.cleared` + * event. This event should be preceded by a `response.cancel` client event to stop + * the generation of the current response. + * [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc). + */ + export interface OutputAudioBufferClear { + /** + * The event type, must be `output_audio_buffer.clear`. 
+ */ + type: 'output_audio_buffer.clear'; + + /** + * The unique ID of the client event used for error handling. + */ + event_id?: string; + } +} + /** * The response resource. */ @@ -1174,7 +1196,10 @@ export type RealtimeServerEvent = | ResponseTextDoneEvent | SessionCreatedEvent | SessionUpdatedEvent - | TranscriptionSessionUpdatedEvent; + | TranscriptionSessionUpdatedEvent + | RealtimeServerEvent.OutputAudioBufferStarted + | RealtimeServerEvent.OutputAudioBufferStopped + | RealtimeServerEvent.OutputAudioBufferCleared; export namespace RealtimeServerEvent { /** @@ -1197,6 +1222,77 @@ export namespace RealtimeServerEvent { */ type: 'conversation.item.retrieved'; } + + /** + * **WebRTC Only:** Emitted when the server begins streaming audio to the client. + * This event is emitted after an audio content part has been added + * (`response.content_part.added`) to the response. + * [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc). + */ + export interface OutputAudioBufferStarted { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The unique ID of the response that produced the audio. + */ + response_id: string; + + /** + * The event type, must be `output_audio_buffer.started`. + */ + type: 'output_audio_buffer.started'; + } + + /** + * **WebRTC Only:** Emitted when the output audio buffer has been completely + * drained on the server, and no more audio is forthcoming. This event is emitted + * after the full response data has been sent to the client (`response.done`). + * [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc). + */ + export interface OutputAudioBufferStopped { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The unique ID of the response that produced the audio. 
+ */ + response_id: string; + + /** + * The event type, must be `output_audio_buffer.stopped`. + */ + type: 'output_audio_buffer.stopped'; + } + + /** + * **WebRTC Only:** Emitted when the output audio buffer is cleared. This happens + * either in VAD mode when the user has interrupted + * (`input_audio_buffer.speech_started`), or when the client has emitted the + * `output_audio_buffer.clear` event to manually cut off the current audio + * response. + * [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc). + */ + export interface OutputAudioBufferCleared { + /** + * The unique ID of the server event. + */ + event_id: string; + + /** + * The unique ID of the response that produced the audio. + */ + response_id: string; + + /** + * The event type, must be `output_audio_buffer.cleared`. + */ + type: 'output_audio_buffer.cleared'; + } } /** diff --git a/src/resources/beta/threads/threads.ts b/src/resources/beta/threads/threads.ts index 8075ba0ac..2e5ab1cc8 100644 --- a/src/resources/beta/threads/threads.ts +++ b/src/resources/beta/threads/threads.ts @@ -686,9 +686,7 @@ export interface ThreadCreateAndRunParamsBase { * Override the tools the assistant can use for this run. This is useful for * modifying the behavior on a per-run basis. 
*/ - tools?: Array< - AssistantsAPI.CodeInterpreterTool | AssistantsAPI.FileSearchTool | AssistantsAPI.FunctionTool - > | null; + tools?: Array | null; /** * An alternative to sampling with temperature, called nucleus sampling, where the @@ -1718,4 +1716,6 @@ export declare namespace Threads { type MessageUpdateParams as MessageUpdateParams, type MessageListParams as MessageListParams, }; + + export { AssistantStream }; } diff --git a/src/resources/evals/evals.ts b/src/resources/evals/evals.ts index 84ff6d1bb..caef7acc0 100644 --- a/src/resources/evals/evals.ts +++ b/src/resources/evals/evals.ts @@ -4,6 +4,7 @@ import { APIResource } from '../../resource'; import { isRequestOptions } from '../../core'; import * as Core from '../../core'; import * as Shared from '../shared'; +import * as ResponsesAPI from '../responses/responses'; import * as RunsAPI from './runs/runs'; import { CreateEvalCompletionsRunDataSource, @@ -107,7 +108,7 @@ export interface EvalCustomDataSourceConfig { * the evaluation. */ export interface EvalLabelModelGrader { - input: Array; + input: Array; /** * The labels to assign to each item in the evaluation. @@ -136,57 +137,43 @@ export interface EvalLabelModelGrader { } export namespace EvalLabelModelGrader { - export interface InputMessage { - content: InputMessage.Content; - + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface Input { /** - * The role of the message. One of `user`, `system`, or `developer`. + * Text inputs to the model - can contain template strings. */ - role: 'user' | 'system' | 'developer'; + content: string | ResponsesAPI.ResponseInputText | Input.OutputText; /** - * The type of item, which is always `message`. 
+ * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. */ - type: 'message'; - } - - export namespace InputMessage { - export interface Content { - /** - * The text content. - */ - text: string; - - /** - * The type of content, which is always `input_text`. - */ - type: 'input_text'; - } - } - - export interface Assistant { - content: Assistant.Content; + role: 'user' | 'assistant' | 'system' | 'developer'; /** - * The role of the message. Must be `assistant` for output. + * The type of the message input. Always `message`. */ - role: 'assistant'; + type?: 'message'; + } + export namespace Input { /** - * The type of item, which is always `message`. + * A text output from the model. */ - type: 'message'; - } - - export namespace Assistant { - export interface Content { + export interface OutputText { /** - * The text content. + * The text output from the model. */ text: string; /** - * The type of content, which is always `output_text`. + * The type of the output text. Always `output_text`. */ type: 'output_text'; } @@ -259,8 +246,8 @@ export interface EvalStringCheckGrader { */ export interface EvalTextSimilarityGrader { /** - * The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, `gleu`, - * `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`. + * The evaluation metric to use. One of `fuzzy_match`, `bleu`, `gleu`, `meteor`, + * `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`. */ evaluation_metric: | 'fuzzy_match' @@ -272,8 +259,7 @@ export interface EvalTextSimilarityGrader { | 'rouge_3' | 'rouge_4' | 'rouge_5' - | 'rouge_l' - | 'cosine'; + | 'rouge_l'; /** * The text being graded. @@ -346,14 +332,131 @@ export interface EvalCreateResponse { object: 'eval'; /** - * Indicates whether the evaluation is shared with OpenAI. + * A list of testing criteria. 
*/ - share_with_openai: boolean; + testing_criteria: Array< + | EvalLabelModelGrader + | EvalStringCheckGrader + | EvalTextSimilarityGrader + | EvalCreateResponse.Python + | EvalCreateResponse.ScoreModel + >; +} +export namespace EvalCreateResponse { /** - * A list of testing criteria. + * A PythonGrader object that runs a python script on the input. */ - testing_criteria: Array; + export interface Python { + /** + * The name of the grader. + */ + name: string; + + /** + * The source code of the python script. + */ + source: string; + + /** + * The object type, which is always `python`. + */ + type: 'python'; + + /** + * The image tag to use for the python script. + */ + image_tag?: string; + + /** + * The threshold for the score. + */ + pass_threshold?: number; + } + + /** + * A ScoreModelGrader object that uses a model to assign a score to the input. + */ + export interface ScoreModel { + /** + * The input text. This may include template strings. + */ + input: Array; + + /** + * The model to use for the evaluation. + */ + model: string; + + /** + * The name of the grader. + */ + name: string; + + /** + * The object type, which is always `score_model`. + */ + type: 'score_model'; + + /** + * The threshold for the score. + */ + pass_threshold?: number; + + /** + * The range of the score. Defaults to `[0, 1]`. + */ + range?: Array; + + /** + * The sampling parameters for the model. + */ + sampling_params?: unknown; + } + + export namespace ScoreModel { + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface Input { + /** + * Text inputs to the model - can contain template strings. 
+ */ + content: string | ResponsesAPI.ResponseInputText | Input.OutputText; + + /** + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. + */ + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. + */ + type?: 'message'; + } + + export namespace Input { + /** + * A text output from the model. + */ + export interface OutputText { + /** + * The text output from the model. + */ + text: string; + + /** + * The type of the output text. Always `output_text`. + */ + type: 'output_text'; + } + } + } } /** @@ -401,14 +504,131 @@ export interface EvalRetrieveResponse { object: 'eval'; /** - * Indicates whether the evaluation is shared with OpenAI. + * A list of testing criteria. */ - share_with_openai: boolean; + testing_criteria: Array< + | EvalLabelModelGrader + | EvalStringCheckGrader + | EvalTextSimilarityGrader + | EvalRetrieveResponse.Python + | EvalRetrieveResponse.ScoreModel + >; +} +export namespace EvalRetrieveResponse { /** - * A list of testing criteria. + * A PythonGrader object that runs a python script on the input. + */ + export interface Python { + /** + * The name of the grader. + */ + name: string; + + /** + * The source code of the python script. + */ + source: string; + + /** + * The object type, which is always `python`. + */ + type: 'python'; + + /** + * The image tag to use for the python script. + */ + image_tag?: string; + + /** + * The threshold for the score. + */ + pass_threshold?: number; + } + + /** + * A ScoreModelGrader object that uses a model to assign a score to the input. */ - testing_criteria: Array; + export interface ScoreModel { + /** + * The input text. This may include template strings. + */ + input: Array; + + /** + * The model to use for the evaluation. + */ + model: string; + + /** + * The name of the grader. + */ + name: string; + + /** + * The object type, which is always `score_model`. 
+ */ + type: 'score_model'; + + /** + * The threshold for the score. + */ + pass_threshold?: number; + + /** + * The range of the score. Defaults to `[0, 1]`. + */ + range?: Array; + + /** + * The sampling parameters for the model. + */ + sampling_params?: unknown; + } + + export namespace ScoreModel { + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface Input { + /** + * Text inputs to the model - can contain template strings. + */ + content: string | ResponsesAPI.ResponseInputText | Input.OutputText; + + /** + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. + */ + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. + */ + type?: 'message'; + } + + export namespace Input { + /** + * A text output from the model. + */ + export interface OutputText { + /** + * The text output from the model. + */ + text: string; + + /** + * The type of the output text. Always `output_text`. + */ + type: 'output_text'; + } + } + } } /** @@ -456,14 +676,131 @@ export interface EvalUpdateResponse { object: 'eval'; /** - * Indicates whether the evaluation is shared with OpenAI. + * A list of testing criteria. + */ + testing_criteria: Array< + | EvalLabelModelGrader + | EvalStringCheckGrader + | EvalTextSimilarityGrader + | EvalUpdateResponse.Python + | EvalUpdateResponse.ScoreModel + >; +} + +export namespace EvalUpdateResponse { + /** + * A PythonGrader object that runs a python script on the input. */ - share_with_openai: boolean; + export interface Python { + /** + * The name of the grader. + */ + name: string; + + /** + * The source code of the python script. 
+ */ + source: string; + + /** + * The object type, which is always `python`. + */ + type: 'python'; + + /** + * The image tag to use for the python script. + */ + image_tag?: string; + + /** + * The threshold for the score. + */ + pass_threshold?: number; + } /** - * A list of testing criteria. + * A ScoreModelGrader object that uses a model to assign a score to the input. */ - testing_criteria: Array; + export interface ScoreModel { + /** + * The input text. This may include template strings. + */ + input: Array; + + /** + * The model to use for the evaluation. + */ + model: string; + + /** + * The name of the grader. + */ + name: string; + + /** + * The object type, which is always `score_model`. + */ + type: 'score_model'; + + /** + * The threshold for the score. + */ + pass_threshold?: number; + + /** + * The range of the score. Defaults to `[0, 1]`. + */ + range?: Array; + + /** + * The sampling parameters for the model. + */ + sampling_params?: unknown; + } + + export namespace ScoreModel { + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface Input { + /** + * Text inputs to the model - can contain template strings. + */ + content: string | ResponsesAPI.ResponseInputText | Input.OutputText; + + /** + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. + */ + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. + */ + type?: 'message'; + } + + export namespace Input { + /** + * A text output from the model. + */ + export interface OutputText { + /** + * The text output from the model. + */ + text: string; + + /** + * The type of the output text. 
Always `output_text`. + */ + type: 'output_text'; + } + } + } } /** @@ -511,14 +848,131 @@ export interface EvalListResponse { object: 'eval'; /** - * Indicates whether the evaluation is shared with OpenAI. + * A list of testing criteria. */ - share_with_openai: boolean; + testing_criteria: Array< + | EvalLabelModelGrader + | EvalStringCheckGrader + | EvalTextSimilarityGrader + | EvalListResponse.Python + | EvalListResponse.ScoreModel + >; +} +export namespace EvalListResponse { /** - * A list of testing criteria. + * A PythonGrader object that runs a python script on the input. + */ + export interface Python { + /** + * The name of the grader. + */ + name: string; + + /** + * The source code of the python script. + */ + source: string; + + /** + * The object type, which is always `python`. + */ + type: 'python'; + + /** + * The image tag to use for the python script. + */ + image_tag?: string; + + /** + * The threshold for the score. + */ + pass_threshold?: number; + } + + /** + * A ScoreModelGrader object that uses a model to assign a score to the input. */ - testing_criteria: Array; + export interface ScoreModel { + /** + * The input text. This may include template strings. + */ + input: Array; + + /** + * The model to use for the evaluation. + */ + model: string; + + /** + * The name of the grader. + */ + name: string; + + /** + * The object type, which is always `score_model`. + */ + type: 'score_model'; + + /** + * The threshold for the score. + */ + pass_threshold?: number; + + /** + * The range of the score. Defaults to `[0, 1]`. + */ + range?: Array; + + /** + * The sampling parameters for the model. + */ + sampling_params?: unknown; + } + + export namespace ScoreModel { + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. 
Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface Input { + /** + * Text inputs to the model - can contain template strings. + */ + content: string | ResponsesAPI.ResponseInputText | Input.OutputText; + + /** + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. + */ + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. + */ + type?: 'message'; + } + + export namespace Input { + /** + * A text output from the model. + */ + export interface OutputText { + /** + * The text output from the model. + */ + text: string; + + /** + * The type of the output text. Always `output_text`. + */ + type: 'output_text'; + } + } + } } export interface EvalDeleteResponse { @@ -533,12 +987,18 @@ export interface EvalCreateParams { /** * The configuration for the data source used for the evaluation runs. */ - data_source_config: EvalCreateParams.Custom | EvalCreateParams.StoredCompletions; + data_source_config: EvalCreateParams.Custom | EvalCreateParams.Logs; /** * A list of graders for all eval runs in this group. */ - testing_criteria: Array; + testing_criteria: Array< + | EvalCreateParams.LabelModel + | EvalStringCheckGrader + | EvalTextSimilarityGrader + | EvalCreateParams.Python + | EvalCreateParams.ScoreModel + >; /** * Set of 16 key-value pairs that can be attached to an object. This can be useful @@ -554,11 +1014,6 @@ export interface EvalCreateParams { * The name of the evaluation. */ name?: string; - - /** - * Indicates whether the evaluation is shared with OpenAI. - */ - share_with_openai?: boolean; } export namespace EvalCreateParams { @@ -572,7 +1027,7 @@ export namespace EvalCreateParams { */ export interface Custom { /** - * The json schema for the run data source items. + * The json schema for each row in the data source. 
+ */ item_schema: Record; @@ -582,7 +1037,8 @@ export namespace EvalCreateParams { type: 'custom'; /** - * Whether to include the sample schema in the data source. + * Whether the eval should expect you to populate the sample namespace (i.e., by + * generating responses off of your data source) */ include_sample_schema?: boolean; } @@ -592,21 +1048,16 @@ export namespace EvalCreateParams { * completions query. This is usually metadata like `usecase=chatbot` or * `prompt-version=v2`, etc. */ - export interface StoredCompletions { + export interface Logs { /** - * The type of data source. Always `stored_completions`. + * The type of data source. Always `logs`. */ - type: 'stored_completions'; + type: 'logs'; /** - * Set of 16 key-value pairs that can be attached to an object. This can be useful - * for storing additional information about the object in a structured format, and - * querying for objects via API or the dashboard. - * - * Keys are strings with a maximum length of 64 characters. Values are strings with - * a maximum length of 512 characters. + * Metadata filters for the logs data source. */ - metadata?: Shared.Metadata | null; + metadata?: Record; } /** @@ -614,7 +1065,11 @@ export namespace EvalCreateParams { * the evaluation. */ export interface LabelModel { - input: Array; + /** + * A list of chat messages forming the prompt or context. May include variable + * references to the "item" namespace, i.e. {{item.name}}. + */ + input: Array; /** * The labels to classify to each item in the evaluation. @@ -655,57 +1110,157 @@ export namespace EvalCreateParams { role: string; } - export interface InputMessage { - content: InputMessage.Content; + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. 
Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface EvalItem { + /** + * Text inputs to the model - can contain template strings. + */ + content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText; /** - * The role of the message. One of `user`, `system`, or `developer`. + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. */ - role: 'user' | 'system' | 'developer'; + role: 'user' | 'assistant' | 'system' | 'developer'; /** - * The type of item, which is always `message`. + * The type of the message input. Always `message`. */ - type: 'message'; + type?: 'message'; } - export namespace InputMessage { - export interface Content { + export namespace EvalItem { + /** + * A text output from the model. + */ + export interface OutputText { /** - * The text content. + * The text output from the model. */ text: string; /** - * The type of content, which is always `input_text`. + * The type of the output text. Always `output_text`. */ - type: 'input_text'; + type: 'output_text'; } } + } + + /** + * A PythonGrader object that runs a python script on the input. + */ + export interface Python { + /** + * The name of the grader. + */ + name: string; - export interface OutputMessage { - content: OutputMessage.Content; + /** + * The source code of the python script. + */ + source: string; + + /** + * The object type, which is always `python`. + */ + type: 'python'; + + /** + * The image tag to use for the python script. + */ + image_tag?: string; + + /** + * The threshold for the score. + */ + pass_threshold?: number; + } + + /** + * A ScoreModelGrader object that uses a model to assign a score to the input. + */ + export interface ScoreModel { + /** + * The input text. This may include template strings. + */ + input: Array; + + /** + * The model to use for the evaluation. + */ + model: string; + + /** + * The name of the grader. 
+ */ + name: string; + + /** + * The object type, which is always `score_model`. + */ + type: 'score_model'; + + /** + * The threshold for the score. + */ + pass_threshold?: number; + + /** + * The range of the score. Defaults to `[0, 1]`. + */ + range?: Array; + /** + * The sampling parameters for the model. + */ + sampling_params?: unknown; + } + + export namespace ScoreModel { + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface Input { /** - * The role of the message. Must be `assistant` for output. + * Text inputs to the model - can contain template strings. */ - role: 'assistant'; + content: string | ResponsesAPI.ResponseInputText | Input.OutputText; /** - * The type of item, which is always `message`. + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. */ - type: 'message'; + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. + */ + type?: 'message'; } - export namespace OutputMessage { - export interface Content { + export namespace Input { + /** + * A text output from the model. + */ + export interface OutputText { /** - * The text content. + * The text output from the model. */ text: string; /** - * The type of content, which is always `output_text`. + * The type of the output text. Always `output_text`. 
*/ type: 'output_text'; } diff --git a/src/resources/evals/runs/runs.ts b/src/resources/evals/runs/runs.ts index ca2b7f424..50c07a514 100644 --- a/src/resources/evals/runs/runs.ts +++ b/src/resources/evals/runs/runs.ts @@ -4,6 +4,7 @@ import { APIResource } from '../../../resource'; import { isRequestOptions } from '../../../core'; import * as Core from '../../../core'; import * as Shared from '../../shared'; +import * as ResponsesAPI from '../../responses/responses'; import * as OutputItemsAPI from './output-items'; import { OutputItemListParams, @@ -83,15 +84,6 @@ export class RunListResponsesPage extends CursorPage {} * A CompletionsRunDataSource object describing a model sampling configuration. */ export interface CreateEvalCompletionsRunDataSource { - input_messages: - | CreateEvalCompletionsRunDataSource.Template - | CreateEvalCompletionsRunDataSource.ItemReference; - - /** - * The name of the model to use for generating completions (e.g. "o3-mini"). - */ - model: string; - /** * A StoredCompletionsRunDataSource configuration describing a set of filters */ @@ -105,105 +97,19 @@ export interface CreateEvalCompletionsRunDataSource { */ type: 'completions'; + input_messages?: + | CreateEvalCompletionsRunDataSource.Template + | CreateEvalCompletionsRunDataSource.ItemReference; + + /** + * The name of the model to use for generating completions (e.g. "o3-mini"). + */ + model?: string; + sampling_params?: CreateEvalCompletionsRunDataSource.SamplingParams; } export namespace CreateEvalCompletionsRunDataSource { - export interface Template { - /** - * A list of chat messages forming the prompt or context. May include variable - * references to the "item" namespace, ie {{item.name}}. - */ - template: Array; - - /** - * The type of input messages. Always `template`. - */ - type: 'template'; - } - - export namespace Template { - export interface ChatMessage { - /** - * The content of the message. - */ - content: string; - - /** - * The role of the message (e.g. 
"system", "assistant", "user"). - */ - role: string; - } - - export interface InputMessage { - content: InputMessage.Content; - - /** - * The role of the message. One of `user`, `system`, or `developer`. - */ - role: 'user' | 'system' | 'developer'; - - /** - * The type of item, which is always `message`. - */ - type: 'message'; - } - - export namespace InputMessage { - export interface Content { - /** - * The text content. - */ - text: string; - - /** - * The type of content, which is always `input_text`. - */ - type: 'input_text'; - } - } - - export interface OutputMessage { - content: OutputMessage.Content; - - /** - * The role of the message. Must be `assistant` for output. - */ - role: 'assistant'; - - /** - * The type of item, which is always `message`. - */ - type: 'message'; - } - - export namespace OutputMessage { - export interface Content { - /** - * The text content. - */ - text: string; - - /** - * The type of content, which is always `output_text`. - */ - type: 'output_text'; - } - } - } - - export interface ItemReference { - /** - * A reference to a variable in the "item" namespace. Ie, "item.name" - */ - item_reference: string; - - /** - * The type of input messages. Always `item_reference`. - */ - type: 'item_reference'; - } - export interface FileContent { /** * The content of the jsonl file. @@ -240,20 +146,25 @@ export namespace CreateEvalCompletionsRunDataSource { * A StoredCompletionsRunDataSource configuration describing a set of filters */ export interface StoredCompletions { + /** + * The type of source. Always `stored_completions`. + */ + type: 'stored_completions'; + /** * An optional Unix timestamp to filter items created after this time. */ - created_after: number | null; + created_after?: number | null; /** * An optional Unix timestamp to filter items created before this time. */ - created_before: number | null; + created_before?: number | null; /** * An optional maximum number of items to return. 
*/ - limit: number | null; + limit?: number | null; /** * Set of 16 key-value pairs that can be attached to an object. This can be useful @@ -263,17 +174,81 @@ export namespace CreateEvalCompletionsRunDataSource { * Keys are strings with a maximum length of 64 characters. Values are strings with * a maximum length of 512 characters. */ - metadata: Shared.Metadata | null; + metadata?: Shared.Metadata | null; /** * An optional model to filter by (e.g., 'gpt-4o'). */ - model: string | null; + model?: string | null; + } + export interface Template { /** - * The type of source. Always `stored_completions`. + * A list of chat messages forming the prompt or context. May include variable + * references to the "item" namespace, ie {{item.name}}. */ - type: 'stored_completions'; + template: Array; + + /** + * The type of input messages. Always `template`. + */ + type: 'template'; + } + + export namespace Template { + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface Message { + /** + * Text inputs to the model - can contain template strings. + */ + content: string | ResponsesAPI.ResponseInputText | Message.OutputText; + + /** + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. + */ + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. + */ + type?: 'message'; + } + + export namespace Message { + /** + * A text output from the model. + */ + export interface OutputText { + /** + * The text output from the model. + */ + text: string; + + /** + * The type of the output text. Always `output_text`. 
+ */ + type: 'output_text'; + } + } + } + + export interface ItemReference { + /** + * A reference to a variable in the "item" namespace. Ie, "item.name" + */ + item_reference: string; + + /** + * The type of input messages. Always `item_reference`. + */ + type: 'item_reference'; } export interface SamplingParams { @@ -378,7 +353,10 @@ export interface RunCreateResponse { /** * Information about the run's data source. */ - data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource; + data_source: + | CreateEvalJSONLRunDataSource + | CreateEvalCompletionsRunDataSource + | RunCreateResponse.Completions; /** * An object representing an error response from the Eval API. @@ -442,162 +420,240 @@ export interface RunCreateResponse { } export namespace RunCreateResponse { - export interface PerModelUsage { - /** - * The number of tokens retrieved from cache. - */ - cached_tokens: number; - + /** + * A ResponsesRunDataSource object describing a model sampling configuration. + */ + export interface Completions { /** - * The number of completion tokens generated. + * A EvalResponsesSource object describing a run data source configuration. */ - completion_tokens: number; + source: Completions.FileContent | Completions.FileID | Completions.Responses; /** - * The number of invocations. + * The type of run data source. Always `completions`. */ - invocation_count: number; + type: 'completions'; - /** - * The name of the model. - */ - model_name: string; + input_messages?: Completions.Template | Completions.ItemReference; /** - * The number of prompt tokens used. + * The name of the model to use for generating completions (e.g. "o3-mini"). */ - prompt_tokens: number; + model?: string; - /** - * The total number of tokens used. - */ - total_tokens: number; + sampling_params?: Completions.SamplingParams; } - export interface PerTestingCriteriaResult { - /** - * Number of tests failed for this criteria. 
- */ - failed: number; + export namespace Completions { + export interface FileContent { + /** + * The content of the jsonl file. + */ + content: Array; - /** - * Number of tests passed for this criteria. - */ - passed: number; + /** + * The type of jsonl source. Always `file_content`. + */ + type: 'file_content'; + } - /** - * A description of the testing criteria. - */ - testing_criteria: string; - } + export namespace FileContent { + export interface Content { + item: Record; - /** - * Counters summarizing the outcomes of the evaluation run. - */ - export interface ResultCounts { - /** - * Number of output items that resulted in an error. - */ - errored: number; + sample?: Record; + } + } - /** - * Number of output items that failed to pass the evaluation. - */ - failed: number; + export interface FileID { + /** + * The identifier of the file. + */ + id: string; - /** - * Number of output items that passed the evaluation. - */ - passed: number; + /** + * The type of jsonl source. Always `file_id`. + */ + type: 'file_id'; + } /** - * Total number of executed output items. + * A EvalResponsesSource object describing a run data source configuration. */ - total: number; - } -} + export interface Responses { + /** + * The type of run data source. Always `responses`. + */ + type: 'responses'; -/** - * A schema representing an evaluation run. - */ -export interface RunRetrieveResponse { - /** - * Unique identifier for the evaluation run. - */ - id: string; + /** + * Whether to allow parallel tool calls. This is a query parameter used to select + * responses. + */ + allow_parallel_tool_calls?: boolean | null; - /** - * Unix timestamp (in seconds) when the evaluation run was created. - */ - created_at: number; + /** + * Only include items created after this timestamp (inclusive). This is a query + * parameter used to select responses. + */ + created_after?: number | null; - /** - * Information about the run's data source. 
- */ - data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource; + /** + * Only include items created before this timestamp (inclusive). This is a query + * parameter used to select responses. + */ + created_before?: number | null; - /** - * An object representing an error response from the Eval API. - */ - error: EvalAPIError; + /** + * Whether the response has tool calls. This is a query parameter used to select + * responses. + */ + has_tool_calls?: boolean | null; - /** - * The identifier of the associated evaluation. - */ - eval_id: string; + /** + * Optional search string for instructions. This is a query parameter used to + * select responses. + */ + instructions_search?: string | null; - /** - * Set of 16 key-value pairs that can be attached to an object. This can be useful - * for storing additional information about the object in a structured format, and - * querying for objects via API or the dashboard. - * - * Keys are strings with a maximum length of 64 characters. Values are strings with - * a maximum length of 512 characters. - */ - metadata: Shared.Metadata | null; + /** + * Metadata filter for the responses. This is a query parameter used to select + * responses. + */ + metadata?: unknown | null; - /** - * The model that is evaluated, if applicable. - */ - model: string; + /** + * The name of the model to find responses for. This is a query parameter used to + * select responses. + */ + model?: string | null; - /** - * The name of the evaluation run. - */ - name: string; + /** + * Optional reasoning effort parameter. This is a query parameter used to select + * responses. + */ + reasoning_effort?: Shared.ReasoningEffort | null; - /** - * The type of the object. Always "eval.run". - */ - object: 'eval.run'; + /** + * Sampling temperature. This is a query parameter used to select responses. + */ + temperature?: number | null; - /** - * Usage statistics for each model during the evaluation run. 
- */ - per_model_usage: Array; + /** + * Nucleus sampling parameter. This is a query parameter used to select responses. + */ + top_p?: number | null; - /** - * Results per testing criteria applied during the evaluation run. - */ - per_testing_criteria_results: Array; + /** + * List of user identifiers. This is a query parameter used to select responses. + */ + users?: Array | null; + } - /** - * The URL to the rendered evaluation run report on the UI dashboard. - */ - report_url: string; + export interface Template { + /** + * A list of chat messages forming the prompt or context. May include variable + * references to the "item" namespace, ie {{item.name}}. + */ + template: Array; - /** - * Counters summarizing the outcomes of the evaluation run. - */ - result_counts: RunRetrieveResponse.ResultCounts; + /** + * The type of input messages. Always `template`. + */ + type: 'template'; + } - /** - * The status of the evaluation run. - */ - status: string; -} + export namespace Template { + export interface ChatMessage { + /** + * The content of the message. + */ + content: string; + + /** + * The role of the message (e.g. "system", "assistant", "user"). + */ + role: string; + } + + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface EvalItem { + /** + * Text inputs to the model - can contain template strings. + */ + content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText; + + /** + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. + */ + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. 
+ */ + type?: 'message'; + } + + export namespace EvalItem { + /** + * A text output from the model. + */ + export interface OutputText { + /** + * The text output from the model. + */ + text: string; + + /** + * The type of the output text. Always `output_text`. + */ + type: 'output_text'; + } + } + } + + export interface ItemReference { + /** + * A reference to a variable in the "item" namespace. Ie, "item.name" + */ + item_reference: string; + + /** + * The type of input messages. Always `item_reference`. + */ + type: 'item_reference'; + } + + export interface SamplingParams { + /** + * The maximum number of tokens in the generated output. + */ + max_completion_tokens?: number; + + /** + * A seed value to initialize the randomness, during sampling. + */ + seed?: number; + + /** + * A higher temperature increases randomness in the outputs. + */ + temperature?: number; + + /** + * An alternative to temperature for nucleus sampling; 1.0 includes all tokens. + */ + top_p?: number; + } + } -export namespace RunRetrieveResponse { export interface PerModelUsage { /** * The number of tokens retrieved from cache. @@ -676,7 +732,7 @@ export namespace RunRetrieveResponse { /** * A schema representing an evaluation run. */ -export interface RunListResponse { +export interface RunRetrieveResponse { /** * Unique identifier for the evaluation run. */ @@ -690,7 +746,10 @@ export interface RunListResponse { /** * Information about the run's data source. */ - data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource; + data_source: + | CreateEvalJSONLRunDataSource + | CreateEvalCompletionsRunDataSource + | RunRetrieveResponse.Completions; /** * An object representing an error response from the Eval API. @@ -730,12 +789,12 @@ export interface RunListResponse { /** * Usage statistics for each model during the evaluation run. */ - per_model_usage: Array; + per_model_usage: Array; /** * Results per testing criteria applied during the evaluation run. 
*/ - per_testing_criteria_results: Array; + per_testing_criteria_results: Array; /** * The URL to the rendered evaluation run report on the UI dashboard. @@ -745,7 +804,7 @@ export interface RunListResponse { /** * Counters summarizing the outcomes of the evaluation run. */ - result_counts: RunListResponse.ResultCounts; + result_counts: RunRetrieveResponse.ResultCounts; /** * The status of the evaluation run. @@ -753,7 +812,241 @@ export interface RunListResponse { status: string; } -export namespace RunListResponse { +export namespace RunRetrieveResponse { + /** + * A ResponsesRunDataSource object describing a model sampling configuration. + */ + export interface Completions { + /** + * A EvalResponsesSource object describing a run data source configuration. + */ + source: Completions.FileContent | Completions.FileID | Completions.Responses; + + /** + * The type of run data source. Always `completions`. + */ + type: 'completions'; + + input_messages?: Completions.Template | Completions.ItemReference; + + /** + * The name of the model to use for generating completions (e.g. "o3-mini"). + */ + model?: string; + + sampling_params?: Completions.SamplingParams; + } + + export namespace Completions { + export interface FileContent { + /** + * The content of the jsonl file. + */ + content: Array; + + /** + * The type of jsonl source. Always `file_content`. + */ + type: 'file_content'; + } + + export namespace FileContent { + export interface Content { + item: Record; + + sample?: Record; + } + } + + export interface FileID { + /** + * The identifier of the file. + */ + id: string; + + /** + * The type of jsonl source. Always `file_id`. + */ + type: 'file_id'; + } + + /** + * A EvalResponsesSource object describing a run data source configuration. + */ + export interface Responses { + /** + * The type of run data source. Always `responses`. + */ + type: 'responses'; + + /** + * Whether to allow parallel tool calls. This is a query parameter used to select + * responses. 
+ */ + allow_parallel_tool_calls?: boolean | null; + + /** + * Only include items created after this timestamp (inclusive). This is a query + * parameter used to select responses. + */ + created_after?: number | null; + + /** + * Only include items created before this timestamp (inclusive). This is a query + * parameter used to select responses. + */ + created_before?: number | null; + + /** + * Whether the response has tool calls. This is a query parameter used to select + * responses. + */ + has_tool_calls?: boolean | null; + + /** + * Optional search string for instructions. This is a query parameter used to + * select responses. + */ + instructions_search?: string | null; + + /** + * Metadata filter for the responses. This is a query parameter used to select + * responses. + */ + metadata?: unknown | null; + + /** + * The name of the model to find responses for. This is a query parameter used to + * select responses. + */ + model?: string | null; + + /** + * Optional reasoning effort parameter. This is a query parameter used to select + * responses. + */ + reasoning_effort?: Shared.ReasoningEffort | null; + + /** + * Sampling temperature. This is a query parameter used to select responses. + */ + temperature?: number | null; + + /** + * Nucleus sampling parameter. This is a query parameter used to select responses. + */ + top_p?: number | null; + + /** + * List of user identifiers. This is a query parameter used to select responses. + */ + users?: Array | null; + } + + export interface Template { + /** + * A list of chat messages forming the prompt or context. May include variable + * references to the "item" namespace, ie {{item.name}}. + */ + template: Array; + + /** + * The type of input messages. Always `template`. + */ + type: 'template'; + } + + export namespace Template { + export interface ChatMessage { + /** + * The content of the message. + */ + content: string; + + /** + * The role of the message (e.g. "system", "assistant", "user"). 
+ */ + role: string; + } + + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface EvalItem { + /** + * Text inputs to the model - can contain template strings. + */ + content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText; + + /** + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. + */ + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. + */ + type?: 'message'; + } + + export namespace EvalItem { + /** + * A text output from the model. + */ + export interface OutputText { + /** + * The text output from the model. + */ + text: string; + + /** + * The type of the output text. Always `output_text`. + */ + type: 'output_text'; + } + } + } + + export interface ItemReference { + /** + * A reference to a variable in the "item" namespace. Ie, "item.name" + */ + item_reference: string; + + /** + * The type of input messages. Always `item_reference`. + */ + type: 'item_reference'; + } + + export interface SamplingParams { + /** + * The maximum number of tokens in the generated output. + */ + max_completion_tokens?: number; + + /** + * A seed value to initialize the randomness, during sampling. + */ + seed?: number; + + /** + * A higher temperature increases randomness in the outputs. + */ + temperature?: number; + + /** + * An alternative to temperature for nucleus sampling; 1.0 includes all tokens. + */ + top_p?: number; + } + } + export interface PerModelUsage { /** * The number of tokens retrieved from cache. 
@@ -829,18 +1122,10 @@ export namespace RunListResponse { } } -export interface RunDeleteResponse { - deleted?: boolean; - - object?: string; - - run_id?: string; -} - /** * A schema representing an evaluation run. */ -export interface RunCancelResponse { +export interface RunListResponse { /** * Unique identifier for the evaluation run. */ @@ -854,7 +1139,10 @@ export interface RunCancelResponse { /** * Information about the run's data source. */ - data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource; + data_source: + | CreateEvalJSONLRunDataSource + | CreateEvalCompletionsRunDataSource + | RunListResponse.Completions; /** * An object representing an error response from the Eval API. @@ -894,12 +1182,12 @@ export interface RunCancelResponse { /** * Usage statistics for each model during the evaluation run. */ - per_model_usage: Array; + per_model_usage: Array; /** * Results per testing criteria applied during the evaluation run. */ - per_testing_criteria_results: Array; + per_testing_criteria_results: Array; /** * The URL to the rendered evaluation run report on the UI dashboard. @@ -909,7 +1197,7 @@ export interface RunCancelResponse { /** * Counters summarizing the outcomes of the evaluation run. */ - result_counts: RunCancelResponse.ResultCounts; + result_counts: RunListResponse.ResultCounts; /** * The status of the evaluation run. @@ -917,25 +1205,660 @@ export interface RunCancelResponse { status: string; } -export namespace RunCancelResponse { - export interface PerModelUsage { +export namespace RunListResponse { + /** + * A ResponsesRunDataSource object describing a model sampling configuration. + */ + export interface Completions { /** - * The number of tokens retrieved from cache. + * A EvalResponsesSource object describing a run data source configuration. */ - cached_tokens: number; + source: Completions.FileContent | Completions.FileID | Completions.Responses; /** - * The number of completion tokens generated. 
+ * The type of run data source. Always `completions`. */ - completion_tokens: number; + type: 'completions'; - /** - * The number of invocations. - */ - invocation_count: number; + input_messages?: Completions.Template | Completions.ItemReference; /** - * The name of the model. + * The name of the model to use for generating completions (e.g. "o3-mini"). + */ + model?: string; + + sampling_params?: Completions.SamplingParams; + } + + export namespace Completions { + export interface FileContent { + /** + * The content of the jsonl file. + */ + content: Array; + + /** + * The type of jsonl source. Always `file_content`. + */ + type: 'file_content'; + } + + export namespace FileContent { + export interface Content { + item: Record; + + sample?: Record; + } + } + + export interface FileID { + /** + * The identifier of the file. + */ + id: string; + + /** + * The type of jsonl source. Always `file_id`. + */ + type: 'file_id'; + } + + /** + * A EvalResponsesSource object describing a run data source configuration. + */ + export interface Responses { + /** + * The type of run data source. Always `responses`. + */ + type: 'responses'; + + /** + * Whether to allow parallel tool calls. This is a query parameter used to select + * responses. + */ + allow_parallel_tool_calls?: boolean | null; + + /** + * Only include items created after this timestamp (inclusive). This is a query + * parameter used to select responses. + */ + created_after?: number | null; + + /** + * Only include items created before this timestamp (inclusive). This is a query + * parameter used to select responses. + */ + created_before?: number | null; + + /** + * Whether the response has tool calls. This is a query parameter used to select + * responses. + */ + has_tool_calls?: boolean | null; + + /** + * Optional search string for instructions. This is a query parameter used to + * select responses. + */ + instructions_search?: string | null; + + /** + * Metadata filter for the responses. 
This is a query parameter used to select + * responses. + */ + metadata?: unknown | null; + + /** + * The name of the model to find responses for. This is a query parameter used to + * select responses. + */ + model?: string | null; + + /** + * Optional reasoning effort parameter. This is a query parameter used to select + * responses. + */ + reasoning_effort?: Shared.ReasoningEffort | null; + + /** + * Sampling temperature. This is a query parameter used to select responses. + */ + temperature?: number | null; + + /** + * Nucleus sampling parameter. This is a query parameter used to select responses. + */ + top_p?: number | null; + + /** + * List of user identifiers. This is a query parameter used to select responses. + */ + users?: Array | null; + } + + export interface Template { + /** + * A list of chat messages forming the prompt or context. May include variable + * references to the "item" namespace, ie {{item.name}}. + */ + template: Array; + + /** + * The type of input messages. Always `template`. + */ + type: 'template'; + } + + export namespace Template { + export interface ChatMessage { + /** + * The content of the message. + */ + content: string; + + /** + * The role of the message (e.g. "system", "assistant", "user"). + */ + role: string; + } + + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface EvalItem { + /** + * Text inputs to the model - can contain template strings. + */ + content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText; + + /** + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. 
+ */ + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. + */ + type?: 'message'; + } + + export namespace EvalItem { + /** + * A text output from the model. + */ + export interface OutputText { + /** + * The text output from the model. + */ + text: string; + + /** + * The type of the output text. Always `output_text`. + */ + type: 'output_text'; + } + } + } + + export interface ItemReference { + /** + * A reference to a variable in the "item" namespace. Ie, "item.name" + */ + item_reference: string; + + /** + * The type of input messages. Always `item_reference`. + */ + type: 'item_reference'; + } + + export interface SamplingParams { + /** + * The maximum number of tokens in the generated output. + */ + max_completion_tokens?: number; + + /** + * A seed value to initialize the randomness, during sampling. + */ + seed?: number; + + /** + * A higher temperature increases randomness in the outputs. + */ + temperature?: number; + + /** + * An alternative to temperature for nucleus sampling; 1.0 includes all tokens. + */ + top_p?: number; + } + } + + export interface PerModelUsage { + /** + * The number of tokens retrieved from cache. + */ + cached_tokens: number; + + /** + * The number of completion tokens generated. + */ + completion_tokens: number; + + /** + * The number of invocations. + */ + invocation_count: number; + + /** + * The name of the model. + */ + model_name: string; + + /** + * The number of prompt tokens used. + */ + prompt_tokens: number; + + /** + * The total number of tokens used. + */ + total_tokens: number; + } + + export interface PerTestingCriteriaResult { + /** + * Number of tests failed for this criteria. + */ + failed: number; + + /** + * Number of tests passed for this criteria. + */ + passed: number; + + /** + * A description of the testing criteria. + */ + testing_criteria: string; + } + + /** + * Counters summarizing the outcomes of the evaluation run. 
+ */ + export interface ResultCounts { + /** + * Number of output items that resulted in an error. + */ + errored: number; + + /** + * Number of output items that failed to pass the evaluation. + */ + failed: number; + + /** + * Number of output items that passed the evaluation. + */ + passed: number; + + /** + * Total number of executed output items. + */ + total: number; + } +} + +export interface RunDeleteResponse { + deleted?: boolean; + + object?: string; + + run_id?: string; +} + +/** + * A schema representing an evaluation run. + */ +export interface RunCancelResponse { + /** + * Unique identifier for the evaluation run. + */ + id: string; + + /** + * Unix timestamp (in seconds) when the evaluation run was created. + */ + created_at: number; + + /** + * Information about the run's data source. + */ + data_source: + | CreateEvalJSONLRunDataSource + | CreateEvalCompletionsRunDataSource + | RunCancelResponse.Completions; + + /** + * An object representing an error response from the Eval API. + */ + error: EvalAPIError; + + /** + * The identifier of the associated evaluation. + */ + eval_id: string; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata: Shared.Metadata | null; + + /** + * The model that is evaluated, if applicable. + */ + model: string; + + /** + * The name of the evaluation run. + */ + name: string; + + /** + * The type of the object. Always "eval.run". + */ + object: 'eval.run'; + + /** + * Usage statistics for each model during the evaluation run. + */ + per_model_usage: Array; + + /** + * Results per testing criteria applied during the evaluation run. 
+ */ + per_testing_criteria_results: Array; + + /** + * The URL to the rendered evaluation run report on the UI dashboard. + */ + report_url: string; + + /** + * Counters summarizing the outcomes of the evaluation run. + */ + result_counts: RunCancelResponse.ResultCounts; + + /** + * The status of the evaluation run. + */ + status: string; +} + +export namespace RunCancelResponse { + /** + * A ResponsesRunDataSource object describing a model sampling configuration. + */ + export interface Completions { + /** + * A EvalResponsesSource object describing a run data source configuration. + */ + source: Completions.FileContent | Completions.FileID | Completions.Responses; + + /** + * The type of run data source. Always `completions`. + */ + type: 'completions'; + + input_messages?: Completions.Template | Completions.ItemReference; + + /** + * The name of the model to use for generating completions (e.g. "o3-mini"). + */ + model?: string; + + sampling_params?: Completions.SamplingParams; + } + + export namespace Completions { + export interface FileContent { + /** + * The content of the jsonl file. + */ + content: Array; + + /** + * The type of jsonl source. Always `file_content`. + */ + type: 'file_content'; + } + + export namespace FileContent { + export interface Content { + item: Record; + + sample?: Record; + } + } + + export interface FileID { + /** + * The identifier of the file. + */ + id: string; + + /** + * The type of jsonl source. Always `file_id`. + */ + type: 'file_id'; + } + + /** + * A EvalResponsesSource object describing a run data source configuration. + */ + export interface Responses { + /** + * The type of run data source. Always `responses`. + */ + type: 'responses'; + + /** + * Whether to allow parallel tool calls. This is a query parameter used to select + * responses. + */ + allow_parallel_tool_calls?: boolean | null; + + /** + * Only include items created after this timestamp (inclusive). This is a query + * parameter used to select responses. 
+ */ + created_after?: number | null; + + /** + * Only include items created before this timestamp (inclusive). This is a query + * parameter used to select responses. + */ + created_before?: number | null; + + /** + * Whether the response has tool calls. This is a query parameter used to select + * responses. + */ + has_tool_calls?: boolean | null; + + /** + * Optional search string for instructions. This is a query parameter used to + * select responses. + */ + instructions_search?: string | null; + + /** + * Metadata filter for the responses. This is a query parameter used to select + * responses. + */ + metadata?: unknown | null; + + /** + * The name of the model to find responses for. This is a query parameter used to + * select responses. + */ + model?: string | null; + + /** + * Optional reasoning effort parameter. This is a query parameter used to select + * responses. + */ + reasoning_effort?: Shared.ReasoningEffort | null; + + /** + * Sampling temperature. This is a query parameter used to select responses. + */ + temperature?: number | null; + + /** + * Nucleus sampling parameter. This is a query parameter used to select responses. + */ + top_p?: number | null; + + /** + * List of user identifiers. This is a query parameter used to select responses. + */ + users?: Array | null; + } + + export interface Template { + /** + * A list of chat messages forming the prompt or context. May include variable + * references to the "item" namespace, ie {{item.name}}. + */ + template: Array; + + /** + * The type of input messages. Always `template`. + */ + type: 'template'; + } + + export namespace Template { + export interface ChatMessage { + /** + * The content of the message. + */ + content: string; + + /** + * The role of the message (e.g. "system", "assistant", "user"). + */ + role: string; + } + + /** + * A message input to the model with a role indicating instruction following + * hierarchy. 
Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface EvalItem { + /** + * Text inputs to the model - can contain template strings. + */ + content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText; + + /** + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. + */ + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. + */ + type?: 'message'; + } + + export namespace EvalItem { + /** + * A text output from the model. + */ + export interface OutputText { + /** + * The text output from the model. + */ + text: string; + + /** + * The type of the output text. Always `output_text`. + */ + type: 'output_text'; + } + } + } + + export interface ItemReference { + /** + * A reference to a variable in the "item" namespace. Ie, "item.name" + */ + item_reference: string; + + /** + * The type of input messages. Always `item_reference`. + */ + type: 'item_reference'; + } + + export interface SamplingParams { + /** + * The maximum number of tokens in the generated output. + */ + max_completion_tokens?: number; + + /** + * A seed value to initialize the randomness, during sampling. + */ + seed?: number; + + /** + * A higher temperature increases randomness in the outputs. + */ + temperature?: number; + + /** + * An alternative to temperature for nucleus sampling; 1.0 includes all tokens. + */ + top_p?: number; + } + } + + export interface PerModelUsage { + /** + * The number of tokens retrieved from cache. + */ + cached_tokens: number; + + /** + * The number of completion tokens generated. + */ + completion_tokens: number; + + /** + * The number of invocations. + */ + invocation_count: number; + + /** + * The name of the model. 
*/ model_name: string; @@ -997,7 +1920,10 @@ export interface RunCreateParams { /** * Details about the run's data source. */ - data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource; + data_source: + | CreateEvalJSONLRunDataSource + | CreateEvalCompletionsRunDataSource + | RunCreateParams.CreateEvalResponsesRunDataSource; /** * Set of 16 key-value pairs that can be attached to an object. This can be useful @@ -1015,6 +1941,247 @@ export interface RunCreateParams { name?: string; } +export namespace RunCreateParams { + /** + * A ResponsesRunDataSource object describing a model sampling configuration. + */ + export interface CreateEvalResponsesRunDataSource { + /** + * A EvalResponsesSource object describing a run data source configuration. + */ + source: + | CreateEvalResponsesRunDataSource.FileContent + | CreateEvalResponsesRunDataSource.FileID + | CreateEvalResponsesRunDataSource.Responses; + + /** + * The type of run data source. Always `completions`. + */ + type: 'completions'; + + input_messages?: + | CreateEvalResponsesRunDataSource.Template + | CreateEvalResponsesRunDataSource.ItemReference; + + /** + * The name of the model to use for generating completions (e.g. "o3-mini"). + */ + model?: string; + + sampling_params?: CreateEvalResponsesRunDataSource.SamplingParams; + } + + export namespace CreateEvalResponsesRunDataSource { + export interface FileContent { + /** + * The content of the jsonl file. + */ + content: Array; + + /** + * The type of jsonl source. Always `file_content`. + */ + type: 'file_content'; + } + + export namespace FileContent { + export interface Content { + item: Record; + + sample?: Record; + } + } + + export interface FileID { + /** + * The identifier of the file. + */ + id: string; + + /** + * The type of jsonl source. Always `file_id`. + */ + type: 'file_id'; + } + + /** + * A EvalResponsesSource object describing a run data source configuration. 
+ */ + export interface Responses { + /** + * The type of run data source. Always `responses`. + */ + type: 'responses'; + + /** + * Whether to allow parallel tool calls. This is a query parameter used to select + * responses. + */ + allow_parallel_tool_calls?: boolean | null; + + /** + * Only include items created after this timestamp (inclusive). This is a query + * parameter used to select responses. + */ + created_after?: number | null; + + /** + * Only include items created before this timestamp (inclusive). This is a query + * parameter used to select responses. + */ + created_before?: number | null; + + /** + * Whether the response has tool calls. This is a query parameter used to select + * responses. + */ + has_tool_calls?: boolean | null; + + /** + * Optional search string for instructions. This is a query parameter used to + * select responses. + */ + instructions_search?: string | null; + + /** + * Metadata filter for the responses. This is a query parameter used to select + * responses. + */ + metadata?: unknown | null; + + /** + * The name of the model to find responses for. This is a query parameter used to + * select responses. + */ + model?: string | null; + + /** + * Optional reasoning effort parameter. This is a query parameter used to select + * responses. + */ + reasoning_effort?: Shared.ReasoningEffort | null; + + /** + * Sampling temperature. This is a query parameter used to select responses. + */ + temperature?: number | null; + + /** + * Nucleus sampling parameter. This is a query parameter used to select responses. + */ + top_p?: number | null; + + /** + * List of user identifiers. This is a query parameter used to select responses. + */ + users?: Array | null; + } + + export interface Template { + /** + * A list of chat messages forming the prompt or context. May include variable + * references to the "item" namespace, ie {{item.name}}. + */ + template: Array; + + /** + * The type of input messages. Always `template`. 
+ */ + type: 'template'; + } + + export namespace Template { + export interface ChatMessage { + /** + * The content of the message. + */ + content: string; + + /** + * The role of the message (e.g. "system", "assistant", "user"). + */ + role: string; + } + + /** + * A message input to the model with a role indicating instruction following + * hierarchy. Instructions given with the `developer` or `system` role take + * precedence over instructions given with the `user` role. Messages with the + * `assistant` role are presumed to have been generated by the model in previous + * interactions. + */ + export interface EvalItem { + /** + * Text inputs to the model - can contain template strings. + */ + content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText; + + /** + * The role of the message input. One of `user`, `assistant`, `system`, or + * `developer`. + */ + role: 'user' | 'assistant' | 'system' | 'developer'; + + /** + * The type of the message input. Always `message`. + */ + type?: 'message'; + } + + export namespace EvalItem { + /** + * A text output from the model. + */ + export interface OutputText { + /** + * The text output from the model. + */ + text: string; + + /** + * The type of the output text. Always `output_text`. + */ + type: 'output_text'; + } + } + } + + export interface ItemReference { + /** + * A reference to a variable in the "item" namespace. Ie, "item.name" + */ + item_reference: string; + + /** + * The type of input messages. Always `item_reference`. + */ + type: 'item_reference'; + } + + export interface SamplingParams { + /** + * The maximum number of tokens in the generated output. + */ + max_completion_tokens?: number; + + /** + * A seed value to initialize the randomness, during sampling. + */ + seed?: number; + + /** + * A higher temperature increases randomness in the outputs. + */ + temperature?: number; + + /** + * An alternative to temperature for nucleus sampling; 1.0 includes all tokens. 
+ */ + top_p?: number; + } + } +} + export interface RunListParams extends CursorPageParams { /** * Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for @@ -1023,8 +2190,8 @@ export interface RunListParams extends CursorPageParams { order?: 'asc' | 'desc'; /** - * Filter runs by status. Use "queued" | "in_progress" | "failed" | "completed" | - * "canceled". + * Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed` + * | `canceled`. */ status?: 'queued' | 'in_progress' | 'completed' | 'canceled' | 'failed'; } diff --git a/src/resources/fine-tuning/checkpoints/permissions.ts b/src/resources/fine-tuning/checkpoints/permissions.ts index 500c3de81..e808b2001 100644 --- a/src/resources/fine-tuning/checkpoints/permissions.ts +++ b/src/resources/fine-tuning/checkpoints/permissions.ts @@ -61,9 +61,13 @@ export class Permissions extends APIResource { */ del( fineTunedModelCheckpoint: string, + permissionId: string, options?: Core.RequestOptions, ): Core.APIPromise { - return this._client.delete(`/fine_tuning/checkpoints/${fineTunedModelCheckpoint}/permissions`, options); + return this._client.delete( + `/fine_tuning/checkpoints/${fineTunedModelCheckpoint}/permissions/${permissionId}`, + options, + ); } } diff --git a/src/resources/images.ts b/src/resources/images.ts index 8e1c6d92e..de1882d30 100644 --- a/src/resources/images.ts +++ b/src/resources/images.ts @@ -5,7 +5,7 @@ import * as Core from '../core'; export class Images extends APIResource { /** - * Creates a variation of a given image. + * Creates a variation of a given image. This endpoint only supports `dall-e-2`. */ createVariation( body: ImageCreateVariationParams, @@ -15,7 +15,8 @@ export class Images extends APIResource { } /** - * Creates an edited or extended image given an original image and a prompt. + * Creates an edited or extended image given one or more source images and a + * prompt. This endpoint only supports `gpt-image-1` and `dall-e-2`. 
*/ edit(body: ImageEditParams, options?: Core.RequestOptions): Core.APIPromise { return this._client.post('/images/edits', Core.multipartFormRequestOptions({ body, ...options })); @@ -23,6 +24,7 @@ export class Images extends APIResource { /** * Creates an image given a prompt. + * [Learn more](https://platform.openai.com/docs/guides/images). */ generate(body: ImageGenerateParams, options?: Core.RequestOptions): Core.APIPromise { return this._client.post('/images/generations', { body, ...options }); @@ -30,33 +32,93 @@ export class Images extends APIResource { } /** - * Represents the url or the content of an image generated by the OpenAI API. + * Represents the content or the URL of an image generated by the OpenAI API. */ export interface Image { /** - * The base64-encoded JSON of the generated image, if `response_format` is - * `b64_json`. + * The base64-encoded JSON of the generated image. Default value for `gpt-image-1`, + * and only present if `response_format` is set to `b64_json` for `dall-e-2` and + * `dall-e-3`. */ b64_json?: string; /** - * The prompt that was used to generate the image, if there was any revision to the - * prompt. + * For `dall-e-3` only, the revised prompt that was used to generate the image. */ revised_prompt?: string; /** - * The URL of the generated image, if `response_format` is `url` (default). + * When using `dall-e-2` or `dall-e-3`, the URL of the generated image if + * `response_format` is set to `url` (default value). Unsupported for + * `gpt-image-1`. */ url?: string; } -export type ImageModel = 'dall-e-2' | 'dall-e-3'; +export type ImageModel = 'dall-e-2' | 'dall-e-3' | 'gpt-image-1'; +/** + * The response from the image generation endpoint. + */ export interface ImagesResponse { + /** + * The Unix timestamp (in seconds) of when the image was created. + */ created: number; - data: Array; + /** + * The list of generated images. 
+ */ + data?: Array; + + /** + * For `gpt-image-1` only, the token usage information for the image generation. + */ + usage?: ImagesResponse.Usage; +} + +export namespace ImagesResponse { + /** + * For `gpt-image-1` only, the token usage information for the image generation. + */ + export interface Usage { + /** + * The number of tokens (images and text) in the input prompt. + */ + input_tokens: number; + + /** + * The input tokens detailed information for the image generation. + */ + input_tokens_details: Usage.InputTokensDetails; + + /** + * The number of image tokens in the output image. + */ + output_tokens: number; + + /** + * The total number of tokens (images and text) used for the image generation. + */ + total_tokens: number; + } + + export namespace Usage { + /** + * The input tokens detailed information for the image generation. + */ + export interface InputTokensDetails { + /** + * The number of image tokens in the input prompt. + */ + image_tokens: number; + + /** + * The number of text tokens in the input prompt. + */ + text_tokens: number; + } + } } export interface ImageCreateVariationParams { @@ -73,8 +135,7 @@ export interface ImageCreateVariationParams { model?: (string & {}) | ImageModel | null; /** - * The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only - * `n=1` is supported. + * The number of images to generate. Must be between 1 and 10. */ n?: number | null; @@ -101,27 +162,31 @@ export interface ImageCreateVariationParams { export interface ImageEditParams { /** - * The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask - * is not provided, image must have transparency, which will be used as the mask. + * The image(s) to edit. Must be a supported image file or an array of images. For + * `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + * 25MB. For `dall-e-2`, you can only provide one image, and it should be a square + * `png` file less than 4MB. 
*/ - image: Core.Uploadable; + image: Core.Uploadable | Array; /** * A text description of the desired image(s). The maximum length is 1000 - * characters. + * characters for `dall-e-2`, and 32000 characters for `gpt-image-1`. */ prompt: string; /** * An additional image whose fully transparent areas (e.g. where alpha is zero) - * indicate where `image` should be edited. Must be a valid PNG file, less than + * indicate where `image` should be edited. If there are multiple images provided, + * the mask will be applied on the first image. Must be a valid PNG file, less than * 4MB, and have the same dimensions as `image`. */ mask?: Core.Uploadable; /** - * The model to use for image generation. Only `dall-e-2` is supported at this - * time. + * The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are + * supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` + * is used. */ model?: (string & {}) | ImageModel | null; @@ -130,16 +195,25 @@ export interface ImageEditParams { */ n?: number | null; + /** + * The quality of the image that will be generated. `high`, `medium` and `low` are + * only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality. + * Defaults to `auto`. + */ + quality?: 'standard' | 'low' | 'medium' | 'high' | 'auto' | null; + /** * The format in which the generated images are returned. Must be one of `url` or * `b64_json`. URLs are only valid for 60 minutes after the image has been - * generated. + * generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1` + * will always return base64-encoded images. */ response_format?: 'url' | 'b64_json' | null; /** - * The size of the generated images. Must be one of `256x256`, `512x512`, or - * `1024x1024`. + * The size of the generated images. 
Must be one of `1024x1024`, `1536x1024` + * (landscape), `1024x1536` (portrait), or `auto` (default value) for + * `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. */ size?: '256x256' | '512x512' | '1024x1024' | null; @@ -153,16 +227,36 @@ export interface ImageEditParams { export interface ImageGenerateParams { /** - * A text description of the desired image(s). The maximum length is 1000 - * characters for `dall-e-2` and 4000 characters for `dall-e-3`. + * A text description of the desired image(s). The maximum length is 32000 + * characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters + * for `dall-e-3`. */ prompt: string; /** - * The model to use for image generation. + * Allows to set transparency for the background of the generated image(s). This + * parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + * `opaque` or `auto` (default value). When `auto` is used, the model will + * automatically determine the best background for the image. + * + * If `transparent`, the output format needs to support transparency, so it should + * be set to either `png` (default value) or `webp`. + */ + background?: 'transparent' | 'opaque' | 'auto' | null; + + /** + * The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or + * `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to + * `gpt-image-1` is used. */ model?: (string & {}) | ImageModel | null; + /** + * Control the content-moderation level for images generated by `gpt-image-1`. Must + * be either `low` for less restrictive filtering or `auto` (default value). + */ + moderation?: 'low' | 'auto' | null; + /** * The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only * `n=1` is supported. @@ -170,31 +264,59 @@ export interface ImageGenerateParams { n?: number | null; /** - * The quality of the image that will be generated. 
`hd` creates images with finer - * details and greater consistency across the image. This param is only supported - * for `dall-e-3`. + * The compression level (0-100%) for the generated images. This parameter is only + * supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + * defaults to 100. */ - quality?: 'standard' | 'hd'; + output_compression?: number | null; /** - * The format in which the generated images are returned. Must be one of `url` or - * `b64_json`. URLs are only valid for 60 minutes after the image has been - * generated. + * The format in which the generated images are returned. This parameter is only + * supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + */ + output_format?: 'png' | 'jpeg' | 'webp' | null; + + /** + * The quality of the image that will be generated. + * + * - `auto` (default value) will automatically select the best quality for the + * given model. + * - `high`, `medium` and `low` are supported for `gpt-image-1`. + * - `hd` and `standard` are supported for `dall-e-3`. + * - `standard` is the only option for `dall-e-2`. + */ + quality?: 'standard' | 'hd' | 'low' | 'medium' | 'high' | 'auto' | null; + + /** + * The format in which generated images with `dall-e-2` and `dall-e-3` are + * returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes + * after the image has been generated. This parameter isn't supported for + * `gpt-image-1` which will always return base64-encoded images. */ response_format?: 'url' | 'b64_json' | null; /** - * The size of the generated images. Must be one of `256x256`, `512x512`, or - * `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or - * `1024x1792` for `dall-e-3` models. + * The size of the generated images. 
Must be one of `1024x1024`, `1536x1024` + * (landscape), `1024x1536` (portrait), or `auto` (default value) for + * `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and + * one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. */ - size?: '256x256' | '512x512' | '1024x1024' | '1792x1024' | '1024x1792' | null; + size?: + | 'auto' + | '1024x1024' + | '1536x1024' + | '1024x1536' + | '256x256' + | '512x512' + | '1792x1024' + | '1024x1792' + | null; /** - * The style of the generated images. Must be one of `vivid` or `natural`. Vivid - * causes the model to lean towards generating hyper-real and dramatic images. - * Natural causes the model to produce more natural, less hyper-real looking - * images. This param is only supported for `dall-e-3`. + * The style of the generated images. This parameter is only supported for + * `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean + * towards generating hyper-real and dramatic images. Natural causes the model to + * produce more natural, less hyper-real looking images. */ style?: 'vivid' | 'natural' | null; diff --git a/src/resources/responses/responses.ts b/src/resources/responses/responses.ts index 52dd079fc..771b8daf2 100644 --- a/src/resources/responses/responses.ts +++ b/src/resources/responses/responses.ts @@ -2158,6 +2158,160 @@ export namespace ResponseReasoningItem { } } +/** + * Emitted when a new reasoning summary part is added. + */ +export interface ResponseReasoningSummaryPartAddedEvent { + /** + * The ID of the item this summary part is associated with. + */ + item_id: string; + + /** + * The index of the output item this summary part is associated with. + */ + output_index: number; + + /** + * The summary part that was added. + */ + part: ResponseReasoningSummaryPartAddedEvent.Part; + + /** + * The index of the summary part within the reasoning summary. + */ + summary_index: number; + + /** + * The type of the event. 
Always `response.reasoning_summary_part.added`. + */ + type: 'response.reasoning_summary_part.added'; +} + +export namespace ResponseReasoningSummaryPartAddedEvent { + /** + * The summary part that was added. + */ + export interface Part { + /** + * The text of the summary part. + */ + text: string; + + /** + * The type of the summary part. Always `summary_text`. + */ + type: 'summary_text'; + } +} + +/** + * Emitted when a reasoning summary part is completed. + */ +export interface ResponseReasoningSummaryPartDoneEvent { + /** + * The ID of the item this summary part is associated with. + */ + item_id: string; + + /** + * The index of the output item this summary part is associated with. + */ + output_index: number; + + /** + * The completed summary part. + */ + part: ResponseReasoningSummaryPartDoneEvent.Part; + + /** + * The index of the summary part within the reasoning summary. + */ + summary_index: number; + + /** + * The type of the event. Always `response.reasoning_summary_part.done`. + */ + type: 'response.reasoning_summary_part.done'; +} + +export namespace ResponseReasoningSummaryPartDoneEvent { + /** + * The completed summary part. + */ + export interface Part { + /** + * The text of the summary part. + */ + text: string; + + /** + * The type of the summary part. Always `summary_text`. + */ + type: 'summary_text'; + } +} + +/** + * Emitted when a delta is added to a reasoning summary text. + */ +export interface ResponseReasoningSummaryTextDeltaEvent { + /** + * The text delta that was added to the summary. + */ + delta: string; + + /** + * The ID of the item this summary text delta is associated with. + */ + item_id: string; + + /** + * The index of the output item this summary text delta is associated with. + */ + output_index: number; + + /** + * The index of the summary part within the reasoning summary. + */ + summary_index: number; + + /** + * The type of the event. Always `response.reasoning_summary_text.delta`. 
+ */ + type: 'response.reasoning_summary_text.delta'; +} + +/** + * Emitted when a reasoning summary text is completed. + */ +export interface ResponseReasoningSummaryTextDoneEvent { + /** + * The ID of the item this summary text is associated with. + */ + item_id: string; + + /** + * The index of the output item this summary text is associated with. + */ + output_index: number; + + /** + * The index of the summary part within the reasoning summary. + */ + summary_index: number; + + /** + * The full text of the completed reasoning summary. + */ + text: string; + + /** + * The type of the event. Always `response.reasoning_summary_text.done`. + */ + type: 'response.reasoning_summary_text.done'; +} + /** * Emitted when there is a partial refusal text. */ @@ -2252,6 +2406,10 @@ export type ResponseStreamEvent = | ResponseIncompleteEvent | ResponseOutputItemAddedEvent | ResponseOutputItemDoneEvent + | ResponseReasoningSummaryPartAddedEvent + | ResponseReasoningSummaryPartDoneEvent + | ResponseReasoningSummaryTextDeltaEvent + | ResponseReasoningSummaryTextDoneEvent | ResponseRefusalDeltaEvent | ResponseRefusalDoneEvent | ResponseTextAnnotationDeltaEvent @@ -2967,6 +3125,10 @@ export declare namespace Responses { type ResponseOutputRefusal as ResponseOutputRefusal, type ResponseOutputText as ResponseOutputText, type ResponseReasoningItem as ResponseReasoningItem, + type ResponseReasoningSummaryPartAddedEvent as ResponseReasoningSummaryPartAddedEvent, + type ResponseReasoningSummaryPartDoneEvent as ResponseReasoningSummaryPartDoneEvent, + type ResponseReasoningSummaryTextDeltaEvent as ResponseReasoningSummaryTextDeltaEvent, + type ResponseReasoningSummaryTextDoneEvent as ResponseReasoningSummaryTextDoneEvent, type ResponseRefusalDeltaEvent as ResponseRefusalDeltaEvent, type ResponseRefusalDoneEvent as ResponseRefusalDoneEvent, type ResponseStatus as ResponseStatus, diff --git a/src/version.ts b/src/version.ts index cd1995322..1215a5e79 100644 --- a/src/version.ts +++ 
b/src/version.ts @@ -1 +1 @@ -export const VERSION = '4.95.1'; // x-release-please-version +export const VERSION = '4.96.0'; // x-release-please-version diff --git a/tests/api-resources/evals/evals.test.ts b/tests/api-resources/evals/evals.test.ts index fabc2602a..45d1c4f9b 100644 --- a/tests/api-resources/evals/evals.test.ts +++ b/tests/api-resources/evals/evals.test.ts @@ -47,7 +47,6 @@ describe('resource evals', () => { ], metadata: { foo: 'string' }, name: 'name', - share_with_openai: true, }); }); diff --git a/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts b/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts index e7aceae3e..1e4b40a94 100644 --- a/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts +++ b/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts @@ -61,10 +61,10 @@ describe('resource permissions', () => { ).rejects.toThrow(OpenAI.NotFoundError); }); - // OpenAPI spec is slightly incorrect - test.skip('del', async () => { + test('del', async () => { const responsePromise = client.fineTuning.checkpoints.permissions.del( 'ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd', + 'cp_zc4Q7MP6XxulcVzj4MZdwsAB', ); const rawResponse = await responsePromise.asResponse(); expect(rawResponse).toBeInstanceOf(Response); @@ -75,13 +75,14 @@ describe('resource permissions', () => { expect(dataAndResponse.response).toBe(rawResponse); }); - // OpenAPI spec is slightly incorrect - test.skip('del: request options instead of params are passed correctly', async () => { + test('del: request options instead of params are passed correctly', async () => { // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error await expect( - client.fineTuning.checkpoints.permissions.del('ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd', { - path: '/_stainless_unknown_path', - }), + client.fineTuning.checkpoints.permissions.del( + 'ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd', + 
'cp_zc4Q7MP6XxulcVzj4MZdwsAB', + { path: '/_stainless_unknown_path' }, + ), ).rejects.toThrow(OpenAI.NotFoundError); }); }); diff --git a/tests/api-resources/images.test.ts b/tests/api-resources/images.test.ts index 4f15e20ac..e9b460254 100644 --- a/tests/api-resources/images.test.ts +++ b/tests/api-resources/images.test.ts @@ -54,6 +54,7 @@ describe('resource images', () => { mask: await toFile(Buffer.from('# my file contents'), 'README.md'), model: 'string', n: 1, + quality: 'high', response_format: 'url', size: '1024x1024', user: 'user-1234', @@ -74,9 +75,13 @@ describe('resource images', () => { test('generate: required and optional params', async () => { const response = await client.images.generate({ prompt: 'A cute baby sea otter', + background: 'transparent', model: 'string', + moderation: 'low', n: 1, - quality: 'standard', + output_compression: 100, + output_format: 'png', + quality: 'medium', response_format: 'url', size: '1024x1024', style: 'vivid',