
Commit 1e832a1

stainless-bot authored and RobertCraigie committed
feat(api): add o3-mini
chore: unknown commit message
1 parent 7d3f8ef commit 1e832a1

21 files changed: +320 additions, -144 deletions


.stats.yml

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
 configured_endpoints: 69
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-3904ef6b29a89c98f93a9b7da19879695f3c440564be6384db7af1b734611ede.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-6204952a29973265b9c0d66fc67ffaf53c6a90ae4d75cdacf9d147676f5274c9.yml

api.md

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ Types:
 - <code><a href="./src/resources/shared.ts">ErrorObject</a></code>
 - <code><a href="./src/resources/shared.ts">FunctionDefinition</a></code>
 - <code><a href="./src/resources/shared.ts">FunctionParameters</a></code>
+- <code><a href="./src/resources/shared.ts">Metadata</a></code>
 - <code><a href="./src/resources/shared.ts">ResponseFormatJSONObject</a></code>
 - <code><a href="./src/resources/shared.ts">ResponseFormatJSONSchema</a></code>
 - <code><a href="./src/resources/shared.ts">ResponseFormatText</a></code>

src/client.ts

Lines changed: 1 addition & 0 deletions
@@ -924,6 +924,7 @@ export declare namespace OpenAI {
   export type ErrorObject = API.ErrorObject;
   export type FunctionDefinition = API.FunctionDefinition;
   export type FunctionParameters = API.FunctionParameters;
+  export type Metadata = API.Metadata;
   export type ResponseFormatJSONObject = API.ResponseFormatJSONObject;
   export type ResponseFormatJSONSchema = API.ResponseFormatJSONSchema;
   export type ResponseFormatText = API.ResponseFormatText;

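The shared `Metadata` type is now re-exported from the `OpenAI` namespace. A minimal sketch of referencing it for a reusable, typed metadata object (assuming, as the updated doc comments describe, that it is a map of string keys to string values; the values below are illustrative):

```ts
import OpenAI from 'openai';

// Illustrative values only; metadata allows up to 16 string key-value pairs
// (keys <= 64 chars, values <= 512 chars) per the updated doc comments.
const releaseTags: OpenAI.Metadata = {
  environment: 'staging',
  release: '2025-01-31',
};
```
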
src/resources/audio/transcriptions.ts

Lines changed: 2 additions & 2 deletions
@@ -172,8 +172,8 @@ export interface TranscriptionCreateParams<
 
   /**
    * The language of the input audio. Supplying the input language in
-   * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will
-   * improve accuracy and latency.
+   * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+   * format will improve accuracy and latency.
    */
   language?: string;
 

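The wording change above only clarifies the expected format of `language`. A minimal usage sketch (the local file name is assumed, and `en` is the ISO-639-1 code the docs now call out):

```ts
import fs from 'node:fs';
import OpenAI from 'openai';

const client = new OpenAI(); // reads OPENAI_API_KEY from the environment

const transcription = await client.audio.transcriptions.create({
  file: fs.createReadStream('audio.mp3'), // assumed local audio file
  model: 'whisper-1',
  language: 'en', // ISO-639-1 code (e.g. `en`) improves accuracy and latency
});

console.log(transcription.text);
```
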
src/resources/batches.ts

Lines changed: 14 additions & 6 deletions
@@ -2,6 +2,7 @@
 
 import { APIResource } from '../resource';
 import * as BatchesAPI from './batches';
+import * as Shared from './shared';
 import { APIPromise } from '../api-promise';
 import { CursorPage, type CursorPageParams, PagePromise } from '../pagination';
 import { RequestOptions } from '../internal/request-options';
@@ -133,11 +134,13 @@ export interface Batch {
 
   /**
    * Set of 16 key-value pairs that can be attached to an object. This can be useful
-   * for storing additional information about the object in a structured format. Keys
-   * can be a maximum of 64 characters long and values can be a maxium of 512
-   * characters long.
+   * for storing additional information about the object in a structured format, and
+   * querying for objects via API or the dashboard.
+   *
+   * Keys are strings with a maximum length of 64 characters. Values are strings with
+   * a maximum length of 512 characters.
    */
-  metadata?: unknown | null;
+  metadata?: Shared.Metadata | null;
 
   /**
    * The ID of the file containing the outputs of successfully executed requests.
@@ -232,9 +235,14 @@ export interface BatchCreateParams {
   input_file_id: string;
 
   /**
-   * Optional custom metadata for the batch.
+   * Set of 16 key-value pairs that can be attached to an object. This can be useful
+   * for storing additional information about the object in a structured format, and
+   * querying for objects via API or the dashboard.
+   *
+   * Keys are strings with a maximum length of 64 characters. Values are strings with
+   * a maximum length of 512 characters.
    */
-  metadata?: Record<string, string> | null;
+  metadata?: Shared.Metadata | null;
 }
 
 export interface BatchListParams extends CursorPageParams {}

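With `metadata` now typed as `Shared.Metadata`, batch metadata is a plain string-to-string map rather than `unknown`. A minimal sketch of creating a batch with metadata (the file id and metadata values are illustrative):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

const batch = await client.batches.create({
  input_file_id: 'file_abc123', // illustrative id of an already-uploaded JSONL input file
  endpoint: '/v1/chat/completions',
  completion_window: '24h',
  metadata: { project: 'nightly-eval', owner: 'data-team' }, // up to 16 string key-value pairs
});

console.log(batch.id, batch.metadata);
```
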
src/resources/beta/assistants.ts

Lines changed: 25 additions & 17 deletions
@@ -100,11 +100,13 @@ export interface Assistant {
 
   /**
    * Set of 16 key-value pairs that can be attached to an object. This can be useful
-   * for storing additional information about the object in a structured format. Keys
-   * can be a maximum of 64 characters long and values can be a maxium of 512
-   * characters long.
+   * for storing additional information about the object in a structured format, and
+   * querying for objects via API or the dashboard.
+   *
+   * Keys are strings with a maximum length of 64 characters. Values are strings with
+   * a maximum length of 512 characters.
    */
-  metadata: unknown | null;
+  metadata: Shared.Metadata | null;
 
   /**
    * ID of the model to use. You can use the
@@ -1107,11 +1109,13 @@ export interface AssistantCreateParams {
 
   /**
    * Set of 16 key-value pairs that can be attached to an object. This can be useful
-   * for storing additional information about the object in a structured format. Keys
-   * can be a maximum of 64 characters long and values can be a maxium of 512
-   * characters long.
+   * for storing additional information about the object in a structured format, and
+   * querying for objects via API or the dashboard.
+   *
+   * Keys are strings with a maximum length of 64 characters. Values are strings with
+   * a maximum length of 512 characters.
    */
-  metadata?: unknown | null;
+  metadata?: Shared.Metadata | null;
 
   /**
    * The name of the assistant. The maximum length is 256 characters.
@@ -1231,12 +1235,14 @@ export namespace AssistantCreateParams {
       file_ids?: Array<string>;
 
       /**
-       * Set of 16 key-value pairs that can be attached to a vector store. This can be
-       * useful for storing additional information about the vector store in a structured
-       * format. Keys can be a maximum of 64 characters long and values can be a maxium
-       * of 512 characters long.
+       * Set of 16 key-value pairs that can be attached to an object. This can be useful
+       * for storing additional information about the object in a structured format, and
+       * querying for objects via API or the dashboard.
+       *
+       * Keys are strings with a maximum length of 64 characters. Values are strings with
+       * a maximum length of 512 characters.
        */
-      metadata?: unknown;
+      metadata?: Shared.Metadata | null;
     }
   }
 }
@@ -1256,11 +1262,13 @@ export interface AssistantUpdateParams {
 
   /**
    * Set of 16 key-value pairs that can be attached to an object. This can be useful
-   * for storing additional information about the object in a structured format. Keys
-   * can be a maximum of 64 characters long and values can be a maxium of 512
-   * characters long.
+   * for storing additional information about the object in a structured format, and
+   * querying for objects via API or the dashboard.
+   *
+   * Keys are strings with a maximum length of 64 characters. Values are strings with
+   * a maximum length of 512 characters.
    */
-  metadata?: unknown | null;
+  metadata?: Shared.Metadata | null;
 
   /**
    * ID of the model to use. You can use the

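The same `Shared.Metadata` typing now applies to assistant create/update params and to the nested vector-store params. A minimal sketch (model name and metadata values are illustrative):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

const assistant = await client.beta.assistants.create({
  model: 'gpt-4o', // illustrative model name
  name: 'Support triage bot',
  metadata: { team: 'support', tier: 'internal' }, // typed as Shared.Metadata
});

// Metadata can also be replaced later with the same string-to-string shape.
await client.beta.assistants.update(assistant.id, {
  metadata: { team: 'support', tier: 'external' },
});
```
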
src/resources/beta/realtime/realtime.ts

Lines changed: 79 additions & 10 deletions
@@ -2,6 +2,7 @@
 
 import { APIResource } from '../../../resource';
 import * as RealtimeAPI from './realtime';
+import * as Shared from '../../shared';
 import * as SessionsAPI from './sessions';
 import {
   Session as SessionsAPISession,
@@ -741,9 +742,38 @@ export interface RealtimeResponse {
   id?: string;
 
   /**
-   * Developer-provided string key-value pairs associated with this response.
+   * Which conversation the response is added to, determined by the `conversation`
+   * field in the `response.create` event. If `auto`, the response will be added to
+   * the default conversation and the value of `conversation_id` will be an id like
+   * `conv_1234`. If `none`, the response will not be added to any conversation and
+   * the value of `conversation_id` will be `null`. If responses are being triggered
+   * by server VAD, the response will be added to the default conversation, thus the
+   * `conversation_id` will be an id like `conv_1234`.
    */
-  metadata?: unknown | null;
+  conversation_id?: string;
+
+  /**
+   * Maximum number of output tokens for a single assistant response, inclusive of
+   * tool calls, that was used in this response.
+   */
+  max_output_tokens?: number | 'inf';
+
+  /**
+   * Set of 16 key-value pairs that can be attached to an object. This can be useful
+   * for storing additional information about the object in a structured format, and
+   * querying for objects via API or the dashboard.
+   *
+   * Keys are strings with a maximum length of 64 characters. Values are strings with
+   * a maximum length of 512 characters.
+   */
+  metadata?: Shared.Metadata | null;
+
+  /**
+   * The set of modalities the model used to respond. If there are multiple
+   * modalities, the model will pick one, for example if `modalities` is
+   * `["text", "audio"]`, the model could be responding in either text or audio.
+   */
+  modalities?: Array<'text' | 'audio'>;
 
   /**
    * The object type, must be `realtime.response`.
@@ -755,6 +785,11 @@ export interface RealtimeResponse {
    */
   output?: Array<ConversationItem>;
 
+  /**
+   * The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+   */
+  output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw';
+
   /**
    * The final status of the response (`completed`, `cancelled`, `failed`, or
    * `incomplete`).
@@ -766,13 +801,24 @@ export interface RealtimeResponse {
    */
   status_details?: RealtimeResponseStatus;
 
+  /**
+   * Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
+   */
+  temperature?: number;
+
   /**
    * Usage statistics for the Response, this will correspond to billing. A Realtime
    * API session will maintain a conversation context and append new Items to the
    * Conversation, thus output from previous turns (text and audio tokens) will
    * become the input for later turns.
    */
   usage?: RealtimeResponseUsage;
+
+  /**
+   * The voice the model used to respond. Current voice options are `alloy`, `ash`,
+   * `ballad`, `coral`, `echo` `sage`, `shimmer` and `verse`.
+   */
+  voice?: 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse';
 }
 
 /**
@@ -1320,11 +1366,13 @@ export namespace ResponseCreateEvent {
 
     /**
      * Set of 16 key-value pairs that can be attached to an object. This can be useful
-     * for storing additional information about the object in a structured format. Keys
-     * can be a maximum of 64 characters long and values can be a maximum of 512
-     * characters long.
+     * for storing additional information about the object in a structured format, and
+     * querying for objects via API or the dashboard.
+     *
+     * Keys are strings with a maximum length of 64 characters. Values are strings with
+     * a maximum length of 512 characters.
      */
-    metadata?: unknown | null;
+    metadata?: Shared.Metadata | null;
 
     /**
      * The set of modalities the model can respond with. To disable audio, set this to
@@ -1716,8 +1764,11 @@ export namespace SessionUpdateEvent {
    * Configuration for input audio transcription, defaults to off and can be set to
    * `null` to turn off once on. Input audio transcription is not native to the
    * model, since the model consumes audio directly. Transcription runs
-   * asynchronously through Whisper and should be treated as rough guidance rather
-   * than the representation understood by the model.
+   * asynchronously through
+   * [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+   * and should be treated as rough guidance rather than the representation
+   * understood by the model. The client can optionally set the language and prompt
+   * for transcription, these fields will be passed to the Whisper API.
    */
   input_audio_transcription?: Session.InputAudioTranscription;
 
@@ -1801,15 +1852,33 @@ export namespace SessionUpdateEvent {
    * Configuration for input audio transcription, defaults to off and can be set to
    * `null` to turn off once on. Input audio transcription is not native to the
    * model, since the model consumes audio directly. Transcription runs
-   * asynchronously through Whisper and should be treated as rough guidance rather
-   * than the representation understood by the model.
+   * asynchronously through
+   * [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+   * and should be treated as rough guidance rather than the representation
+   * understood by the model. The client can optionally set the language and prompt
+   * for transcription, these fields will be passed to the Whisper API.
    */
   export interface InputAudioTranscription {
+    /**
+     * The language of the input audio. Supplying the input language in
+     * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+     * format will improve accuracy and latency.
+     */
+    language?: string;
+
     /**
      * The model to use for transcription, `whisper-1` is the only currently supported
      * model.
      */
     model?: string;
+
+    /**
+     * An optional text to guide the model's style or continue a previous audio
+     * segment. The
+     * [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+     * should match the audio language.
+     */
+    prompt?: string;
   }
 
   export interface Tool {

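A minimal sketch of reading the newly surfaced `RealtimeResponse` fields from a `response.done` server event; the WebSocket transport and event parsing are elided, and the import path is assumed from this file's location within the package:

```ts
import type { RealtimeResponse } from 'openai/resources/beta/realtime/realtime';

// `response` is assumed to come from an already-parsed `response.done` server event.
function logResponseSummary(response: RealtimeResponse): void {
  console.log(response.conversation_id);     // e.g. `conv_1234`, or null when `conversation` was `none`
  console.log(response.output_audio_format); // 'pcm16' | 'g711_ulaw' | 'g711_alaw'
  console.log(response.voice, response.temperature, response.max_output_tokens);
  console.log(response.metadata);            // Shared.Metadata | null: string key-value pairs
}
```
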
src/resources/beta/realtime/sessions.ts

Lines changed: 28 additions & 7 deletions
@@ -205,7 +205,7 @@ export interface SessionCreateResponse {
   /**
    * Ephemeral key returned by the API.
    */
-  client_secret?: SessionCreateResponse.ClientSecret;
+  client_secret: SessionCreateResponse.ClientSecret;
 
   /**
    * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
@@ -294,14 +294,14 @@ export namespace SessionCreateResponse {
     * Timestamp for when the token expires. Currently, all tokens expire after one
     * minute.
     */
-    expires_at?: number;
+    expires_at: number;
 
    /**
     * Ephemeral key usable in client environments to authenticate connections to the
     * Realtime API. Use this in client-side environments rather than a standard API
     * token, which should only be used server-side.
     */
-    value?: string;
+    value: string;
   }
 
   /**
@@ -387,8 +387,11 @@ export interface SessionCreateParams {
    * Configuration for input audio transcription, defaults to off and can be set to
    * `null` to turn off once on. Input audio transcription is not native to the
    * model, since the model consumes audio directly. Transcription runs
-   * asynchronously through Whisper and should be treated as rough guidance rather
-   * than the representation understood by the model.
+   * asynchronously through
+   * [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+   * and should be treated as rough guidance rather than the representation
+   * understood by the model. The client can optionally set the language and prompt
+   * for transcription, these fields will be passed to the Whisper API.
    */
   input_audio_transcription?: SessionCreateParams.InputAudioTranscription;
 
@@ -472,15 +475,33 @@ export namespace SessionCreateParams {
    * Configuration for input audio transcription, defaults to off and can be set to
    * `null` to turn off once on. Input audio transcription is not native to the
    * model, since the model consumes audio directly. Transcription runs
-   * asynchronously through Whisper and should be treated as rough guidance rather
-   * than the representation understood by the model.
+   * asynchronously through
+   * [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+   * and should be treated as rough guidance rather than the representation
+   * understood by the model. The client can optionally set the language and prompt
+   * for transcription, these fields will be passed to the Whisper API.
    */
   export interface InputAudioTranscription {
+    /**
+     * The language of the input audio. Supplying the input language in
+     * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+     * format will improve accuracy and latency.
+     */
+    language?: string;
+
     /**
      * The model to use for transcription, `whisper-1` is the only currently supported
      * model.
      */
     model?: string;
+
+    /**
+     * An optional text to guide the model's style or continue a previous audio
+     * segment. The
+     * [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+     * should match the audio language.
+     */
+    prompt?: string;
   }
 
   export interface Tool {

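A minimal sketch of the updated surface: `input_audio_transcription` now accepts `language` and `prompt`, and `client_secret.value` / `expires_at` are required on the create response. The model name and prompt text are illustrative:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

const session = await client.beta.realtime.sessions.create({
  model: 'gpt-4o-realtime-preview', // illustrative realtime model name
  input_audio_transcription: {
    model: 'whisper-1',
    language: 'en',                        // ISO-639-1 hint passed to the Whisper API
    prompt: 'Vocabulary: OpenAI, o3-mini', // optional style/terminology prompt
  },
});

// Both fields are now required on the response type.
console.log(session.client_secret.value, session.client_secret.expires_at);
```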