Skip to content

Commit d16fc7f

Browse files
feat(api): Realtime API token_limits, Hybrid searching ranking options
1 parent 0406ffb commit d16fc7f

File tree

14 files changed

+325
-50
lines changed

14 files changed

+325
-50
lines changed

.stats.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 123
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml
3-
openapi_spec_hash: 1560717860bba4105936647dde8f618d
4-
config_hash: 50ee3382a63c021a9f821a935950e926
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml
3+
openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b
4+
config_hash: 032995825500a503a76da119f5354905

aliases.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,9 +294,13 @@ type CompoundFilterParam = shared.CompoundFilterParam
294294
// This is an alias to an internal type.
295295
type CustomToolInputFormatUnion = shared.CustomToolInputFormatUnion
296296

297+
// Unconstrained free-form text.
298+
//
297299
// This is an alias to an internal type.
298300
type CustomToolInputFormatText = shared.CustomToolInputFormatText
299301

302+
// A grammar defined by the user.
303+
//
300304
// This is an alias to an internal type.
301305
type CustomToolInputFormatGrammar = shared.CustomToolInputFormatGrammar
302306

@@ -305,9 +309,13 @@ type CustomToolInputFormatGrammar = shared.CustomToolInputFormatGrammar
305309
// This is an alias to an internal type.
306310
type CustomToolInputFormatUnionParam = shared.CustomToolInputFormatUnionParam
307311

312+
// Unconstrained free-form text.
313+
//
308314
// This is an alias to an internal type.
309315
type CustomToolInputFormatTextParam = shared.CustomToolInputFormatTextParam
310316

317+
// A grammar defined by the user.
318+
//
311319
// This is an alias to an internal type.
312320
type CustomToolInputFormatGrammarParam = shared.CustomToolInputFormatGrammarParam
313321

conversations/aliases.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,9 +294,13 @@ type CompoundFilterParam = shared.CompoundFilterParam
294294
// This is an alias to an internal type.
295295
type CustomToolInputFormatUnion = shared.CustomToolInputFormatUnion
296296

297+
// Unconstrained free-form text.
298+
//
297299
// This is an alias to an internal type.
298300
type CustomToolInputFormatText = shared.CustomToolInputFormatText
299301

302+
// A grammar defined by the user.
303+
//
300304
// This is an alias to an internal type.
301305
type CustomToolInputFormatGrammar = shared.CustomToolInputFormatGrammar
302306

@@ -305,9 +309,13 @@ type CustomToolInputFormatGrammar = shared.CustomToolInputFormatGrammar
305309
// This is an alias to an internal type.
306310
type CustomToolInputFormatUnionParam = shared.CustomToolInputFormatUnionParam
307311

312+
// Unconstrained free-form text.
313+
//
308314
// This is an alias to an internal type.
309315
type CustomToolInputFormatTextParam = shared.CustomToolInputFormatTextParam
310316

317+
// A grammar defined by the user.
318+
//
311319
// This is an alias to an internal type.
312320
type CustomToolInputFormatGrammarParam = shared.CustomToolInputFormatGrammarParam
313321

image.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -960,7 +960,10 @@ type ImageEditParams struct {
960960
//
961961
// Any of "transparent", "opaque", "auto".
962962
Background ImageEditParamsBackground `json:"background,omitzero"`
963-
// Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
963+
// Control how much effort the model will exert to match the style and features,
964+
// especially facial features, of input images. This parameter is only supported
965+
// for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
966+
// `low`. Defaults to `low`.
964967
//
965968
// Any of "high", "low".
966969
InputFidelity ImageEditParamsInputFidelity `json:"input_fidelity,omitzero"`
@@ -1059,7 +1062,10 @@ const (
10591062
ImageEditParamsBackgroundAuto ImageEditParamsBackground = "auto"
10601063
)
10611064

1062-
// Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
1065+
// Control how much effort the model will exert to match the style and features,
1066+
// especially facial features, of input images. This parameter is only supported
1067+
// for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
1068+
// `low`. Defaults to `low`.
10631069
type ImageEditParamsInputFidelity string
10641070

10651071
const (

realtime/aliases.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,9 +294,13 @@ type CompoundFilterParam = shared.CompoundFilterParam
294294
// This is an alias to an internal type.
295295
type CustomToolInputFormatUnion = shared.CustomToolInputFormatUnion
296296

297+
// Unconstrained free-form text.
298+
//
297299
// This is an alias to an internal type.
298300
type CustomToolInputFormatText = shared.CustomToolInputFormatText
299301

302+
// A grammar defined by the user.
303+
//
300304
// This is an alias to an internal type.
301305
type CustomToolInputFormatGrammar = shared.CustomToolInputFormatGrammar
302306

@@ -305,9 +309,13 @@ type CustomToolInputFormatGrammar = shared.CustomToolInputFormatGrammar
305309
// This is an alias to an internal type.
306310
type CustomToolInputFormatUnionParam = shared.CustomToolInputFormatUnionParam
307311

312+
// Unconstrained free-form text.
313+
//
308314
// This is an alias to an internal type.
309315
type CustomToolInputFormatTextParam = shared.CustomToolInputFormatTextParam
310316

317+
// A grammar defined by the user.
318+
//
311319
// This is an alias to an internal type.
312320
type CustomToolInputFormatGrammarParam = shared.CustomToolInputFormatGrammarParam
313321

realtime/clientsecret.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,19 @@ type RealtimeSessionCreateResponse struct {
124124
// `auto` will create a trace for the session with default values for the workflow
125125
// name, group id, and metadata.
126126
Tracing RealtimeSessionCreateResponseTracingUnion `json:"tracing,nullable"`
127-
// Controls how the realtime conversation is truncated prior to model inference.
128-
// The default is `auto`.
127+
// When the number of tokens in a conversation exceeds the model's input token
128+
// limit, the conversation will be truncated, meaning messages (starting from the
129+
// oldest) will not be included in the model's context. A 32k context model with
130+
// 4,096 max output tokens can only include 28,224 tokens in the context before
131+
// truncation occurs. Clients can configure truncation behavior to truncate with a
132+
// lower max token limit, which is an effective way to control token usage and
133+
// cost. Truncation will reduce the number of cached tokens on the next turn
134+
// (busting the cache), since messages are dropped from the beginning of the
135+
// context. However, clients can also configure truncation to retain messages up to
136+
// a fraction of the maximum context size, which will reduce the need for future
137+
// truncations and thus improve the cache rate. Truncation can be disabled
138+
// entirely, which means the server will never truncate but would instead return an
139+
// error if the conversation exceeds the model's input token limit.
129140
Truncation RealtimeTruncationUnion `json:"truncation"`
130141
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
131142
JSON struct {

realtime/realtime.go

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -853,8 +853,19 @@ type RealtimeSessionCreateRequestParam struct {
853853
ToolChoice RealtimeToolChoiceConfigUnionParam `json:"tool_choice,omitzero"`
854854
// Tools available to the model.
855855
Tools RealtimeToolsConfigParam `json:"tools,omitzero"`
856-
// Controls how the realtime conversation is truncated prior to model inference.
857-
// The default is `auto`.
856+
// When the number of tokens in a conversation exceeds the model's input token
857+
// limit, the conversation will be truncated, meaning messages (starting from the
858+
// oldest) will not be included in the model's context. A 32k context model with
859+
// 4,096 max output tokens can only include 28,224 tokens in the context before
860+
// truncation occurs. Clients can configure truncation behavior to truncate with a
861+
// lower max token limit, which is an effective way to control token usage and
862+
// cost. Truncation will reduce the number of cached tokens on the next turn
863+
// (busting the cache), since messages are dropped from the beginning of the
864+
// context. However, clients can also configure truncation to retain messages up to
865+
// a fraction of the maximum context size, which will reduce the need for future
866+
// truncations and thus improve the cache rate. Truncation can be disabled
867+
// entirely, which means the server will never truncate but would instead return an
868+
// error if the conversation exceeds the model's input token limit.
858869
Truncation RealtimeTruncationUnionParam `json:"truncation,omitzero"`
859870
// The type of session to create. Always `realtime` for the Realtime API.
860871
//
@@ -1697,10 +1708,13 @@ type RealtimeTruncationUnion struct {
16971708
RetentionRatio float64 `json:"retention_ratio"`
16981709
// This field is from variant [RealtimeTruncationRetentionRatio].
16991710
Type constant.RetentionRatio `json:"type"`
1700-
JSON struct {
1711+
// This field is from variant [RealtimeTruncationRetentionRatio].
1712+
TokenLimits RealtimeTruncationRetentionRatioTokenLimits `json:"token_limits"`
1713+
JSON struct {
17011714
OfRealtimeTruncationStrategy respjson.Field
17021715
RetentionRatio respjson.Field
17031716
Type respjson.Field
1717+
TokenLimits respjson.Field
17041718
raw string
17051719
} `json:"-"`
17061720
}
@@ -1778,15 +1792,21 @@ func (u *RealtimeTruncationUnionParam) asAny() any {
17781792
// input token limit. This allows you to amortize truncations across multiple
17791793
// turns, which can help improve cached token usage.
17801794
type RealtimeTruncationRetentionRatio struct {
1781-
// Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
1782-
// conversation exceeds the input token limit.
1795+
// Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
1796+
// the conversation exceeds the input token limit. Setting this to `0.8` means that
1797+
// messages will be dropped until 80% of the maximum allowed tokens are used. This
1798+
// helps reduce the frequency of truncations and improve cache rates.
17831799
RetentionRatio float64 `json:"retention_ratio,required"`
17841800
// Use retention ratio truncation.
17851801
Type constant.RetentionRatio `json:"type,required"`
1802+
// Optional custom token limits for this truncation strategy. If not provided, the
1803+
// model's default token limits will be used.
1804+
TokenLimits RealtimeTruncationRetentionRatioTokenLimits `json:"token_limits"`
17861805
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
17871806
JSON struct {
17881807
RetentionRatio respjson.Field
17891808
Type respjson.Field
1809+
TokenLimits respjson.Field
17901810
ExtraFields map[string]respjson.Field
17911811
raw string
17921812
} `json:"-"`
@@ -1808,15 +1828,43 @@ func (r RealtimeTruncationRetentionRatio) ToParam() RealtimeTruncationRetentionR
18081828
return param.Override[RealtimeTruncationRetentionRatioParam](json.RawMessage(r.RawJSON()))
18091829
}
18101830

1831+
// Optional custom token limits for this truncation strategy. If not provided, the
1832+
// model's default token limits will be used.
1833+
type RealtimeTruncationRetentionRatioTokenLimits struct {
1834+
// Maximum tokens allowed in the conversation after instructions (which include
1835+
// tool definitions). For example, setting this to 5,000 would mean that truncation
1836+
// would occur when the conversation exceeds 5,000 tokens after instructions. This
1837+
// cannot be higher than the model's context window size minus the maximum output
1838+
// tokens.
1839+
PostInstructions int64 `json:"post_instructions"`
1840+
// JSON contains metadata for fields, check presence with [respjson.Field.Valid].
1841+
JSON struct {
1842+
PostInstructions respjson.Field
1843+
ExtraFields map[string]respjson.Field
1844+
raw string
1845+
} `json:"-"`
1846+
}
1847+
1848+
// Returns the unmodified JSON received from the API
1849+
func (r RealtimeTruncationRetentionRatioTokenLimits) RawJSON() string { return r.JSON.raw }
1850+
func (r *RealtimeTruncationRetentionRatioTokenLimits) UnmarshalJSON(data []byte) error {
1851+
return apijson.UnmarshalRoot(data, r)
1852+
}
1853+
18111854
// Retain a fraction of the conversation tokens when the conversation exceeds the
18121855
// input token limit. This allows you to amortize truncations across multiple
18131856
// turns, which can help improve cached token usage.
18141857
//
18151858
// The properties RetentionRatio, Type are required.
18161859
type RealtimeTruncationRetentionRatioParam struct {
1817-
// Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
1818-
// conversation exceeds the input token limit.
1860+
// Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
1861+
// the conversation exceeds the input token limit. Setting this to `0.8` means that
1862+
// messages will be dropped until 80% of the maximum allowed tokens are used. This
1863+
// helps reduce the frequency of truncations and improve cache rates.
18191864
RetentionRatio float64 `json:"retention_ratio,required"`
1865+
// Optional custom token limits for this truncation strategy. If not provided, the
1866+
// model's default token limits will be used.
1867+
TokenLimits RealtimeTruncationRetentionRatioTokenLimitsParam `json:"token_limits,omitzero"`
18201868
// Use retention ratio truncation.
18211869
//
18221870
// This field can be elided, and will marshal its zero value as "retention_ratio".
@@ -1831,3 +1879,23 @@ func (r RealtimeTruncationRetentionRatioParam) MarshalJSON() (data []byte, err e
18311879
func (r *RealtimeTruncationRetentionRatioParam) UnmarshalJSON(data []byte) error {
18321880
return apijson.UnmarshalRoot(data, r)
18331881
}
1882+
1883+
// Optional custom token limits for this truncation strategy. If not provided, the
1884+
// model's default token limits will be used.
1885+
type RealtimeTruncationRetentionRatioTokenLimitsParam struct {
1886+
// Maximum tokens allowed in the conversation after instructions (which include
1887+
// tool definitions). For example, setting this to 5,000 would mean that truncation
1888+
// would occur when the conversation exceeds 5,000 tokens after instructions. This
1889+
// cannot be higher than the model's context window size minus the maximum output
1890+
// tokens.
1891+
PostInstructions param.Opt[int64] `json:"post_instructions,omitzero"`
1892+
paramObj
1893+
}
1894+
1895+
func (r RealtimeTruncationRetentionRatioTokenLimitsParam) MarshalJSON() (data []byte, err error) {
1896+
type shadow RealtimeTruncationRetentionRatioTokenLimitsParam
1897+
return param.MarshalObject(r, (*shadow)(&r))
1898+
}
1899+
func (r *RealtimeTruncationRetentionRatioTokenLimitsParam) UnmarshalJSON(data []byte) error {
1900+
return apijson.UnmarshalRoot(data, r)
1901+
}

responses/aliases.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,9 +294,13 @@ type CompoundFilterParam = shared.CompoundFilterParam
294294
// This is an alias to an internal type.
295295
type CustomToolInputFormatUnion = shared.CustomToolInputFormatUnion
296296

297+
// Unconstrained free-form text.
298+
//
297299
// This is an alias to an internal type.
298300
type CustomToolInputFormatText = shared.CustomToolInputFormatText
299301

302+
// A grammar defined by the user.
303+
//
300304
// This is an alias to an internal type.
301305
type CustomToolInputFormatGrammar = shared.CustomToolInputFormatGrammar
302306

@@ -305,9 +309,13 @@ type CustomToolInputFormatGrammar = shared.CustomToolInputFormatGrammar
305309
// This is an alias to an internal type.
306310
type CustomToolInputFormatUnionParam = shared.CustomToolInputFormatUnionParam
307311

312+
// Unconstrained free-form text.
313+
//
308314
// This is an alias to an internal type.
309315
type CustomToolInputFormatTextParam = shared.CustomToolInputFormatTextParam
310316

317+
// A grammar defined by the user.
318+
//
311319
// This is an alias to an internal type.
312320
type CustomToolInputFormatGrammarParam = shared.CustomToolInputFormatGrammarParam
313321

0 commit comments

Comments
 (0)