
Commit f7f04ea

feat(api): Realtime API token_limits, hybrid search ranking options
1 parent f6b9f90 commit f7f04ea

36 files changed: 929 additions & 133 deletions

.stats.yml

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,4 @@
 configured_endpoints: 135
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml
-openapi_spec_hash: 1560717860bba4105936647dde8f618d
-config_hash: 50ee3382a63c021a9f821a935950e926
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml
+openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b
+config_hash: 032995825500a503a76da119f5354905

lib/openai/models/custom_tool_input_format.rb

Lines changed: 6 additions & 0 deletions
@@ -8,8 +8,10 @@ module CustomToolInputFormat

 discriminator :type

+# Unconstrained free-form text.
 variant :text, -> { OpenAI::CustomToolInputFormat::Text }

+# A grammar defined by the user.
 variant :grammar, -> { OpenAI::CustomToolInputFormat::Grammar }

 class Text < OpenAI::Internal::Type::BaseModel
@@ -20,6 +22,8 @@ class Text < OpenAI::Internal::Type::BaseModel
 required :type, const: :text

 # @!method initialize(type: :text)
+# Unconstrained free-form text.
+#
 # @param type [Symbol, :text] Unconstrained text format. Always `text`.
 end

@@ -43,6 +47,8 @@ class Grammar < OpenAI::Internal::Type::BaseModel
 required :type, const: :grammar

 # @!method initialize(definition:, syntax:, type: :grammar)
+# A grammar defined by the user.
+#
 # @param definition [String] The grammar definition.
 #
 # @param syntax [Symbol, OpenAI::Models::CustomToolInputFormat::Grammar::Syntax] The syntax of the grammar definition. One of `lark` or `regex`.
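
For reference, a minimal sketch (not part of the diff) of building the two format variants documented above. The class names come from the model in this file; how the result is attached to a custom tool in a Responses call is an assumption and may differ:

require "openai"

# Unconstrained free-form text input for a custom tool.
text_format = OpenAI::Models::CustomToolInputFormat::Text.new

# A user-defined grammar; `syntax:` is `:lark` or `:regex` per the doc comment above.
grammar_format = OpenAI::Models::CustomToolInputFormat::Grammar.new(
  syntax: :lark,
  definition: 'start: "yes" | "no"' # hypothetical grammar definition
)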

lib/openai/models/image_edit_params.rb

Lines changed: 1 addition & 1 deletion
@@ -142,7 +142,7 @@ class ImageEditParams < OpenAI::Internal::Type::BaseModel
 #
 # @param background [Symbol, OpenAI::Models::ImageEditParams::Background, nil] Allows to set transparency for the background of the generated image(s).
 #
-# @param input_fidelity [Symbol, OpenAI::Models::ImageEditParams::InputFidelity, nil]
+# @param input_fidelity [Symbol, OpenAI::Models::ImageEditParams::InputFidelity, nil] Control how much effort the model will exert to match the style and features, es
 #
 # @param mask [Pathname, StringIO, IO, String, OpenAI::FilePart] An additional image whose fully transparent areas (e.g. where alpha is zero) ind
 #

lib/openai/models/realtime/realtime_session_create_request.rb

Lines changed: 14 additions & 3 deletions
@@ -98,8 +98,19 @@ class RealtimeSessionCreateRequest < OpenAI::Internal::Type::BaseModel
 optional :tracing, union: -> { OpenAI::Realtime::RealtimeTracingConfig }, nil?: true

 # @!attribute truncation
-# Controls how the realtime conversation is truncated prior to model inference.
-# The default is `auto`.
+# When the number of tokens in a conversation exceeds the model's input token
+# limit, the conversation will be truncated, meaning messages (starting from the
+# oldest) will not be included in the model's context. A 32k context model with
+# 4,096 max output tokens can only include 28,224 tokens in the context before
+# truncation occurs. Clients can configure truncation behavior to truncate with a
+# lower max token limit, which is an effective way to control token usage and
+# cost. Truncation will reduce the number of cached tokens on the next turn
+# (busting the cache), since messages are dropped from the beginning of the
+# context. However, clients can also configure truncation to retain messages up to
+# a fraction of the maximum context size, which will reduce the need for future
+# truncations and thus improve the cache rate. Truncation can be disabled
+# entirely, which means the server will never truncate but would instead return an
+# error if the conversation exceeds the model's input token limit.
 #
 # @return [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio, nil]
 optional :truncation, union: -> { OpenAI::Realtime::RealtimeTruncation }
@@ -130,7 +141,7 @@ class RealtimeSessionCreateRequest < OpenAI::Internal::Type::BaseModel
 #
 # @param tracing [Symbol, :auto, OpenAI::Models::Realtime::RealtimeTracingConfig::TracingConfiguration, nil] Realtime API can write session traces to the [Traces Dashboard](/logs?api=traces
 #
-# @param truncation [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio] Controls how the realtime conversation is truncated prior to model inference.
+# @param truncation [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio] When the number of tokens in a conversation exceeds the model's input token limi
 #
 # @param type [Symbol, :realtime] The type of session to create. Always `realtime` for the Realtime API.

lib/openai/models/realtime/realtime_session_create_response.rb

Lines changed: 14 additions & 3 deletions
@@ -106,8 +106,19 @@ class RealtimeSessionCreateResponse < OpenAI::Internal::Type::BaseModel
 optional :tracing, union: -> { OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing }, nil?: true

 # @!attribute truncation
-# Controls how the realtime conversation is truncated prior to model inference.
-# The default is `auto`.
+# When the number of tokens in a conversation exceeds the model's input token
+# limit, the conversation will be truncated, meaning messages (starting from the
+# oldest) will not be included in the model's context. A 32k context model with
+# 4,096 max output tokens can only include 28,224 tokens in the context before
+# truncation occurs. Clients can configure truncation behavior to truncate with a
+# lower max token limit, which is an effective way to control token usage and
+# cost. Truncation will reduce the number of cached tokens on the next turn
+# (busting the cache), since messages are dropped from the beginning of the
+# context. However, clients can also configure truncation to retain messages up to
+# a fraction of the maximum context size, which will reduce the need for future
+# truncations and thus improve the cache rate. Truncation can be disabled
+# entirely, which means the server will never truncate but would instead return an
+# error if the conversation exceeds the model's input token limit.
 #
 # @return [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio, nil]
 optional :truncation, union: -> { OpenAI::Realtime::RealtimeTruncation }
@@ -141,7 +152,7 @@ class RealtimeSessionCreateResponse < OpenAI::Internal::Type::BaseModel
 #
 # @param tracing [Symbol, :auto, OpenAI::Models::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration, nil] Realtime API can write session traces to the [Traces Dashboard](/logs?api=traces
 #
-# @param truncation [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio] Controls how the realtime conversation is truncated prior to model inference.
+# @param truncation [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio] When the number of tokens in a conversation exceeds the model's input token limi
 #
 # @param type [Symbol, :realtime] The type of session to create. Always `realtime` for the Realtime API.

lib/openai/models/realtime/realtime_truncation.rb

Lines changed: 13 additions & 2 deletions
@@ -3,8 +3,19 @@
 module OpenAI
 module Models
 module Realtime
-# Controls how the realtime conversation is truncated prior to model inference.
-# The default is `auto`.
+# When the number of tokens in a conversation exceeds the model's input token
+# limit, the conversation will be truncated, meaning messages (starting from the
+# oldest) will not be included in the model's context. A 32k context model with
+# 4,096 max output tokens can only include 28,224 tokens in the context before
+# truncation occurs. Clients can configure truncation behavior to truncate with a
+# lower max token limit, which is an effective way to control token usage and
+# cost. Truncation will reduce the number of cached tokens on the next turn
+# (busting the cache), since messages are dropped from the beginning of the
+# context. However, clients can also configure truncation to retain messages up to
+# a fraction of the maximum context size, which will reduce the need for future
+# truncations and thus improve the cache rate. Truncation can be disabled
+# entirely, which means the server will never truncate but would instead return an
+# error if the conversation exceeds the model's input token limit.
 module RealtimeTruncation
 extend OpenAI::Internal::Type::Union
lib/openai/models/realtime/realtime_truncation_retention_ratio.rb

Lines changed: 38 additions & 4 deletions
@@ -5,8 +5,10 @@ module Models
 module Realtime
 class RealtimeTruncationRetentionRatio < OpenAI::Internal::Type::BaseModel
 # @!attribute retention_ratio
-# Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
-# conversation exceeds the input token limit.
+# Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
+# the conversation exceeds the input token limit. Setting this to `0.8` means that
+# messages will be dropped until 80% of the maximum allowed tokens are used. This
+# helps reduce the frequency of truncations and improve cache rates.
 #
 # @return [Float]
 required :retention_ratio, Float
@@ -17,17 +19,49 @@ class RealtimeTruncationRetentionRatio < OpenAI::Internal::Type::BaseModel
 # @return [Symbol, :retention_ratio]
 required :type, const: :retention_ratio

-# @!method initialize(retention_ratio:, type: :retention_ratio)
+# @!attribute token_limits
+# Optional custom token limits for this truncation strategy. If not provided, the
+# model's default token limits will be used.
+#
+# @return [OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio::TokenLimits, nil]
+optional :token_limits, -> { OpenAI::Realtime::RealtimeTruncationRetentionRatio::TokenLimits }
+
+# @!method initialize(retention_ratio:, token_limits: nil, type: :retention_ratio)
 # Some parameter documentations has been truncated, see
 # {OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio} for more details.
 #
 # Retain a fraction of the conversation tokens when the conversation exceeds the
 # input token limit. This allows you to amortize truncations across multiple
 # turns, which can help improve cached token usage.
 #
-# @param retention_ratio [Float] Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
+# @param retention_ratio [Float] Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
+#
+# @param token_limits [OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio::TokenLimits] Optional custom token limits for this truncation strategy. If not provided, the
 #
 # @param type [Symbol, :retention_ratio] Use retention ratio truncation.
+
+# @see OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio#token_limits
+class TokenLimits < OpenAI::Internal::Type::BaseModel
+# @!attribute post_instructions
+# Maximum tokens allowed in the conversation after instructions (which include
+# tool definitions). For example, setting this to 5,000 would mean that truncation
+# would occur when the conversation exceeds 5,000 tokens after instructions. This
+# cannot be higher than the model's context window size minus the maximum output
+# tokens.
+#
+# @return [Integer, nil]
+optional :post_instructions, Integer
+
+# @!method initialize(post_instructions: nil)
+# Some parameter documentations has been truncated, see
+# {OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio::TokenLimits} for
+# more details.
+#
+# Optional custom token limits for this truncation strategy. If not provided, the
+# model's default token limits will be used.
+#
+# @param post_instructions [Integer] Maximum tokens allowed in the conversation after instructions (which including t
+end
 end
 end
 end
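
As a rough usage sketch (not from this commit), the new retention-ratio truncation with custom token limits could be built as below; the 0.8 ratio and the 5,000-token limit are placeholder values, and how the resulting object is attached to a Realtime session is an assumption:

require "openai"

# Hypothetical limit: truncate once the post-instruction conversation exceeds 5,000 tokens.
token_limits =
  OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio::TokenLimits.new(post_instructions: 5_000)

# Keep roughly 80% of post-instruction tokens whenever truncation runs, so truncations
# (and the cache busting they cause) happen less often.
truncation =
  OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio.new(
    retention_ratio: 0.8,
    token_limits: token_limits
  )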

lib/openai/models/responses/file_search_tool.rb

Lines changed: 33 additions & 1 deletion
@@ -71,6 +71,13 @@ module Filters

 # @see OpenAI::Models::Responses::FileSearchTool#ranking_options
 class RankingOptions < OpenAI::Internal::Type::BaseModel
+# @!attribute hybrid_search
+# Weights that control how reciprocal rank fusion balances semantic embedding
+# matches versus sparse keyword matches when hybrid search is enabled.
+#
+# @return [OpenAI::Models::Responses::FileSearchTool::RankingOptions::HybridSearch, nil]
+optional :hybrid_search, -> { OpenAI::Responses::FileSearchTool::RankingOptions::HybridSearch }
+
 # @!attribute ranker
 # The ranker to use for the file search.
 #
@@ -85,16 +92,41 @@ class RankingOptions < OpenAI::Internal::Type::BaseModel
 # @return [Float, nil]
 optional :score_threshold, Float

-# @!method initialize(ranker: nil, score_threshold: nil)
+# @!method initialize(hybrid_search: nil, ranker: nil, score_threshold: nil)
 # Some parameter documentations has been truncated, see
 # {OpenAI::Models::Responses::FileSearchTool::RankingOptions} for more details.
 #
 # Ranking options for search.
 #
+# @param hybrid_search [OpenAI::Models::Responses::FileSearchTool::RankingOptions::HybridSearch] Weights that control how reciprocal rank fusion balances semantic embedding matc
+#
 # @param ranker [Symbol, OpenAI::Models::Responses::FileSearchTool::RankingOptions::Ranker] The ranker to use for the file search.
 #
 # @param score_threshold [Float] The score threshold for the file search, a number between 0 and 1. Numbers close

+# @see OpenAI::Models::Responses::FileSearchTool::RankingOptions#hybrid_search
+class HybridSearch < OpenAI::Internal::Type::BaseModel
+# @!attribute embedding_weight
+# The weight of the embedding in the reciprocal ranking fusion.
+#
+# @return [Float]
+required :embedding_weight, Float
+
+# @!attribute text_weight
+# The weight of the text in the reciprocal ranking fusion.
+#
+# @return [Float]
+required :text_weight, Float
+
+# @!method initialize(embedding_weight:, text_weight:)
+# Weights that control how reciprocal rank fusion balances semantic embedding
+# matches versus sparse keyword matches when hybrid search is enabled.
+#
+# @param embedding_weight [Float] The weight of the embedding in the reciprocal ranking fusion.
+#
+# @param text_weight [Float] The weight of the text in the reciprocal ranking fusion.
+end
+
 # The ranker to use for the file search.
 #
 # @see OpenAI::Models::Responses::FileSearchTool::RankingOptions#ranker
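
A hedged example (not in the diff) of what the new hybrid-search ranking options could look like on a file search tool; the vector store ID and the 0.7/0.3 weight split are placeholders, and whether the weights must sum to 1 is not stated here:

require "openai"

tool = OpenAI::Models::Responses::FileSearchTool.new(
  vector_store_ids: ["vs_123"], # hypothetical vector store ID
  ranking_options: OpenAI::Models::Responses::FileSearchTool::RankingOptions.new(
    hybrid_search: OpenAI::Models::Responses::FileSearchTool::RankingOptions::HybridSearch.new(
      embedding_weight: 0.7, # weight for semantic embedding matches in reciprocal rank fusion
      text_weight: 0.3       # weight for sparse keyword matches
    )
  )
)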

lib/openai/models/responses/response_output_text.rb

Lines changed: 8 additions & 8 deletions
@@ -11,6 +11,11 @@ class ResponseOutputText < OpenAI::Internal::Type::BaseModel
 required :annotations,
 -> { OpenAI::Internal::Type::ArrayOf[union: OpenAI::Responses::ResponseOutputText::Annotation] }

+# @!attribute logprobs
+#
+# @return [Array<OpenAI::Models::Responses::ResponseOutputText::Logprob>]
+required :logprobs, -> { OpenAI::Internal::Type::ArrayOf[OpenAI::Responses::ResponseOutputText::Logprob] }
+
 # @!attribute text
 # The text output from the model.
 #
@@ -31,20 +36,15 @@ class ResponseOutputText < OpenAI::Internal::Type::BaseModel
 # @return [Symbol, :output_text]
 required :type, const: :output_text

-# @!attribute logprobs
-#
-# @return [Array<OpenAI::Models::Responses::ResponseOutputText::Logprob>, nil]
-optional :logprobs, -> { OpenAI::Internal::Type::ArrayOf[OpenAI::Responses::ResponseOutputText::Logprob] }
-
-# @!method initialize(annotations:, text:, logprobs: nil, type: :output_text)
+# @!method initialize(annotations:, logprobs:, text:, type: :output_text)
 # A text output from the model.
 #
 # @param annotations [Array<OpenAI::Models::Responses::ResponseOutputText::Annotation::FileCitation, OpenAI::Models::Responses::ResponseOutputText::Annotation::URLCitation, OpenAI::Models::Responses::ResponseOutputText::Annotation::ContainerFileCitation, OpenAI::Models::Responses::ResponseOutputText::Annotation::FilePath>] The annotations of the text output.
 #
-# @param text [String] The text output from the model.
-#
 # @param logprobs [Array<OpenAI::Models::Responses::ResponseOutputText::Logprob>]
 #
+# @param text [String] The text output from the model.
+#
 # @param type [Symbol, :output_text] The type of the output text. Always `output_text`.

 # A citation to a file.
