@@ -103,113 +103,89 @@ class SamplingParams(
     Overall, we follow the sampling parameters from the OpenAI text completion
     API (https://platform.openai.com/docs/api-reference/completions/create).
     In addition, we support beam search, which is not supported by OpenAI.
-
-    Args:
-        n: Number of output sequences to return for the given prompt.
-        best_of: Number of output sequences that are generated from the prompt.
-            From these `best_of` sequences, the top `n` sequences are returned.
-            `best_of` must be greater than or equal to `n`. By default,
-            `best_of` is set to `n`. Warning, this is only supported in V0.
-        presence_penalty: Float that penalizes new tokens based on whether they
-            appear in the generated text so far. Values > 0 encourage the model
-            to use new tokens, while values < 0 encourage the model to repeat
-            tokens.
-        frequency_penalty: Float that penalizes new tokens based on their
-            frequency in the generated text so far. Values > 0 encourage the
-            model to use new tokens, while values < 0 encourage the model to
-            repeat tokens.
-        repetition_penalty: Float that penalizes new tokens based on whether
-            they appear in the prompt and the generated text so far. Values > 1
-            encourage the model to use new tokens, while values < 1 encourage
-            the model to repeat tokens.
-        temperature: Float that controls the randomness of the sampling. Lower
-            values make the model more deterministic, while higher values make
-            the model more random. Zero means greedy sampling.
-        top_p: Float that controls the cumulative probability of the top tokens
-            to consider. Must be in (0, 1]. Set to 1 to consider all tokens.
-        top_k: Integer that controls the number of top tokens to consider. Set
-            to 0 (or -1) to consider all tokens.
-        min_p: Float that represents the minimum probability for a token to be
-            considered, relative to the probability of the most likely token.
-            Must be in [0, 1]. Set to 0 to disable this.
-        seed: Random seed to use for the generation.
-        stop: list of strings that stop the generation when they are generated.
-            The returned output will not contain the stop strings.
-        stop_token_ids: list of tokens that stop the generation when they are
-            generated. The returned output will contain the stop tokens unless
-            the stop tokens are special tokens.
-        bad_words: list of words that are not allowed to be generated.
-            More precisely, only the last token of a corresponding
-            token sequence is not allowed when the next generated token
-            can complete the sequence.
-        include_stop_str_in_output: Whether to include the stop strings in
-            output text. Defaults to False.
-        ignore_eos: Whether to ignore the EOS token and continue generating
-            tokens after the EOS token is generated.
-        max_tokens: Maximum number of tokens to generate per output sequence.
-        min_tokens: Minimum number of tokens to generate per output sequence
-            before EOS or stop_token_ids can be generated
-        logprobs: Number of log probabilities to return per output token.
-            When set to None, no probability is returned. If set to a non-None
-            value, the result includes the log probabilities of the specified
-            number of most likely tokens, as well as the chosen tokens.
-            Note that the implementation follows the OpenAI API: The API will
-            always return the log probability of the sampled token, so there
-            may be up to `logprobs+1` elements in the response.
-            When set to -1, return all `vocab_size` log probabilities.
-        prompt_logprobs: Number of log probabilities to return per prompt token.
-        detokenize: Whether to detokenize the output. Defaults to True.
-        skip_special_tokens: Whether to skip special tokens in the output.
-        spaces_between_special_tokens: Whether to add spaces between special
-            tokens in the output. Defaults to True.
-        logits_processors: list of functions that modify logits based on
-            previously generated tokens, and optionally prompt tokens as
-            a first argument.
-        truncate_prompt_tokens: If set to -1, will use the truncation size
-            supported by the model. If set to an integer k, will use only
-            the last k tokens from the prompt (i.e., left truncation).
-            Defaults to None (i.e., no truncation).
-        guided_decoding: If provided, the engine will construct a guided
-            decoding logits processor from these parameters. Defaults to None.
-        logit_bias: If provided, the engine will construct a logits processor
-            that applies these logit biases. Defaults to None.
-        allowed_token_ids: If provided, the engine will construct a logits
-            processor which only retains scores for the given token ids.
-            Defaults to None.
-        extra_args: Arbitrary additional args, that can be used by custom
-            sampling implementations, plugins, etc. Not used by any in-tree
-            sampling implementations.
     """

     n: int = 1
+    """Number of output sequences to return for the given prompt."""
     best_of: Optional[int] = None
+    """Number of output sequences that are generated from the prompt. From
+    these `best_of` sequences, the top `n` sequences are returned. `best_of`
+    must be greater than or equal to `n`. By default, `best_of` is set to `n`.
+    Warning, this is only supported in V0."""
     _real_n: Optional[int] = None
     presence_penalty: float = 0.0
+    """Penalizes new tokens based on whether they appear in the generated text
+    so far. Values > 0 encourage the model to use new tokens, while values < 0
+    encourage the model to repeat tokens."""
     frequency_penalty: float = 0.0
+    """Penalizes new tokens based on their frequency in the generated text so
+    far. Values > 0 encourage the model to use new tokens, while values < 0
+    encourage the model to repeat tokens."""
     repetition_penalty: float = 1.0
+    """Penalizes new tokens based on whether they appear in the prompt and the
+    generated text so far. Values > 1 encourage the model to use new tokens,
+    while values < 1 encourage the model to repeat tokens."""
     temperature: float = 1.0
+    """Controls the randomness of the sampling. Lower values make the model
+    more deterministic, while higher values make the model more random. Zero
+    means greedy sampling."""
     top_p: float = 1.0
+    """Controls the cumulative probability of the top tokens to consider. Must
+    be in (0, 1]. Set to 1 to consider all tokens."""
     top_k: int = 0
+    """Controls the number of top tokens to consider. Set to 0 (or -1) to
+    consider all tokens."""
     min_p: float = 0.0
+    """Represents the minimum probability for a token to be considered,
+    relative to the probability of the most likely token. Must be in [0, 1].
+    Set to 0 to disable this."""
     seed: Optional[int] = None
+    """Random seed to use for the generation."""
     stop: Optional[Union[str, list[str]]] = None
+    """String(s) that stop the generation when they are generated. The returned
+    output will not contain the stop strings."""
     stop_token_ids: Optional[list[int]] = None
+    """Token IDs that stop the generation when they are generated. The returned
+    output will contain the stop tokens unless the stop tokens are special
+    tokens."""
     ignore_eos: bool = False
+    """Whether to ignore the EOS token and continue generating
+    tokens after the EOS token is generated."""
     max_tokens: Optional[int] = 16
+    """Maximum number of tokens to generate per output sequence."""
     min_tokens: int = 0
+    """Minimum number of tokens to generate per output sequence before EOS or
+    `stop_token_ids` can be generated."""
     logprobs: Optional[int] = None
+    """Number of log probabilities to return per output token. When set to
+    `None`, no probability is returned. If set to a non-`None` value, the
+    result includes the log probabilities of the specified number of most
+    likely tokens, as well as the chosen tokens. Note that the implementation
+    follows the OpenAI API: The API will always return the log probability of
+    the sampled token, so there may be up to `logprobs+1` elements in the
+    response. When set to -1, return all `vocab_size` log probabilities."""
     prompt_logprobs: Optional[int] = None
+    """Number of log probabilities to return per prompt token."""
     # NOTE: This parameter is only exposed at the engine level for now.
     # It is not exposed in the OpenAI API server, as the OpenAI API does
     # not support returning only a list of token IDs.
     detokenize: bool = True
+    """Whether to detokenize the output."""
     skip_special_tokens: bool = True
+    """Whether to skip special tokens in the output."""
     spaces_between_special_tokens: bool = True
+    """Whether to add spaces between special tokens in the output."""
     # Optional[list[LogitsProcessor]] type. We use Any here because
     # Optional[list[LogitsProcessor]] type is not supported by msgspec.
     logits_processors: Optional[Any] = None
+    """Functions that modify logits based on previously generated tokens, and
+    optionally prompt tokens as a first argument."""
     include_stop_str_in_output: bool = False
+    """Whether to include the stop strings in output text."""
     truncate_prompt_tokens: Optional[Annotated[int, msgspec.Meta(ge=1)]] = None
+    """If set to -1, will use the truncation size supported by the model. If
+    set to an integer k, will use only the last k tokens from the prompt
+    (i.e., left truncation). If set to `None`, truncation is disabled."""
     output_kind: RequestOutputKind = RequestOutputKind.CUMULATIVE

     # The below fields are not supposed to be used as an input.
@@ -219,12 +195,24 @@ class SamplingParams(

     # Fields used to construct logits processors
     guided_decoding: Optional[GuidedDecodingParams] = None
+    """If provided, the engine will construct a guided decoding logits
+    processor from these parameters."""
     logit_bias: Optional[dict[int, float]] = None
+    """If provided, the engine will construct a logits processor that applies
+    these logit biases."""
     allowed_token_ids: Optional[list[int]] = None
+    """If provided, the engine will construct a logits processor which only
+    retains scores for the given token ids."""
     extra_args: Optional[dict[str, Any]] = None
+    """Arbitrary additional args that can be used by custom sampling
+    implementations, plugins, etc. Not used by any in-tree sampling
+    implementations."""

     # Fields used for bad words
     bad_words: Optional[list[str]] = None
+    """Words that are not allowed to be generated. More precisely, only the
+    last token of a corresponding token sequence is not allowed when the next
+    generated token can complete the sequence."""
     _bad_words_token_ids: Optional[list[list[int]]] = None

     @staticmethod
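
For context, here is a minimal usage sketch of the fields documented in this diff, using vLLM's offline LLM entrypoint. The model name, the prompt, and the ban_token_processor helper are illustrative placeholders, not part of this commit; the processor signature follows the `logits_processors` docstring above (previously generated token ids, then the logits tensor).

import torch
from vllm import LLM, SamplingParams

def ban_token_processor(token_ids: list[int],
                        logits: torch.Tensor) -> torch.Tensor:
    # Hypothetical logits processor: mask out token id 0 at every step.
    logits[0] = -float("inf")
    return logits

params = SamplingParams(
    n=2,                     # return the top 2 generated sequences
    temperature=0.8,         # 0 would mean greedy sampling
    top_p=0.95,              # consider tokens within 95% cumulative probability
    repetition_penalty=1.1,  # values > 1 discourage repeated tokens
    stop=["\n\n"],           # stop strings are not included in the output
    max_tokens=64,           # cap on tokens per output sequence
    min_tokens=8,            # no EOS or stop token within the first 8 tokens
    seed=42,                 # reproducible sampling
    logprobs=5,              # up to logprobs+1 entries per sampled token
    logits_processors=[ban_token_processor],
)

llm = LLM(model="facebook/opt-125m")  # placeholder model
for request_output in llm.generate(["The capital of France is"], params):
    for completion in request_output.outputs:
        print(completion.text)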