1 parent 2bb0e1a commit 166a168
vllm/multimodal/profiling.py
```diff
@@ -218,8 +218,10 @@ def get_decoder_dummy_data(
         # V0 does not support chunked prefill.
         if total_len > seq_len and not envs.VLLM_USE_V1:
+            # `max_num_batched_tokens` is defined by `SchedulerConfig`
             logger.warning(
-                "The context length (%d) of the model is too short "
+                "The sequence length used for profiling ("
+                "max_num_batched_tokens / max_num_seqs = %d) is too short "
                 "to hold the multi-modal embeddings in the worst case "
                 "(%d tokens in total, out of which %s are reserved for "
                 "multi-modal embeddings). This may cause certain "
```