1 parent 2bb0e1a commit 166a168
vllm/multimodal/profiling.py
```diff
@@ -218,8 +218,10 @@ def get_decoder_dummy_data(
         # V0 does not support chunked prefill.
         if total_len > seq_len and not envs.VLLM_USE_V1:
+            # `max_num_batched_tokens` is defined by `SchedulerConfig`
             logger.warning(
-                "The context length (%d) of the model is too short "
+                "The sequence length used for profiling ("
+                "max_num_batched_tokens / max_num_seqs = %d) is too short "
                 "to hold the multi-modal embeddings in the worst case "
                 "(%d tokens in total, out of which %s are reserved for "
                 "multi-modal embeddings). This may cause certain "
```