
Commit 55602bb

noooop and hmellor authored
[Frontend] Update the warning log when using VLLM_ALLOW_LONG_MAX_MODEL_LEN (#20904)
Signed-off-by: wang.yuqi <[email protected]>
Signed-off-by: Harry Mellor <[email protected]>
Co-authored-by: Harry Mellor <[email protected]>
1 parent: d7fbc6d · commit: 55602bb

File tree

1 file changed: +10 −6 lines changed


vllm/config/__init__.py

Lines changed: 10 additions & 6 deletions
@@ -3021,16 +3021,20 @@ def _get_and_verify_max_len(
                 f"User-specified max_model_len ({max_model_len}) is greater "
                 f"than the derived max_model_len ({max_len_key}="
                 f"{derived_max_model_len} or model_max_length="
-                f"{model_max_length} in model's config.json). This may lead "
-                "to incorrect model outputs or CUDA errors.")
+                f"{model_max_length} in model's config.json).")
+            warning = (
+                "VLLM_ALLOW_LONG_MAX_MODEL_LEN must be used with extreme "
+                "caution. If the model uses relative position encoding (RoPE), "
+                "positions exceeding derived_max_model_len lead to nan. If the "
+                "model uses absolute position encoding, positions exceeding "
+                "derived_max_model_len will cause a CUDA array out-of-bounds "
+                "error.")
             if envs.VLLM_ALLOW_LONG_MAX_MODEL_LEN:
-                logger.warning(
-                    "%s Make sure the value is correct and within the "
-                    "model context size.", msg)
+                logger.warning_once("%s %s", msg, warning)
             else:
                 raise ValueError(
                     f"{msg} To allow overriding this maximum, set "
-                    "the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1")
+                    f"the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1. {warning}")
     return int(max_model_len)

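For readers outside the vLLM codebase, here is a minimal standalone sketch of the control flow after this commit. It substitutes `os.environ` and Python's `warnings` module for vLLM's `envs` helper and `logger`; the function name `verify_max_len` and its two-argument signature are hypothetical simplifications of `_get_and_verify_max_len`:

```python
import os
import warnings


def verify_max_len(max_model_len: int, derived_max_model_len: int) -> int:
    """Hypothetical, simplified sketch of the check this commit modifies."""
    if max_model_len <= derived_max_model_len:
        return int(max_model_len)

    msg = (f"User-specified max_model_len ({max_model_len}) is greater "
           f"than the derived max_model_len ({derived_max_model_len}).")
    warning = (
        "VLLM_ALLOW_LONG_MAX_MODEL_LEN must be used with extreme caution. "
        "If the model uses relative position encoding (RoPE), positions "
        "exceeding derived_max_model_len lead to nan. If the model uses "
        "absolute position encoding, positions exceeding "
        "derived_max_model_len will cause a CUDA array out-of-bounds error.")
    if os.environ.get("VLLM_ALLOW_LONG_MAX_MODEL_LEN") == "1":
        # Override allowed: downgrade to a warning that now carries the
        # detailed rationale instead of the old generic reminder.
        warnings.warn(f"{msg} {warning}")
    else:
        # Override not allowed: fail fast, and (new in this commit)
        # include the same rationale in the error message.
        raise ValueError(
            f"{msg} To allow overriding this maximum, set the env var "
            f"VLLM_ALLOW_LONG_MAX_MODEL_LEN=1. {warning}")
    return int(max_model_len)
```

Note also the switch from logger.warning to logger.warning_once, so the now much longer message is emitted once per process rather than on every call. A user who launches vLLM with VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 and a --max-model-len above the value derived from config.json takes the warning branch instead of getting the ValueError.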