1 file changed: +10 -6 lines changed
Original file line number | Diff line number | Diff line change
@@ -3021,16 +3021,20 @@ def _get_and_verify_max_len(
30213021 f"User-specified max_model_len ({ max_model_len } ) is greater "
30223022 f"than the derived max_model_len ({ max_len_key } ="
30233023 f"{ derived_max_model_len } or model_max_length="
3024- f"{ model_max_length } in model's config.json). This may lead "
3025- "to incorrect model outputs or CUDA errors." )
3024+ f"{ model_max_length } in model's config.json)." )
3025+ warning = (
3026+ "VLLM_ALLOW_LONG_MAX_MODEL_LEN must be used with extreme "
3027+ "caution. If the model uses relative position encoding (RoPE), "
3028+ "positions exceeding derived_max_model_len lead to nan. If the "
3029+ "model uses absolute position encoding, positions exceeding "
3030+ "derived_max_model_len will cause a CUDA array out-of-bounds "
3031+ "error." )
30263032 if envs .VLLM_ALLOW_LONG_MAX_MODEL_LEN :
3027- logger .warning (
3028- "%s Make sure the value is correct and within the "
3029- "model context size." , msg )
3033+ logger .warning_once ("%s %s" , msg , warning )
30303034 else :
30313035 raise ValueError (
30323036 f"{ msg } To allow overriding this maximum, set "
3033- "the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1" )
3037+ f "the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1. { warning } " )
30343038 return int (max_model_len )
30353039
30363040
You can’t perform that action at this time.
0 commit comments