Skip to content

Commit 825b044

Browse files
fialhocoelho and njhill authored
[Frontend] Warn if user max_model_len is greater than derived max_model_len (#7080)
Signed-off-by: Jefferson Fialho <[email protected]> Co-authored-by: Nick Hill <[email protected]>
1 parent 44dcb52 commit 825b044

File tree

2 files changed

+23
-6
lines changed

2 files changed

+23
-6
lines changed

vllm/config.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import torch
77
from transformers import PretrainedConfig
88

9+
import vllm.envs as envs
910
from vllm.logger import init_logger
1011
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
1112
from vllm.model_executor.models import ModelRegistry
@@ -1541,15 +1542,21 @@ def _get_and_verify_max_len(
15411542
"Disabling sliding window is not supported for models "
15421543
"model_max_length in the config. Please raise an issue "
15431544
"so we can investigate.")
1544-
pass
15451545
else:
1546-
raise ValueError(
1546+
msg = (
15471547
f"User-specified max_model_len ({max_model_len}) is greater "
1548-
"than the derived max_model_len "
1549-
f"({max_len_key}={derived_max_model_len} or model_max_length="
1548+
f"than the derived max_model_len ({max_len_key}="
1549+
f"{derived_max_model_len} or model_max_length="
15501550
f"{model_max_length} in model's config.json). This may lead "
1551-
"to incorrect model outputs or CUDA errors. Make sure the "
1552-
"value is correct and within the model context size.")
1551+
"to incorrect model outputs or CUDA errors.")
1552+
if envs.VLLM_ALLOW_LONG_MAX_MODEL_LEN:
1553+
logger.warning(
1554+
"%s Make sure the value is correct and within the "
1555+
"model context size.", msg)
1556+
else:
1557+
raise ValueError(
1558+
f"{msg} To allow overriding this maximum, set "
1559+
"the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1")
15531560
return int(max_model_len)
15541561

15551562

vllm/envs.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
VLLM_NO_DEPRECATION_WARNING: bool = False
5151
CMAKE_BUILD_TYPE: Optional[str] = None
5252
VERBOSE: bool = False
53+
VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
5354

5455

5556
def get_default_cache_root():
@@ -331,6 +332,15 @@ def get_default_config_root():
331332
# If set, vllm will skip the deprecation warnings.
332333
"VLLM_NO_DEPRECATION_WARNING":
333334
lambda: bool(int(os.getenv("VLLM_NO_DEPRECATION_WARNING", "0"))),
335+
336+
# If the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN is set, it allows
337+
# the user to specify a max sequence length greater than
338+
# the max length derived from the model's config.json.
339+
# To enable this, set VLLM_ALLOW_LONG_MAX_MODEL_LEN=1.
340+
"VLLM_ALLOW_LONG_MAX_MODEL_LEN":
341+
lambda:
342+
(os.environ.get("VLLM_ALLOW_LONG_MAX_MODEL_LEN", "0").strip().lower() in
343+
("1", "true")),
334344
}
335345

336346
# end-env-vars-definition

0 commit comments

Comments (0)