@@ -6,6 +6,7 @@
 import torch
 from transformers import PretrainedConfig
 
+import vllm.envs as envs
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
 from vllm.model_executor.models import ModelRegistry
@@ -1541,15 +1542,21 @@ def _get_and_verify_max_len(
                     "Disabling sliding window is not supported for models "
                     "model_max_length in the config. Please raise an issue "
                     "so we can investigate.")
-            pass
         else:
-            raise ValueError(
+            msg = (
                 f"User-specified max_model_len ({max_model_len}) is greater "
-                "than the derived max_model_len "
-                f"({max_len_key}={derived_max_model_len} or model_max_length="
+                f"than the derived max_model_len ({max_len_key}="
+                f"{derived_max_model_len} or model_max_length="
                 f"{model_max_length} in model's config.json). This may lead "
-                "to incorrect model outputs or CUDA errors. Make sure the "
-                "value is correct and within the model context size.")
+                "to incorrect model outputs or CUDA errors.")
+            if envs.VLLM_ALLOW_LONG_MAX_MODEL_LEN:
+                logger.warning(
+                    "%s Make sure the value is correct and within the "
+                    "model context size.", msg)
+            else:
+                raise ValueError(
+                    f"{msg} To allow overriding this maximum, set "
+                    "the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1")
     return int(max_model_len)
 
 
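For illustration, a minimal usage sketch of the behavior this diff introduces (not part of the change itself). It assumes the public `vllm.LLM` entrypoint and uses `facebook/opt-125m` only as a stand-in for a model whose derived max_model_len (2048, from its config.json) is smaller than the requested value; with `VLLM_ALLOW_LONG_MAX_MODEL_LEN=1` set, the mismatch is logged as a warning instead of raising the ValueError above.

```python
import os

# Opt in to overriding the derived maximum length. Without this, the branch
# above raises ValueError when max_model_len exceeds the value derived from
# the model's config.json. Set it before vLLM reads its environment variables.
os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"

from vllm import LLM  # imported after setting the env var on purpose

# opt-125m derives max_model_len=2048 from max_position_embeddings, so asking
# for 4096 hits the new warning path instead of an error. As the warning says,
# outputs beyond the trained context may still be incorrect.
llm = LLM(model="facebook/opt-125m", max_model_len=4096)
```

Gating the override behind an environment variable rather than an engine argument keeps this unsafe path explicitly opt-in.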
|
|