Skip to content

Commit ed3aeb2

Browse files
authored
[V1] [Hybrid] Remove code to override default CUDA graph configuration (#26226)
Signed-off-by: Thomas Parnell <[email protected]>
1 parent 86ee949 commit ed3aeb2

File tree

1 file changed

+0
-15
lines changed

1 file changed

+0
-15
lines changed

vllm/model_executor/models/config.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from typing import TYPE_CHECKING
55

66
import vllm.envs as envs
7-
from vllm.config.compilation import CUDAGraphMode
87
from vllm.logger import init_logger
98
from vllm.model_executor.models import ModelRegistry
109
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, cdiv
@@ -290,7 +289,6 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
290289

291290
model_config = vllm_config.model_config
292291
cache_config = vllm_config.cache_config
293-
compilation_config = vllm_config.compilation_config
294292

295293
# Set mamba block size to max_model_len (this may get
296294
# overridden by prefix caching logic later)
@@ -320,19 +318,6 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
320318
"for hybrid models.")
321319
model_config.disable_cascade_attn = True
322320

323-
# TODO(tdoublep): remove as full cuda graph support is added
324-
FCG_NOT_SUPPORTED_MODELS = [
325-
"Lfm2ForCausalLM",
326-
"MiniMaxText01ForCausalLM",
327-
]
328-
329-
if (model_config.architecture not in FCG_NOT_SUPPORTED_MODELS
330-
and compilation_config.cudagraph_mode is None):
331-
logger.info(
332-
"Hybrid or mamba-based model detected: setting cudagraph mode "
333-
"to FULL_AND_PIECEWISE in order to optimize performance.")
334-
compilation_config.cudagraph_mode = CUDAGraphMode.FULL_AND_PIECEWISE
335-
336321

337322
class HybridAttentionMambaModelConfig(VerifyAndUpdateConfig):
338323

0 commit comments

Comments
 (0)