4 files changed: +10 −21 lines changed

@@ -251,13 +251,6 @@ class CompilationConfig:
     disabled when running with Inductor: mode>=VLLM_COMPILE and use_inductor=True.
     Inductor generates (fused) Triton kernels for disabled custom ops."""
     splitting_ops: list[str] | None = None
-
-    """
-    Provide control over whether to compile the multimodal encoder
-    such as Qwen2_5_vl
-    """
-    compile_mm_encoder: bool = True
-
     """A list of ops to exclude from cudagraphs, used in piecewise compilation.

     The behavior depends on use_inductor_graph_partition:
@@ -275,6 +268,9 @@ class CompilationConfig:

     If None, defaults to attention ops for piecewise cudagraphs.
     If empty list [], no ops are excluded (suitable for full cudagraphs)."""
+    compile_mm_encoder: bool = True
+    """Whether or not to compile the multimodal encoder.
+    Currently, this only works for `Qwen2_5_vl`."""

     # Inductor capture
     use_inductor: bool | None = None
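The two hunks above relocate `compile_mm_encoder` so the field sits directly above its docstring instead of splitting the `splitting_ops` documentation. As a minimal sketch of how the flag could be toggled from user code (the `LLM` constructor usage and the `CompilationConfig` import path are assumptions drawn from vLLM's public API, not shown in this diff; the exact signature may differ across versions):

    # Sketch: disable multimodal-encoder compilation while keeping the
    # language model compiled. Assumes vLLM's offline-inference API.
    from vllm import LLM
    from vllm.config import CompilationConfig

    llm = LLM(
        model="Qwen/Qwen2.5-VL-3B-Instruct",  # hypothetical model choice
        compilation_config=CompilationConfig(compile_mm_encoder=False),
    )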
@@ -67,9 +67,7 @@
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.module_mapping import MultiModelKeys
-from vllm.model_executor.models.transformers.utils import (
-    should_torch_compile_mm_vit,
-)
+from vllm.model_executor.models.vision import should_torch_compile_mm_vit
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.evs import (
     compute_mrope_for_media,
@@ -205,14 +205,3 @@ def can_enable_torch_compile(vllm_config: "VllmConfig") -> bool:
     # Dynamic rope scaling is not compatible with torch.compile
     rope_scaling: dict = getattr(text_config, "rope_scaling", None) or {}
     return rope_scaling.get("rope_type") != "dynamic"
-
-
-def should_torch_compile_mm_vit(vllm_config: "VllmConfig") -> bool:
-    """
-    Callable to be passed to `@support_torch_compile`'s `enable_if` argument.
-
-    Defaults to `True` but is disabled in the following situations:
-
-    - The model uses dynamic rope scaling.
-    """
-    return vllm_config.compilation_config.compile_mm_encoder
@@ -11,6 +11,7 @@
 from transformers import PretrainedConfig

 from vllm.attention.backends.registry import _Backend
+from vllm.config import VllmConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -100,6 +101,11 @@ def get_vit_attn_backend(
     return current_platform.get_vit_attn_backend(head_size, dtype)


+def should_torch_compile_mm_vit(vllm_config: VllmConfig) -> bool:
+    """Callable to be passed to `@support_torch_compile`'s `enable_if` argument."""
+    return vllm_config.compilation_config.compile_mm_encoder
+
+
 VisionFeatureSelectStrategyStr = Literal["class", "default", "full"]

 VisionFeatureSelectStrategy: TypeAlias = (
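Since the relocated helper is documented as the `enable_if` hook for `@support_torch_compile`, here is a minimal sketch of the kind of call site it gates (the decorator's import path matches vLLM's `vllm.compilation.decorators`, but `MyVisionEncoder` is a hypothetical stand-in, not the Qwen2.5-VL implementation this PR targets):

    # Sketch: gate compilation of a vision encoder on compile_mm_encoder.
    # MyVisionEncoder is a made-up example class, not part of vLLM.
    import torch
    import torch.nn as nn

    from vllm.compilation.decorators import support_torch_compile
    from vllm.config import VllmConfig
    from vllm.model_executor.models.vision import should_torch_compile_mm_vit


    @support_torch_compile(enable_if=should_torch_compile_mm_vit)
    class MyVisionEncoder(nn.Module):
        def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
            super().__init__()
            self.proj = nn.Linear(1024, 1024)

        def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
            # Compiled only when compile_mm_encoder is True.
            return self.proj(pixel_values)

With `enable_if`, the decorator consults the callable at model-construction time, so setting `compile_mm_encoder=False` leaves the encoder running as eager PyTorch without any change to the model code.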