File tree (Expand file tree / Collapse file tree) — 1 file changed: +8 −6 lines changed
vllm/model_executor/layers/fused_moe — 1 file changed: +8 −6 lines changed
Original file line number | Diff line number | Diff line change
@@ -741,12 +741,14 @@ def __init__(
741
741
742
742
# we padding globally so EP buffer allocation works
743
743
if quant_config and quant_config .get_name () == "mxfp4" :
744
- if not is_torch_equal_or_newer ("2.8.0" ):
745
- raise RuntimeError ("Mxfp4 on hopper requires torch >= 2.8.0" )
746
- if current_platform .is_device_capability (
747
- 90 ) and not has_triton_kernels ():
748
- raise NotImplementedError (
749
- "Triton kernels must be installed for mxfp4 on hopper" )
744
+ if not current_platform .is_device_capability (100 ):
745
+ if not is_torch_equal_or_newer ("2.8.0" ):
746
+ raise RuntimeError (
747
+ "Mxfp4 on non-blackwell requires torch >= 2.8.0" )
748
+ if not has_triton_kernels ():
749
+ raise NotImplementedError (
750
+ "triton_kernels must be installed for "
751
+ "mxfp4 on non-blackwell" )
750
752
if (current_platform .is_rocm ()
751
753
or envs .VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8
752
754
or envs .VLLM_USE_FLASHINFER_MOE_MXFP4_BF16 ):
You can’t perform that action at this time.
0 commit comments