
Commit 432e1cb

[Bugfix]: Assertion error when using FlashInfer backend (#25933)
Signed-off-by: simondanielsson <[email protected]>
Co-authored-by: Cyrus Leung <[email protected]>
1 parent 201c971 commit 432e1cb

File tree

1 file changed: +2 −2 lines

  • vllm/model_executor/layers/quantization/fp8.py


vllm/model_executor/layers/quantization/fp8.py

Lines changed: 2 additions & 2 deletions
@@ -508,7 +508,7 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module):
         self.layer = layer
         self.quant_config = quant_config
         self.weight_block_size = self.quant_config.weight_block_size
-        self.block_quant = self.weight_block_size is not None
+        self.block_quant: bool = self.weight_block_size is not None
 
         self.fused_experts: Optional[
             mk.FusedMoEModularKernel] = None  # type: ignore
@@ -1094,7 +1094,7 @@ def apply(
                 expert_map=expert_map,
             )
         elif self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS:
-            assert self.block_quant is None
+            assert not self.block_quant
             assert (not renormalize and custom_routing_function is not None)
             assert activation == 'silu', (
                 f"Expected 'silu' activation but got {activation}")
