
Commit 432e1cb

[Bugfix]: Assertion error when using FlashInfer backend (#25933)
Signed-off-by: simondanielsson <[email protected]>
Co-authored-by: Cyrus Leung <[email protected]>
1 parent 201c971 commit 432e1cb

File tree

1 file changed: +2 −2 lines

  • vllm/model_executor/layers/quantization/fp8.py


vllm/model_executor/layers/quantization/fp8.py

Lines changed: 2 additions & 2 deletions
@@ -508,7 +508,7 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module):
         self.layer = layer
         self.quant_config = quant_config
         self.weight_block_size = self.quant_config.weight_block_size
-        self.block_quant = self.weight_block_size is not None
+        self.block_quant: bool = self.weight_block_size is not None
 
         self.fused_experts: Optional[
             mk.FusedMoEModularKernel] = None  # type: ignore
@@ -1094,7 +1094,7 @@ def apply(
                 expert_map=expert_map,
             )
         elif self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS:
-            assert self.block_quant is None
+            assert not self.block_quant
             assert (not renormalize and custom_routing_function is not None)
             assert activation == 'silu', (
                 f"Expected 'silu' activation but got {activation}")
