Skip to content

Commit c5adb68

Browse files
committed
disable pplx for quantized types
Signed-off-by: Bill Nell <[email protected]>
1 parent 1cb6b1d commit c5adb68

File tree

1 file changed

+3
-2
lines changed
  • vllm/model_executor/layers/fused_moe

1 file changed

+3
-2
lines changed

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -835,15 +835,16 @@ def __init__(
835835

836836
if quant_config is None:
837837
quant_method = UnquantizedFusedMoEMethod(moe)
838+
prepare_finalize = _construct_prepare_finalize(moe, quant_config)
838839
else:
839840
quant_method = quant_config.get_quant_method(self, prefix)
841+
# No pplx for quantized types yet.
842+
prepare_finalize = None
840843

841844
assert quant_method is not None
842845
assert isinstance(quant_method, FusedMoEMethodBase)
843846
self.quant_method = quant_method
844847

845-
prepare_finalize = _construct_prepare_finalize(moe, quant_config)
846-
847848
if prepare_finalize is not None:
848849
world_size = moe.ep_size
849850
dp_size = int(moe.ep_size // moe.dp_size)

0 commit comments

Comments (0)