Skip to content

Commit c5adb68

Browse files
committed
disable pplx for quantized types
Signed-off-by: Bill Nell <[email protected]>
1 parent 1cb6b1d commit c5adb68

File tree

1 file changed

+3
-2
lines changed
  • vllm/model_executor/layers/fused_moe

1 file changed

+3
-2
lines changed

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -835,15 +835,16 @@ def __init__(
835835

836836
if quant_config is None:
837837
quant_method = UnquantizedFusedMoEMethod(moe)
838+
prepare_finalize = _construct_prepare_finalize(moe, quant_config)
838839
else:
839840
quant_method = quant_config.get_quant_method(self, prefix)
841+
# No pplx for quantized types yet.
842+
prepare_finalize = None
840843

841844
assert quant_method is not None
842845
assert isinstance(quant_method, FusedMoEMethodBase)
843846
self.quant_method = quant_method
844847

845-
prepare_finalize = _construct_prepare_finalize(moe, quant_config)
846-
847848
if prepare_finalize is not None:
848849
world_size = moe.ep_size
849850
dp_size = int(moe.ep_size // moe.dp_size)

0 commit comments

Comments (0)