We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e9f568c commit 0cd2bc6Copy full SHA for 0cd2bc6
vllm/model_executor/layers/fused_moe/layer.py
@@ -956,6 +956,7 @@ def __init__(
956
UnquantizedFusedMoEMethod):
957
moe_op = VllmMixtureOfExpertsOp(
958
num_experts,
959
+ self.global_num_experts,
960
experts_min,
961
experts_max,
962
)
@@ -964,12 +965,14 @@ def __init__(
964
965
) and not envs.VLLM_HPU_FORCE_CHANNEL_FP8:
966
moe_op = VllmMixtureOfExpertsOpFP8(
967
968
969
970
971
972
else:
973
moe_op = VllmMixtureOfExpertsOpFP8PerChannel(
974
975
976
977
978
0 commit comments