1 parent 36cb5f8 · commit 4c1d9d0
tensorrt_llm/_torch/modules/fused_moe/create_moe.py
@@ -196,6 +196,7 @@ def create_moe_backend(
             swiglu_beta=swiglu_beta,
             swiglu_limit=swiglu_limit,
             init_load_balancer=init_load_balancer,
+            without_comm=without_comm,
             activation_type=activation_type,
         )
     elif moe_cls == WideEPMoE:
@@ -256,6 +257,7 @@ def create_moe_backend(
             weight_loading_mode=weight_loading_mode,
             apply_router_weight_on_input=apply_router_weight_on_input,
             layer_idx=layer_idx,
+            without_comm=without_comm,
         )
     elif moe_cls == TritonFusedMoE:
         assert not apply_router_weight_on_input, "apply_router_weight_on_input is not supported in TritonFusedMoE."
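For context, the sketch below illustrates the pattern this commit follows: a backend factory that forwards a new `without_comm` flag to whichever MoE class it constructs. Only `without_comm`, `create_moe_backend`, `WideEPMoE`, and the other keyword names visible in the diff come from the source; the class bodies, signatures, and the `FusedMoE` stand-in are illustrative assumptions, not TensorRT-LLM's real API.

# Minimal, self-contained sketch of forwarding a new flag through a backend
# factory. Class bodies and signatures are hypothetical stand-ins.

class FusedMoE:
    def __init__(self, *, init_load_balancer=None, without_comm=False,
                 activation_type="swiglu"):
        self.init_load_balancer = init_load_balancer
        self.without_comm = without_comm
        self.activation_type = activation_type


class WideEPMoE:
    def __init__(self, *, layer_idx=0, without_comm=False):
        self.layer_idx = layer_idx
        self.without_comm = without_comm


def create_moe_backend(moe_cls, *, layer_idx=0, init_load_balancer=None,
                       without_comm=False, activation_type="swiglu"):
    # Each branch must forward `without_comm` explicitly; a branch that omits
    # it silently falls back to the constructor default, which is the kind of
    # gap the diff above closes for the two backends it touches.
    if moe_cls is FusedMoE:
        return moe_cls(init_load_balancer=init_load_balancer,
                       without_comm=without_comm,
                       activation_type=activation_type)
    elif moe_cls is WideEPMoE:
        return moe_cls(layer_idx=layer_idx, without_comm=without_comm)
    raise NotImplementedError(f"Unsupported MoE backend: {moe_cls}")


backend = create_moe_backend(WideEPMoE, layer_idx=3, without_comm=True)
assert backend.without_comm and backend.layer_idx == 3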