File tree Expand file tree Collapse file tree 1 file changed +13
-3
lines changed
vllm/model_executor/layers/fused_moe Expand file tree Collapse file tree 1 file changed +13
-3
lines changed Original file line number Diff line number Diff line change @@ -475,12 +475,11 @@ def forward_cuda(
475
475
activation = activation ,
476
476
apply_router_weight_on_input = apply_router_weight_on_input )
477
477
else :
478
- return self .fused_experts (
478
+ # add w1_bias/w2_bias to kwargs if they exist
479
+ kwargs = dict (
479
480
hidden_states = x ,
480
481
w1 = layer .w13_weight ,
481
482
w2 = layer .w2_weight ,
482
- w1_bias = layer .w13_bias if self .has_bias else None ,
483
- w2_bias = layer .w2_bias if self .has_bias else None ,
484
483
topk_weights = topk_weights ,
485
484
topk_ids = topk_ids ,
486
485
inplace = True ,
@@ -489,6 +488,17 @@ def forward_cuda(
489
488
global_num_experts = global_num_experts ,
490
489
expert_map = expert_map ,
491
490
)
491
+ if isinstance (self .fused_experts ,
492
+ FusedMoEModularKernel ) and self .has_bias :
493
+ raise ValueError (
494
+ "FusedMoEModularKernel does not support bias." )
495
+ if self .has_bias :
496
+ kwargs .update ({
497
+ "w1_bias" : getattr (layer , "w13_bias" , None ),
498
+ "w2_bias" : getattr (layer , "w2_bias" , None ),
499
+ })
500
+
501
+ return self .fused_experts (** kwargs )
492
502
493
503
def forward_cpu (
494
504
self ,
You can’t perform that action at this time.
0 commit comments