vllm/model_executor/layers/fused_moe — 1 file changed: +5 −2 lines

@@ -254,9 +254,12 @@ def forward_hpu(
         renormalize: bool,
         topk_group: Optional[int] = None,
         num_expert_group: Optional[int] = None,
+        global_num_experts: int = -1,
+        expert_map: Optional[torch.Tensor] = None,
         custom_routing_function: Optional[Callable] = None,
         scoring_func: str = "softmax",
-        e_score_correction_bias: Optional[torch.Tensor] = None
+        e_score_correction_bias: Optional[torch.Tensor] = None,
+        activation: str = "silu",
     ) -> torch.Tensor:
         assert not use_grouped_topk
         assert num_expert_group is None
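For context on the two new routing arguments, the sketch below is an illustrative, standalone example rather than vLLM code (`build_expert_map` is a hypothetical helper): one common convention is for `expert_map` to be a tensor of length `global_num_experts` that maps each global expert id to a local slot on the current expert-parallel rank, with -1 marking experts owned by other ranks.

```python
# Illustrative sketch only (assumed convention, not the vLLM implementation).
from typing import Optional

import torch


def build_expert_map(global_num_experts: int,
                     ep_rank: int,
                     ep_size: int) -> Optional[torch.Tensor]:
    """Assign a contiguous slice of experts to each expert-parallel rank."""
    if ep_size == 1:
        # Every rank holds all experts; no remapping is needed.
        return None
    local_num_experts = global_num_experts // ep_size
    # -1 means "this expert lives on another rank".
    expert_map = torch.full((global_num_experts,), -1, dtype=torch.int32)
    start = ep_rank * local_num_experts
    expert_map[start:start + local_num_experts] = torch.arange(
        local_num_experts, dtype=torch.int32)
    return expert_map


# Example: 8 global experts split over 2 ranks; rank 1 owns experts 4..7,
# which become local experts 0..3.
print(build_expert_map(8, ep_rank=1, ep_size=2))
```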
@@ -472,7 +475,7 @@ def __init__(
                              "non-grouped topk.")
         if current_platform.is_hpu():
             from vllm_hpu_extension.ops import DynamicFusedMOE
-            self.hpu_fused_moe = DynamicFusedMOE(self.num_experts)
+            self.hpu_fused_moe = DynamicFusedMOE(self.global_num_experts)

         # Note: get_quant_method will look at the layer's local_num_experts
         # for heuristic purposes, so it must be initialized first.
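As a brief, assumption-labeled illustration of why the HPU wrapper is now sized by the global expert count rather than the local one: under expert parallelism each rank stores only its local expert weights, while routing indices refer to the global expert space, so the fused-MoE op must be dimensioned globally. The variable names below are illustrative and not taken from the diff.

```python
# Illustrative sketch (assumed names, not the vLLM implementation).
global_num_experts = 16
ep_size = 4                                          # expert-parallel world size
local_num_experts = global_num_experts // ep_size    # 4 experts stored per rank

# Sizing the fused-MoE op with local_num_experts would truncate routing
# targets that live on other ranks; sizing it with global_num_experts keeps
# the routing index space consistent across all ranks.
assert local_num_experts * ep_size == global_num_experts
```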