Commit a1c5bb4

fix:(moe config): default using_flex_token
1 parent 4e79040 commit a1c5bb4

2 files changed (+4, -1 lines)


paddleformers/transformers/moe_gate.py

Lines changed: 3 additions & 0 deletions
@@ -210,6 +210,9 @@ def __init__(self, config, num_experts, expert_hidden_size, **kwargs):
         self.norm_topk_prob = kwargs.pop("norm_topk_prob", False)
         self.routed_scaling_factor = kwargs.pop("routed_scaling_factor", 1.0)
 
+        # for flex token moe layer
+        self.using_flex_token = kwargs.pop("using_flex_token", False)
+
     def _priority(self, topk_idx: paddle.Tensor, capacity: int) -> paddle.Tensor:
         """_summary_
         The priority is the cumulative sum of the expert indices.
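With this change the gate reads using_flex_token from kwargs with a False default, so configs that never set the flag still construct the gate with flex-token routing disabled. A minimal sketch of the kwargs.pop defaulting pattern, using a stand-in class rather than the real PaddleFormers gate:

class GateSketch:
    def __init__(self, **kwargs):
        # pop returns the caller's value, or the default when the key was never passed
        self.using_flex_token = kwargs.pop("using_flex_token", False)

print(GateSketch().using_flex_token)                       # False: flag omitted
print(GateSketch(using_flex_token=True).using_flex_token)  # True: flag set explicitly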

paddleformers/transformers/moe_layer.py

Lines changed: 1 addition & 1 deletion
@@ -277,7 +277,7 @@ def __init__(
     def update_flex_token(self):
         from paddleformers.transformers.deepseek_v2 import get_global_step
 
-        if (not self.config.using_flex_token) or (get_global_step() < self.token_drop_steps):
+        if (not hasattr(self.config, "using_flex_token")) or (not self.config.using_flex_token) or (get_global_step() < self.token_drop_steps):
             self.using_flex_token = False
             self.router.using_flex_token = False
         else:
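The rewritten guard also covers configs that predate the option: if the attribute is missing, hasattr short-circuits the check and flex-token routing stays off instead of raising AttributeError on self.config.using_flex_token. A rough illustration of the decision logic, using a SimpleNamespace as a stand-in config and passing the global step and token_drop_steps in directly (the real layer pulls them from get_global_step() and its own state):

from types import SimpleNamespace

def flex_token_enabled(config, global_step, token_drop_steps):
    # Mirrors the patched condition: a missing attribute, an explicit False,
    # or a step still inside the token-drop warmup all disable flex-token routing.
    if (not hasattr(config, "using_flex_token")) or (not config.using_flex_token) or (global_step < token_drop_steps):
        return False
    return True

print(flex_token_enabled(SimpleNamespace(), 500, 100))                      # False: attribute absent (older config)
print(flex_token_enabled(SimpleNamespace(using_flex_token=True), 50, 100))  # False: still inside the warmup window
print(flex_token_enabled(SimpleNamespace(using_flex_token=True), 500, 100)) # True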
