
Commit f25a0f3
Fix bug of missing trainer attribute (#2684)
1 parent: 89ec6b3

File tree
2 files changed, +11 -1 lines changed

examples/run_finetune.py (0 additions, 1 deletion)

@@ -140,7 +140,6 @@ def main():
     model_config.max_sequence_length = training_args.max_seq_len
     model_config.num_nextn_predict_layers = model_args.num_nextn_predict_layers
     model_config._attn_implementation = model_args.attn_impl
-    model_config.moe_subbatch_token_num = model_args.moe_subbatch_token_num
     logger.info(f"Final model config: {model_config}")
     logger.info("Creating model")
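
The deleted assignment copied moe_subbatch_token_num from the model arguments onto the config by hand. With the configuration_utils.py change below, the attribute is registered at the config level with a default of 0, so the script no longer has to set it itself. A minimal toy sketch of that idea (ToyConfig is a hypothetical stand-in, not the real PretrainedConfig):

# Toy illustration only: mirrors the "pop with a default" pattern used in
# the diff below, so the attribute always exists on the config object.
class ToyConfig:
    def __init__(self, **kwargs):
        self.moe_subbatch_token_num = kwargs.pop("moe_subbatch_token_num", 0)

cfg = ToyConfig()                             # no manual assignment in the script
print(cfg.moe_subbatch_token_num)             # 0 (default)
cfg = ToyConfig(moe_subbatch_token_num=1024)  # still overridable, e.g. from model args
print(cfg.moe_subbatch_token_num)             # 1024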

paddleformers/transformers/configuration_utils.py (11 additions, 0 deletions)

@@ -294,6 +294,10 @@ class LlmMetaConfig:
         ),
     ]
 
+    moe_attributes = [
+        ("moe_subbatch_token_num", int, 0, "The number of tokens in each subbatch for MoE model processing."),
+    ]
+
     @classmethod
     def _get_defaults(cls):
         ret = {}
@@ -302,6 +306,7 @@ def _get_defaults(cls):
             cls.hybrid_parallel_attributes,
             cls.recompute_attributes,
             cls.loss_attributes,
+            cls.moe_attributes,
         ]:
             for attr in attrs:
                 # return dict of key and default values
@@ -316,6 +321,7 @@ def _get_all_meta(cls):
             cls.hybrid_parallel_attributes,
             cls.recompute_attributes,
             cls.loss_attributes,
+            cls.moe_attributes,
         ]:
             for attr in attrs:
                 # return dict of key and default values
@@ -330,6 +336,7 @@ def _get_unsavable_keys(cls):
             cls.hybrid_parallel_attributes,
             cls.recompute_attributes,
             cls.loss_attributes,
+            cls.moe_attributes,
         ]:
             for attr in attrs:
                 ret.add(attr[0])
@@ -488,6 +495,8 @@ class PretrainedConfig:
         problem_type (`str`, *optional*):
             Problem type for `XxxForSequenceClassification` models. Can be one of `"regression"`,
             `"single_label_classification"` or `"multi_label_classification"`.
+        moe_subbatch_token_num (`int`, *optional*, defaults to 0):
+            The number of tokens in a subbatch for MoE.
 
         > Parameters for general components
 
@@ -632,6 +641,8 @@ def __init__(self, **kwargs):
         self.dpo_config = kwargs.pop("dpo_config", None)
         self.kto_config = kwargs.pop("kto_config", None)
 
+        self.num_subbatch_token_num = kwargs.pop("num_subbatch_token_num", 0)
+
         # Tokenizer arguments TODO: eventually tokenizer and models should share the same config
         self.tokenizer_class = kwargs.pop("tokenizer_class", None)
         self.prefix = kwargs.pop("prefix", None)
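
The hunks above register the new attribute as a (name, type, default, help) tuple in a moe_attributes list and thread that list through _get_defaults, _get_all_meta, and _get_unsavable_keys. A small self-contained sketch of that pattern (the class and the loss attribute here are hypothetical stand-ins, not the real LlmMetaConfig):

# Hypothetical mirror of the attribute-list pattern shown in the diff above:
# each group is a list of (name, type, default, help) tuples, and the
# classmethods fold every group into a single view.
class ToyLlmMetaConfig:
    loss_attributes = [
        ("some_loss_flag", bool, False, "Hypothetical loss attribute for illustration."),
    ]
    moe_attributes = [
        ("moe_subbatch_token_num", int, 0, "The number of tokens in each subbatch for MoE model processing."),
    ]

    @classmethod
    def _get_defaults(cls):
        # Collect {name: default} across every attribute group.
        ret = {}
        for attrs in [cls.loss_attributes, cls.moe_attributes]:
            for name, _dtype, default, _help in attrs:
                ret[name] = default
        return ret

    @classmethod
    def _get_unsavable_keys(cls):
        # Collect the attribute names that should not be serialized.
        ret = set()
        for attrs in [cls.loss_attributes, cls.moe_attributes]:
            for attr in attrs:
                ret.add(attr[0])
        return ret

print(ToyLlmMetaConfig._get_defaults())
# {'some_loss_flag': False, 'moe_subbatch_token_num': 0}
print("moe_subbatch_token_num" in ToyLlmMetaConfig._get_unsavable_keys())
# True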
