We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 2ccaab2, commit 3b90f68 (Copy full SHA for 3b90f68)
examples/deepseek_v2/pretrain_deepseek.py
@@ -102,7 +102,7 @@ def get_batch(data_iterator):
102
if args.train_mode == "pretrain":
103
batch = get_batch_on_this_tp_rank(data_iterator)
104
else:
105
- batch = get_batch_on_this_tp_rank_idxmap_sft(data_iterator)
+ batch = get_batch_on_this_tp_rank_idxmap_sft(data_iterator, per_seq_average=True)
106
107
packed_seq_params = None
108
if args.reset_position_ids:
0 commit comments