Skip to content

Commit 801c45a

Browse files
authored
[megatron] fix eval_iters -1 (#4847)
1 parent b059291 commit 801c45a

File tree

3 files changed

+5
-5
lines changed

3 files changed

+5
-5
lines changed

swift/megatron/train/trainers/trainer.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,10 @@ def initialize_megatron(*_args, **kwargs):
46 46
else:
47 47
raise ValueError(
48 48
'You are using a streaming training dataset. Please explicitly specify `--train_iters`.')
49-
if val_dataset is not None and args.eval_iters < 0:
50-
if hasattr(val_dataset, '__len__'):
49+
if args.eval_iters < 0:
50+
if val_dataset is None:
51+
args.eval_iters = 0
52+
elif hasattr(val_dataset, '__len__'):
51 53
dataset_sample = len(val_dataset) // step_batch_size * step_batch_size
52 54
args.eval_iters = max(dataset_sample // args.global_batch_size, 1)
53 55
else:

swift/megatron/train/utils.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ def swift_datasets_provider(train_val_test_num_samples):
15 15
nonlocal val_dataset
16 16
args = get_args()
17 17
data_parallel_size = mpu.get_data_parallel_world_size()
18-
step_batch_size = \
19-
args.micro_batch_size * data_parallel_size
18+
step_batch_size = args.micro_batch_size * data_parallel_size
20 19
# To avoid errors caused by the validation set being insufficient to complete a single step.
21 20
if val_dataset is not None and len(val_dataset) < step_batch_size:
22 21
val_dataset = None

tests/megatron/test_train.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ def test_sft():
17 17
train_iters=100,
18 18
model_author='swift',
19 19
model_name='swift-robot',
20-
eval_iters=5,
21 20
sequence_parallel=True,
22 21
finetune=True))
23 22

0 commit comments

Comments
 (0)