
Commit de3a027

disable samples-per-dataset, steps-per-dataset, tokens-per-dataset (#267)
1 parent 1893811 commit de3a027

1 file changed (+9, -6)

megatron/training.py

Lines changed: 9 additions & 6 deletions
@@ -658,12 +658,15 @@ def add_to_logging(name):
             writer.add_scalar('curriculum_seqlen', args.curriculum_seqlen,
                               iteration)
 
-        if args.data_weights is not None:
-            for prefix, weight in zip(args.data_prefixes, args.data_weights):
-                name = prefix.split(",")[-1]
-                writer.add_scalar(f'samples-per-dataset/{name}', args.consumed_train_samples * weight, args.consumed_train_samples)
-                writer.add_scalar(f'steps-per-dataset/{name}', iteration * weight, iteration)
-                writer.add_scalar(f'tokens-per-dataset/{name}', args.consumed_train_tokens * weight, args.consumed_train_tokens)
+        # It's very questionable what this data contributes, other than huge unstripped file paths
+        # as keys and hundreds of TB boards that make the TB files very bloated. So disabling for now.
+        #
+        # if args.data_weights is not None:
+        #     for prefix, weight in zip(args.data_prefixes, args.data_weights):
+        #         name = prefix.split(",")[-1]
+        #         writer.add_scalar(f'samples-per-dataset/{name}', args.consumed_train_samples * weight, args.consumed_train_samples)
+        #         writer.add_scalar(f'steps-per-dataset/{name}', iteration * weight, iteration)
+        #         writer.add_scalar(f'tokens-per-dataset/{name}', args.consumed_train_tokens * weight, args.consumed_train_tokens)
 
         if args.log_timers_to_tensorboard:
             timers.write(timers_to_log, writer, iteration,
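
For context, here is a minimal, self-contained sketch of the kind of per-dataset TensorBoard logging this commit disables. It is not the repository's code path: the prefixes, weights, counters, and log directory below are hypothetical placeholder values, and only the standard torch.utils.tensorboard.SummaryWriter API is assumed.

# Standalone sketch of per-dataset scalar logging (hypothetical values, not from a real run).
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir="runs/per-dataset-logging-sketch")

# Hypothetical dataset prefixes and sampling weights.
data_prefixes = ["0.7,/data/corpus_a_document", "0.3,/data/corpus_b_document"]
data_weights = [0.7, 0.3]

iteration = 1000
consumed_train_samples = 512_000
consumed_train_tokens = consumed_train_samples * 2048  # assuming a 2048-token sequence length

for prefix, weight in zip(data_prefixes, data_weights):
    # The tag is built from the raw prefix, which is why the committed comment notes that
    # long, unstripped file paths end up as TensorBoard keys and bloat the event files.
    name = prefix.split(",")[-1]
    writer.add_scalar(f'samples-per-dataset/{name}', consumed_train_samples * weight, consumed_train_samples)
    writer.add_scalar(f'steps-per-dataset/{name}', iteration * weight, iteration)
    writer.add_scalar(f'tokens-per-dataset/{name}', consumed_train_tokens * weight, consumed_train_tokens)

writer.close()

Each scalar is just the global counter multiplied by the dataset's weight, so one tag per dataset is written on every logging step; with many datasets and long path-derived tags this grows the TensorBoard files quickly, which is the stated reason for commenting the block out.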
