@@ -658,12 +658,15 @@ def add_to_logging(name):
             writer.add_scalar('curriculum_seqlen', args.curriculum_seqlen,
                               iteration)
 
-        if args.data_weights is not None:
-            for prefix, weight in zip(args.data_prefixes, args.data_weights):
-                name = prefix.split(",")[-1]
-                writer.add_scalar(f'samples-per-dataset/{name}', args.consumed_train_samples * weight, args.consumed_train_samples)
-                writer.add_scalar(f'steps-per-dataset/{name}', iteration * weight, iteration)
-                writer.add_scalar(f'tokens-per-dataset/{name}', args.consumed_train_tokens * weight, args.consumed_train_tokens)
+        # It's very questionable what this data contributes, other than huge unstripped file paths
+        # as keys and hundreds of TB boards that make the TB files very bloated. So disabling for now.
+        #
+        # if args.data_weights is not None:
+        #     for prefix, weight in zip(args.data_prefixes, args.data_weights):
+        #         name = prefix.split(",")[-1]
+        #         writer.add_scalar(f'samples-per-dataset/{name}', args.consumed_train_samples * weight, args.consumed_train_samples)
+        #         writer.add_scalar(f'steps-per-dataset/{name}', iteration * weight, iteration)
+        #         writer.add_scalar(f'tokens-per-dataset/{name}', args.consumed_train_tokens * weight, args.consumed_train_tokens)
 
         if args.log_timers_to_tensorboard:
             timers.write(timers_to_log, writer, iteration,
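For reference, below is a minimal, hypothetical sketch (not part of this change) of how the same per-dataset accounting could be kept without the problem the comment describes: trim each data prefix to a short basename so the TensorBoard tags stay compact, and normalize the weights before writing scalars. The helper name log_per_dataset and its standalone argument list are assumptions for illustration; only SummaryWriter.add_scalar is an actual TensorBoard API.

    # Hypothetical sketch, not the code removed above: per-dataset sample/token
    # accounting with short basenames as TensorBoard tags instead of full file paths.
    import os
    from torch.utils.tensorboard import SummaryWriter

    def log_per_dataset(writer, data_prefixes, data_weights,
                        consumed_train_samples, consumed_train_tokens, iteration):
        total = sum(data_weights)
        for prefix, weight in zip(data_prefixes, data_weights):
            frac = weight / total                                  # normalized sampling fraction
            name = os.path.splitext(os.path.basename(prefix))[0]   # short tag, not the unstripped path
            writer.add_scalar(f'samples-per-dataset/{name}', consumed_train_samples * frac, iteration)
            writer.add_scalar(f'tokens-per-dataset/{name}', consumed_train_tokens * frac, iteration)

    # usage sketch (paths and numbers are made up):
    # writer = SummaryWriter(log_dir='runs/exp')
    # log_per_dataset(writer, ['/data/c4_en_text_document'], [1.0], 1024, 2_097_152, 10)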