@@ -729,7 +729,7 @@ def train(
         # per_device_trainable_numel = sum(p.numel().item() for p in model.parameters() if not p.stop_gradient)
         # TODO: Temporary fix since Tensor.numel() not supported in distributed mode
         per_device_trainable_numel = sum(np.prod(p.shape) for p in model.parameters() if not p.stop_gradient)
-        logger.info(f" Number of trainable parameters = {per_device_trainable_numel:,} (per device)")
+        logger.debug(f" Number of trainable parameters = {per_device_trainable_numel:,} (per device)")
         if self.args.use_hybrid_parallel:
             # todo fix for pipeline_parallel_degree
             parts_num = max(self.args.tensor_parallel_degree, 1) * max(self.args.pipeline_parallel_degree, 1)
@@ -745,7 +745,7 @@ def train(
             trainable_numel = trainable_numel // self.args.sep_parallel_degree
             # the numel is roughly, because the tensor parallel still hold own bias or layer_norm weight without splited
             # so, the trainable numel is a little bigger than real.
-            logger.info(f" Number of trainable parameters = {trainable_numel:,} (all devices, roughly)")
+            logger.debug(f" Number of trainable parameters = {trainable_numel:,} (all devices, roughly)")

         start_time = time.time()
         self._globalstep_last_start_time = time.time()
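
A minimal sketch of the shape-based parameter count kept in the hunk above, assuming paddle and numpy are installed; the small paddle.nn.Linear model is only an illustrative stand-in, not part of the trainer. Summing np.prod(p.shape) over trainable parameters yields the same count as Tensor.numel() without calling it, which the TODO above notes is unsupported in distributed mode.

import numpy as np
import paddle

# Illustrative stand-in model (assumption): 8*4 weights + 4 biases = 36 parameters.
model = paddle.nn.Linear(in_features=8, out_features=4)

# Same expression as the trainer's per_device_trainable_numel: count elements
# from each parameter's shape instead of calling Tensor.numel().
per_device_trainable_numel = sum(np.prod(p.shape) for p in model.parameters() if not p.stop_gradient)
print(f"Number of trainable parameters = {int(per_device_trainable_numel):,}")  # -> 36
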
@@ -2392,7 +2392,7 @@ def log(self, logs: Dict[str, float], **kwargs) -> None:
             kwargs.update(timer=self.timers, paddle_pipeline_timers=paddle_pipeline_timers)

         if self.state.epoch is not None:
-            logs["epoch"] = round(self.state.epoch, 4)
+            logs["progress_or_epoch"] = round(self.state.epoch, 4)
         output = {**logs, **{"step": self.state.global_step}}
         self.state.log_history.append(output)
         self.control = self.callback_handler.on_log(self.args, self.state, self.control, logs, **kwargs)
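
Since the log() hunk above renames the logged key from "epoch" to "progress_or_epoch", downstream readers of state.log_history need to pick up the new name. A minimal sketch, with a hypothetical read_progress helper and made-up values, of handling both keys:

from typing import Dict

def read_progress(logs: Dict[str, float]) -> float:
    # Prefer the renamed key; fall back to "epoch" for histories written before
    # this change. The helper and the 0.0 default are assumptions, not trainer API.
    return logs.get("progress_or_epoch", logs.get("epoch", 0.0))

entry = {"loss": 2.31, "progress_or_epoch": 0.1234, "step": 100}  # illustrative values
print(read_progress(entry))  # -> 0.1234
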
@@ -2953,23 +2953,23 @@ def print_config(self, args=None, key=""):
         """
         print config values
         """
-        logger.info("=" * 60)
+        logger.debug("=" * 60)
         if args is None:
             args = self.args
             key = "Training"
         import paddlenlp

-        logger.info("{:^40}".format("{} Configuration Arguments".format(key)))
-        logger.info("{:30}: {}".format("paddle commit id", paddle.version.commit))
-        logger.info("{:30}: {}".format("paddlenlp commit id", paddlenlp.version.commit))
+        logger.debug("{:^40}".format("{} Configuration Arguments".format(key)))
+        logger.debug("{:30}: {}".format("paddle commit id", paddle.version.commit))
+        logger.debug("{:30}: {}".format("paddlenlp commit id", paddlenlp.version.commit))

         for a in dir(args):
             if a[:2] != "__":  # don't print double underscore methods
                 v = getattr(args, a)
                 if not isinstance(v, types.MethodType):
-                    logger.info("{:30}: {}".format(a, v))
+                    logger.debug("{:30}: {}".format(a, v))

-        logger.info("")
+        logger.debug("")

     def is_unified_checkpoint(self, resume_from_checkpoint, safe_serialization=True):
         is_unified_checkpoint_type = False
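
The print_config() hunk above downgrades the configuration dump from info to debug, so it no longer appears at the default verbosity. A minimal sketch of that effect, using the standard logging module as a stand-in for the trainer's logger (the logger name and commit id are made up):

import logging

logging.basicConfig(format="%(levelname)s %(message)s")
log = logging.getLogger("print_config_demo")  # hypothetical logger name

log.setLevel(logging.INFO)
log.debug("{:30}: {}".format("paddle commit id", "abc123"))  # suppressed at INFO
log.info("progress messages at INFO are still shown")

log.setLevel(logging.DEBUG)
log.debug("{:30}: {}".format("paddle commit id", "abc123"))  # shown once level is DEBUG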