@@ -729,7 +729,7 @@ def train(
         # per_device_trainable_numel = sum(p.numel().item() for p in model.parameters() if not p.stop_gradient)
         # TODO: Temporary fix since Tensor.numel() not supported in distributed mode
         per_device_trainable_numel = sum(np.prod(p.shape) for p in model.parameters() if not p.stop_gradient)
-        logger.info(f" Number of trainable parameters = {per_device_trainable_numel:,} (per device)")
+        logger.debug(f" Number of trainable parameters = {per_device_trainable_numel:,} (per device)")
         if self.args.use_hybrid_parallel:
             # todo fix for pipeline_parallel_degree
             parts_num = max(self.args.tensor_parallel_degree, 1) * max(self.args.pipeline_parallel_degree, 1)
@@ -745,7 +745,7 @@ def train(
             trainable_numel = trainable_numel // self.args.sep_parallel_degree
             # the numel is roughly, because the tensor parallel still hold own bias or layer_norm weight without splited
             # so, the trainable numel is a little bigger than real.
-            logger.info(f" Number of trainable parameters = {trainable_numel:,} (all devices, roughly)")
+            logger.debug(f" Number of trainable parameters = {trainable_numel:,} (all devices, roughly)")

         start_time = time.time()
         self._globalstep_last_start_time = time.time()
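
A minimal sketch of the shape-based parameter count kept in the hunk above, assuming paddle and numpy are installed; the small paddle.nn.Linear model is only an illustrative stand-in, not part of the trainer. Summing np.prod(p.shape) over trainable parameters yields the same count as Tensor.numel() without calling it, which the TODO above notes is unsupported in distributed mode.

import numpy as np
import paddle

# Illustrative stand-in model (assumption): 8*4 weights + 4 biases = 36 parameters.
model = paddle.nn.Linear(in_features=8, out_features=4)

# Same expression as the trainer's per_device_trainable_numel: count elements
# from each parameter's shape instead of calling Tensor.numel().
per_device_trainable_numel = sum(np.prod(p.shape) for p in model.parameters() if not p.stop_gradient)
print(f"Number of trainable parameters = {int(per_device_trainable_numel):,}")  # -> 36
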
@@ -2392,7 +2392,7 @@ def log(self, logs: Dict[str, float], **kwargs) -> None:
             kwargs.update(timer=self.timers, paddle_pipeline_timers=paddle_pipeline_timers)

         if self.state.epoch is not None:
-            logs["epoch"] = round(self.state.epoch, 4)
+            logs["progress_or_epoch"] = round(self.state.epoch, 4)
         output = {**logs, **{"step": self.state.global_step}}
         self.state.log_history.append(output)
         self.control = self.callback_handler.on_log(self.args, self.state, self.control, logs, **kwargs)
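
Since the log() hunk above renames the logged key from "epoch" to "progress_or_epoch", downstream readers of state.log_history need to pick up the new name. A minimal sketch, with a hypothetical read_progress helper and made-up values, of handling both keys:

from typing import Dict

def read_progress(logs: Dict[str, float]) -> float:
    # Prefer the renamed key; fall back to "epoch" for histories written before
    # this change. The helper and the 0.0 default are assumptions, not trainer API.
    return logs.get("progress_or_epoch", logs.get("epoch", 0.0))

entry = {"loss": 2.31, "progress_or_epoch": 0.1234, "step": 100}  # illustrative values
print(read_progress(entry))  # -> 0.1234
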
@@ -2953,23 +2953,23 @@ def print_config(self, args=None, key=""):
         """
         print config values
         """
-        logger.info("=" * 60)
+        logger.debug("=" * 60)
         if args is None:
             args = self.args
             key = "Training"
         import paddlenlp

-        logger.info("{:^40}".format("{} Configuration Arguments".format(key)))
-        logger.info("{:30}: {}".format("paddle commit id", paddle.version.commit))
-        logger.info("{:30}: {}".format("paddlenlp commit id", paddlenlp.version.commit))
+        logger.debug("{:^40}".format("{} Configuration Arguments".format(key)))
+        logger.debug("{:30}: {}".format("paddle commit id", paddle.version.commit))
+        logger.debug("{:30}: {}".format("paddlenlp commit id", paddlenlp.version.commit))

         for a in dir(args):
             if a[:2] != "__":  # don't print double underscore methods
                 v = getattr(args, a)
                 if not isinstance(v, types.MethodType):
-                    logger.info("{:30}: {}".format(a, v))
+                    logger.debug("{:30}: {}".format(a, v))

-        logger.info("")
+        logger.debug("")

     def is_unified_checkpoint(self, resume_from_checkpoint, safe_serialization=True):
         is_unified_checkpoint_type = False
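
The print_config() hunk above downgrades the configuration dump from info to debug, so it no longer appears at the default verbosity. A minimal sketch of that effect, using the standard logging module as a stand-in for the trainer's logger (the logger name and commit id are made up):

import logging

logging.basicConfig(format="%(levelname)s %(message)s")
log = logging.getLogger("print_config_demo")  # hypothetical logger name

log.setLevel(logging.INFO)
log.debug("{:30}: {}".format("paddle commit id", "abc123"))  # suppressed at INFO
log.info("progress messages at INFO are still shown")

log.setLevel(logging.DEBUG)
log.debug("{:30}: {}".format("paddle commit id", "abc123"))  # shown once level is DEBUG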