Skip to content

Commit 48d87a7

Browse files
authored
[AutoNLP] refactor verbosity for more effect log control (#4844)
* wip * changes * changes * changes * ready * remove extra print
1 parent e0e9fe5 commit 48d87a7

File tree

4 files changed

+16
-16
lines changed

4 files changed

+16
-16
lines changed

paddlenlp/experimental/autonlp/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Args:
5353
- greater_is_better (bool, optional): 更好的模型是否应该有更大的指标。与`metric_for_best_model`结合使用
5454
- problem_type (str, optional): 根据问题的性质在 [`multi_class`, `multi_label`] 中选择
5555
- output_dir (str, optional): 输出目录,默认为`autpnlp_results`
56+
- verbosity: (int, optional): 控制日志的详细程度。默认为“1”,可在driver中看见worker的日志。如果需要减少日志量,请使用 `verbosity < 1` 来停止 worker 向 driver 输出日志
5657

5758
### 训练
5859

@@ -76,7 +77,6 @@ Args:
7677
- max_concurrent_trials (int, optional): 同时运行的最大试验数。必须是非负数。如果为 None 或 0,则不应用任何限制。默认为None。
7778
- time_budget_s: (int|float|datetime.timedelta, optional) 以秒为单位的全局时间预算,超过时间后停止所有模型试验。
7879
- experiment_name: (str, optional): 实验的名称。实验日志将存储在"<output_dir>/<experiment_name>"下。默认为 UNIX 时间戳。
79-
- verbosity: (int, optional): 控制日志的详细程度。默认为“0”,将日志级别设置为 INFO。如果需要减少日志量,请使用 `verbosity > 0` 将日志级别设置为 WARNINGS
8080
- hp_overrides: (dict[str, Any], optional): (仅限高级用户)。覆盖每个候选模型的超参数。例如,`{"TrainingArguments.max_steps":5}`
8181
- custom_model_candiates: (dict[str, Any], optional): (仅限高级用户)。运行用户提供的候选模型而不 PaddleNLP 的默认候选模型。可以参考 `._model_candidates` 属性
8282

paddlenlp/experimental/autonlp/README_en.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ Args:
5454
- greater_is_better (bool, optional): Whether better models should have a greater metric or not. Use in conjuction with `metric_for_best_model`.
5555
- problem_type (str, optional): Select among ["multi_class", "multi_label"] based on the nature of your problem
5656
- output_dir (str, optional): Output directory for the experiments, defaults to "autpnlp_results"
57+
- verbosity: (int, optional): controls the verbosity of the run. Defaults to 1, which lets the workers log to the driver. To reduce the amount of logs, use verbosity < 1 to stop the workers from logging to the driver.
58+
5759

5860
### Train
5961

@@ -79,7 +81,6 @@ Args:
7981
- experiment_name: (str, optional): name of the experiment. Experiment log will be stored under `<output_dir>/<experiment_name>`. Defaults to UNIX timestamp.
8082
- hp_overrides: (dict[str, Any], optional): Advanced users only. override the hyperparameters of every model candidate. For example, {"TrainingArguments.max_steps": 5}.
8183
- custom_model_candiates: (dict[str, Any], optional): Advanced users only. Run the user-provided model candidates instead of the default model candidated from PaddleNLP. See `._model_candidates` property as an example
82-
- verbosity: (int, optional): controls the verbosity of the logger. Defaults to `0`, which set the logger level at INFO. To reduce the amount of logs, use `verbosity > 0` to set the logger level to WARNINGS
8384

8485
### Evaluations and Examine Results
8586

paddlenlp/experimental/autonlp/auto_trainer_base.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from abc import ABCMeta, abstractmethod
1919
from typing import Any, Callable, Dict, List, Optional, Union
2020

21+
import ray
2122
from hyperopt import hp
2223
from paddle.io import Dataset
2324
from ray import tune
@@ -44,6 +45,8 @@ class AutoTrainerBase(metaclass=ABCMeta):
4445
metric_for_best_model (string, optional): the name of the metrc for selecting the best model.
4546
greater_is_better (bool, required): Whether better models should have a greater metric or not. Use in conjuction with `metric_for_best_model`.
4647
output_dir (str, optional): Output directory for the experiments, defaults to "autpnlp_results"
48+
verbosity: (int, optional): controls the verbosity of the run. Defaults to 1, which lets the workers log to the driver. To reduce the amount of logs,
49+
use verbosity < 1 to stop the workers from logging to the driver.
4750
"""
4851

4952
training_path = "training"
@@ -58,6 +61,7 @@ def __init__(
5861
greater_is_better: bool,
5962
language: str = "Chinese",
6063
output_dir: str = "autonlp_results",
64+
verbosity: int = 1,
6165
**kwargs,
6266
):
6367
if not metric_for_best_model.startswith("eval_"):
@@ -75,6 +79,8 @@ def __init__(
7579
self.language = language
7680
self.output_dir = output_dir
7781
self.kwargs = kwargs
82+
# use log_to_driver to control verbosity
83+
ray.init(ignore_reinit_error=True, log_to_driver=True if verbosity >= 1 else False)
7884

7985
@property
8086
@abstractmethod
@@ -211,12 +217,6 @@ def _get_model_result(self, trial_id=None):
211217
"'AutoTrainer' has no attribute 'training_results'. Have you called the 'train' method?"
212218
)
213219

214-
def set_log_level(self):
215-
if self.verbosity > 0:
216-
logger.set_level("WARNING")
217-
else:
218-
logger.set_level("INFO")
219-
220220
def show_training_results(self):
221221
if hasattr(self, "training_results"):
222222
return self.training_results.get_dataframe()
@@ -246,7 +246,6 @@ def train(
246246
max_concurrent_trials: Optional[int] = None,
247247
time_budget_s: Optional[Union[int, float, datetime.timedelta]] = None,
248248
experiment_name: str = None,
249-
verbosity: int = 0,
250249
hp_overrides: Dict[str, Any] = None,
251250
custom_model_candidates: List[Dict[str, Any]] = None,
252251
) -> ResultGrid:
@@ -263,8 +262,6 @@ def train(
263262
time_budget_s: (int|float|datetime.timedelta, optional) global time budget in seconds after which all model trials are stopped.
264263
experiment_name: (str, optional): name of the experiment. Experiment log will be stored under <output_dir>/<experiment_name>.
265264
Defaults to UNIX timestamp.
266-
verbosity: (int, optional): controls the verbosity of the logger. Defaults to 0, which set the logger level at INFO. To reduce the amount of logs,
267-
use verbosity > 0 to set the logger level to WARNINGS
268265
hp_overrides: (dict[str, Any], optional): Advanced users only.
269266
override the hyperparameters of every model candidate. For example, {"TrainingArguments.max_steps": 5}.
270267
custom_model_candiates: (dict[str, Any], optional): Advanced users only.
@@ -273,9 +270,6 @@ def train(
273270
Returns:
274271
A set of objects for interacting with Ray Tune results. You can use it to inspect the trials and obtain the best result.
275272
"""
276-
# Changing logger verbosity here doesn't work. Need to change in the worker's code via the _construct_trainable method.
277-
self.verbosity = verbosity
278-
279273
if hasattr(self, "tuner") and self.tuner is not None:
280274
logger.info("Overwriting the existing Tuner and any previous training results")
281275

@@ -307,11 +301,15 @@ def train(
307301
trainable,
308302
tune_config=tune_config,
309303
run_config=RunConfig(
310-
name=experiment_name, log_to_file=True, local_dir=self.output_dir if self.output_dir else None
304+
name=experiment_name,
305+
log_to_file=True, # TODO: log_to_file doesn't stream logger output to file for some reason
306+
local_dir=self.output_dir if self.output_dir else None,
307+
callbacks=[tune.logger.CSVLoggerCallback()],
311308
),
312309
)
313310
self.training_results = self.tuner.fit()
314311
self.show_training_results().to_csv(
315312
path_or_buf=os.path.join(self.output_dir, experiment_name, self.results_filename), index=False
316313
)
314+
317315
return self.training_results

paddlenlp/experimental/autonlp/text_classification.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ class AutoTrainerForTextClassification(AutoTrainerBase):
6161
language (string, required): language of the text.
6262
output_dir (str, optional): Output directory for the experiments, defaults to "autpnlp_results".
6363
id2label(dict(int,string)): The dictionary to map the predictions from class ids to class names.
64+
verbosity: (int, optional): controls the verbosity of the run. Defaults to 1, which lets the workers log to the driver. To reduce the amount of logs,
65+
use verbosity < 1 to stop the workers from logging to the driver.
6466
6567
"""
6668

@@ -317,7 +319,6 @@ def trainable(config):
317319
# import is required for proper pickling
318320
from paddlenlp.utils.log import logger
319321

320-
self.set_log_level()
321322
config = config["candidates"]
322323
trainer = self._construct_trainer(config)
323324
trainer.train()

0 commit comments

Comments
 (0)