Skip to content

Commit 48d87a7

Browse files
authored
[AutoNLP] refactor verbosity for more effect log control (#4844)
* wip * changes * changes * changes * ready * remove extra print
1 parent e0e9fe5 commit 48d87a7

File tree

4 files changed

+16
-16
lines changed

4 files changed

+16
-16
lines changed

paddlenlp/experimental/autonlp/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Args:
5353
- greater_is_better (bool, optional): 更好的模型是否应该有更大的指标。与`metric_for_best_model`结合使用
5454
- problem_type (str, optional): 根据问题的性质在 [`multi_class`, `multi_label`] 中选择
5555
- output_dir (str, optional): 输出目录,默认为`autpnlp_results`
56+
- verbosity: (int, optional): 控制日志的详细程度。默认为“1”,可在driver中看见worker的日志。如果需要减少日志量,请使用 `verbosity < 1` 来停止 worker 向 driver 输出日志
5657

5758
### 训练
5859

@@ -76,7 +77,6 @@ Args:
7677
- max_concurrent_trials (int, optional): 同时运行的最大试验数。必须是非负数。如果为 None 或 0,则不应用任何限制。默认为None。
7778
- time_budget_s: (int|float|datetime.timedelta, optional) 以秒为单位的全局时间预算,超过时间后停止所有模型试验。
7879
- experiment_name: (str, optional): 实验的名称。实验日志将存储在"<output_dir>/<experiment_name>"下。默认为 UNIX 时间戳。
79-
- verbosity: (int, optional): 控制日志的详细程度。默认为“0”,将日志级别设置为 INFO。如果需要减少日志量,请使用 `verbosity > 0` 将日志级别设置为 WARNINGS
8080
- hp_overrides: (dict[str, Any], optional): (仅限高级用户)。覆盖每个候选模型的超参数。例如,`{"TrainingArguments.max_steps":5}`
8181
- custom_model_candiates: (dict[str, Any], optional): (仅限高级用户)。运行用户提供的候选模型而不 PaddleNLP 的默认候选模型。可以参考 `._model_candidates` 属性
8282

paddlenlp/experimental/autonlp/README_en.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ Args:
5454
- greater_is_better (bool, optional): Whether better models should have a greater metric or not. Use in conjuction with `metric_for_best_model`.
5555
- problem_type (str, optional): Select among ["multi_class", "multi_label"] based on the nature of your problem
5656
- output_dir (str, optional): Output directory for the experiments, defaults to "autpnlp_results"
57+
- verbosity: (int, optional): controls the verbosity of the run. Defaults to 1, which lets the workers log to the driver. To reduce the amount of logs, use verbosity < 1 to stop the workers from logging to the driver.
58+
5759

5860
### Train
5961

@@ -79,7 +81,6 @@ Args:
7981
- experiment_name: (str, optional): name of the experiment. Experiment log will be stored under `<output_dir>/<experiment_name>`. Defaults to UNIX timestamp.
8082
- hp_overrides: (dict[str, Any], optional): Advanced users only. override the hyperparameters of every model candidate. For example, {"TrainingArguments.max_steps": 5}.
8183
- custom_model_candiates: (dict[str, Any], optional): Advanced users only. Run the user-provided model candidates instead of the default model candidated from PaddleNLP. See `._model_candidates` property as an example
82-
- verbosity: (int, optional): controls the verbosity of the logger. Defaults to `0`, which set the logger level at INFO. To reduce the amount of logs, use `verbosity > 0` to set the logger level to WARNINGS
8384

8485
### Evaluations and Examine Results
8586

paddlenlp/experimental/autonlp/auto_trainer_base.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from abc import ABCMeta, abstractmethod
1919
from typing import Any, Callable, Dict, List, Optional, Union
2020

21+
import ray
2122
from hyperopt import hp
2223
from paddle.io import Dataset
2324
from ray import tune
@@ -44,6 +45,8 @@ class AutoTrainerBase(metaclass=ABCMeta):
4445
metric_for_best_model (string, optional): the name of the metrc for selecting the best model.
4546
greater_is_better (bool, required): Whether better models should have a greater metric or not. Use in conjuction with `metric_for_best_model`.
4647
output_dir (str, optional): Output directory for the experiments, defaults to "autpnlp_results"
48+
verbosity: (int, optional): controls the verbosity of the run. Defaults to 1, which lets the workers log to the driver. To reduce the amount of logs,
49+
use verbosity < 1 to stop the workers from logging to the driver.
4750
"""
4851

4952
training_path = "training"
@@ -58,6 +61,7 @@ def __init__(
5861
greater_is_better: bool,
5962
language: str = "Chinese",
6063
output_dir: str = "autonlp_results",
64+
verbosity: int = 1,
6165
**kwargs,
6266
):
6367
if not metric_for_best_model.startswith("eval_"):
@@ -75,6 +79,8 @@ def __init__(
7579
self.language = language
7680
self.output_dir = output_dir
7781
self.kwargs = kwargs
82+
# use log_to_driver to control verbosity
83+
ray.init(ignore_reinit_error=True, log_to_driver=True if verbosity >= 1 else False)
7884

7985
@property
8086
@abstractmethod
@@ -211,12 +217,6 @@ def _get_model_result(self, trial_id=None):
211217
"'AutoTrainer' has no attribute 'training_results'. Have you called the 'train' method?"
212218
)
213219

214-
def set_log_level(self):
215-
if self.verbosity > 0:
216-
logger.set_level("WARNING")
217-
else:
218-
logger.set_level("INFO")
219-
220220
def show_training_results(self):
221221
if hasattr(self, "training_results"):
222222
return self.training_results.get_dataframe()
@@ -246,7 +246,6 @@ def train(
246246
max_concurrent_trials: Optional[int] = None,
247247
time_budget_s: Optional[Union[int, float, datetime.timedelta]] = None,
248248
experiment_name: str = None,
249-
verbosity: int = 0,
250249
hp_overrides: Dict[str, Any] = None,
251250
custom_model_candidates: List[Dict[str, Any]] = None,
252251
) -> ResultGrid:
@@ -263,8 +262,6 @@ def train(
263262
time_budget_s: (int|float|datetime.timedelta, optional) global time budget in seconds after which all model trials are stopped.
264263
experiment_name: (str, optional): name of the experiment. Experiment log will be stored under <output_dir>/<experiment_name>.
265264
Defaults to UNIX timestamp.
266-
verbosity: (int, optional): controls the verbosity of the logger. Defaults to 0, which set the logger level at INFO. To reduce the amount of logs,
267-
use verbosity > 0 to set the logger level to WARNINGS
268265
hp_overrides: (dict[str, Any], optional): Advanced users only.
269266
override the hyperparameters of every model candidate. For example, {"TrainingArguments.max_steps": 5}.
270267
custom_model_candiates: (dict[str, Any], optional): Advanced users only.
@@ -273,9 +270,6 @@ def train(
273270
Returns:
274271
A set of objects for interacting with Ray Tune results. You can use it to inspect the trials and obtain the best result.
275272
"""
276-
# Changing logger verbosity here doesn't work. Need to change in the worker's code via the _construct_trainable method.
277-
self.verbosity = verbosity
278-
279273
if hasattr(self, "tuner") and self.tuner is not None:
280274
logger.info("Overwriting the existing Tuner and any previous training results")
281275

@@ -307,11 +301,15 @@ def train(
307301
trainable,
308302
tune_config=tune_config,
309303
run_config=RunConfig(
310-
name=experiment_name, log_to_file=True, local_dir=self.output_dir if self.output_dir else None
304+
name=experiment_name,
305+
log_to_file=True, # TODO: log_to_file doesn't stream logger output to file for some reason
306+
local_dir=self.output_dir if self.output_dir else None,
307+
callbacks=[tune.logger.CSVLoggerCallback()],
311308
),
312309
)
313310
self.training_results = self.tuner.fit()
314311
self.show_training_results().to_csv(
315312
path_or_buf=os.path.join(self.output_dir, experiment_name, self.results_filename), index=False
316313
)
314+
317315
return self.training_results

paddlenlp/experimental/autonlp/text_classification.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ class AutoTrainerForTextClassification(AutoTrainerBase):
6161
language (string, required): language of the text.
6262
output_dir (str, optional): Output directory for the experiments, defaults to "autpnlp_results".
6363
id2label(dict(int,string)): The dictionary to map the predictions from class ids to class names.
64+
verbosity: (int, optional): controls the verbosity of the run. Defaults to 1, which lets the workers log to the driver. To reduce the amount of logs,
65+
use verbosity < 1 to stop the workers from logging to the driver.
6466
6567
"""
6668

@@ -317,7 +319,6 @@ def trainable(config):
317319
# import is required for proper pickling
318320
from paddlenlp.utils.log import logger
319321

320-
self.set_log_level()
321322
config = config["candidates"]
322323
trainer = self._construct_trainer(config)
323324
trainer.train()

0 commit comments

Comments
 (0)