diff --git a/paddlenlp/trainer/integrations.py b/paddlenlp/trainer/integrations.py
index 90377fdd36f5..f50e3f0e0926 100644
--- a/paddlenlp/trainer/integrations.py
+++ b/paddlenlp/trainer/integrations.py
@@ -47,6 +47,10 @@ def is_ray_available():
     return importlib.util.find_spec("ray.air") is not None
 
 
+def is_swanlab_available():
+    return importlib.util.find_spec("swanlab") is not None
+
+
 def get_available_reporting_integrations():
     integrations = []
     if is_visualdl_available():
@@ -55,7 +59,8 @@ def get_available_reporting_integrations():
         integrations.append("wandb")
     if is_tensorboardX_available():
         integrations.append("tensorboard")
-
+    if is_swanlab_available():
+        integrations.append("swanlab")
     return integrations
 
 
@@ -410,11 +415,147 @@ def on_evaluate(self, args, state, control, **kwargs):
         self.session.report(metrics)
 
 
+class SwanLabCallback(TrainerCallback):
+    """
+    A [`TrainerCallback`] that logs metrics, media, model checkpoints to [SwanLab](https://swanlab.cn/).
+    """
+
+    def __init__(self):
+        if not is_swanlab_available():
+            raise RuntimeError("SwanLabCallback requires swanlab to be installed. Run `pip install swanlab`.")
+        import swanlab
+
+        self._swanlab = swanlab
+        self._initialized = False
+        self._log_model = os.getenv("SWANLAB_LOG_MODEL", None)
+
+    def setup(self, args, state, model, **kwargs):
+        """
+        Setup the optional SwanLab (*swanlab*) integration.
+
+        One can subclass and override this method to customize the setup if needed. Find more information
+        [here](https://docs.swanlab.cn/guide_cloud/integration/integration-huggingface-transformers.html).
+
+        You can also override the following environment variables. Find more information about environment
+        variables [here](https://docs.swanlab.cn/en/api/environment-variable.html#environment-variables)
+
+        Environment:
+        - **SWANLAB_API_KEY** (`str`, *optional*, defaults to `None`):
+            Cloud API Key. During login, this environment variable is checked first. If it doesn't exist, the system
+            checks if the user is already logged in. If not, the login process is initiated.
+
+            - If a string is passed to the login interface, this environment variable is ignored.
+            - If the user is already logged in, this environment variable takes precedence over locally stored
+              login information.
+
+        - **SWANLAB_PROJECT** (`str`, *optional*, defaults to `None`):
+            Set this to a custom string to store results in a different project. If not specified, the name of the current
+            running directory is used.
+
+        - **SWANLAB_LOG_DIR** (`str`, *optional*, defaults to `swanlog`):
+            This environment variable specifies the storage path for log files when running in local mode.
+            By default, logs are saved in a folder named swanlog under the working directory.
+
+        - **SWANLAB_MODE** (`Literal["local", "cloud", "disabled"]`, *optional*, defaults to `cloud`):
+            SwanLab's parsing mode, which involves callbacks registered by the operator. Currently, there are three modes:
+            local, cloud, and disabled. Note: Case-sensitive. Find more information
+            [here](https://docs.swanlab.cn/en/api/py-init.html#swanlab-init)
+
+        - **SWANLAB_LOG_MODEL** (`str`, *optional*, defaults to `None`):
+            SwanLab does not currently support the save mode functionality. This feature will be available in a future
+            release
+
+        - **SWANLAB_WEB_HOST** (`str`, *optional*, defaults to `None`):
+            Web address for the SwanLab cloud environment for private version (it's free)
+
+        - **SWANLAB_API_HOST** (`str`, *optional*, defaults to `None`):
+            API address for the SwanLab cloud environment for private version (it's free)
+
+        """
+        self._initialized = True
+
+        if state.is_world_process_zero:
+            logger.info('Automatic SwanLab logging enabled, to disable set os.environ["SWANLAB_MODE"] = "disabled"')
+            combined_dict = {**args.to_dict()}
+
+            if hasattr(model, "config") and model.config is not None:
+                model_config = model.config if isinstance(model.config, dict) else model.config.to_dict()
+                combined_dict = {**model_config, **combined_dict}
+            if hasattr(model, "lora_config") and model.lora_config is not None:
+                lora_config = model.lora_config if isinstance(model.lora_config, dict) else model.lora_config.to_dict()
+                combined_dict = {**{"lora_config": lora_config}, **combined_dict}
+            trial_name = state.trial_name
+            init_args = {}
+            if trial_name is not None and args.run_name is not None:
+                init_args["experiment_name"] = f"{args.run_name}-{trial_name}"
+            elif args.run_name is not None:
+                init_args["experiment_name"] = args.run_name
+            elif trial_name is not None:
+                init_args["experiment_name"] = trial_name
+            init_args["project"] = os.getenv("SWANLAB_PROJECT", "PaddleNLP")
+            if args.logging_dir is not None:
+                init_args["logdir"] = os.getenv("SWANLAB_LOG_DIR", args.logging_dir)
+
+            if self._swanlab.get_run() is None:
+                self._swanlab.init(
+                    **init_args,
+                )
+            # show paddlenlp logo!
+            self._swanlab.config["FRAMEWORK"] = "paddlenlp"
+            # add config parameters (run may have been created manually)
+            self._swanlab.config.update(combined_dict)
+
+    def on_train_begin(self, args, state, control, model=None, **kwargs):
+        if not self._initialized:
+            self.setup(args, state, model, **kwargs)
+
+    def on_train_end(self, args, state, control, model=None, processing_class=None, **kwargs):
+        if self._log_model is not None and self._initialized and state.is_world_process_zero:
+            logger.warning(
+                "SwanLab does not currently support the save mode functionality. "
+                "This feature will be available in a future release."
+            )
+
+    def on_log(self, args, state, control, model=None, logs=None, **kwargs):
+        single_value_scalars = [
+            "train_runtime",
+            "train_samples_per_second",
+            "train_steps_per_second",
+            "train_loss",
+            "total_flos",
+        ]
+
+        if not self._initialized:
+            self.setup(args, state, model)
+        if state.is_world_process_zero:
+            for k, v in logs.items():
+                if k in single_value_scalars:
+                    self._swanlab.log({f"single_value/{k}": v}, step=state.global_step)
+            non_scalar_logs = {k: v for k, v in logs.items() if k not in single_value_scalars}
+            non_scalar_logs = rewrite_logs(non_scalar_logs)
+            self._swanlab.log({**non_scalar_logs, "train/global_step": state.global_step}, step=state.global_step)
+
+    def on_save(self, args, state, control, **kwargs):
+        if self._log_model is not None and self._initialized and state.is_world_process_zero:
+            logger.warning(
+                "SwanLab does not currently support the save mode functionality. "
+                "This feature will be available in a future release."
+            )
+
+    def on_predict(self, args, state, control, metrics, **kwargs):
+        if not self._initialized:
+            self.setup(args, state, **kwargs)
+        if state.is_world_process_zero:
+            metrics = rewrite_logs(metrics)
+            self._swanlab.log(metrics)
+
+
 INTEGRATION_TO_CALLBACK = {
     "visualdl": VisualDLCallback,
     "autonlp": AutoNLPCallback,
     "wandb": WandbCallback,
     "tensorboard": TensorBoardCallback,
+    "swanlab": SwanLabCallback,
 }
 
 
diff --git a/paddlenlp/trainer/training_args.py b/paddlenlp/trainer/training_args.py
index fe07215e0cf3..f6de0866af02 100644
--- a/paddlenlp/trainer/training_args.py
+++ b/paddlenlp/trainer/training_args.py
@@ -382,7 +382,7 @@ class TrainingArguments:
             instance of `Dataset`.
         report_to (`str` or `List[str]`, *optional*, defaults to `"visualdl"`):
             The list of integrations to report the results and logs to.
-            Supported platforms are `"visualdl"`/`"wandb"`/`"tensorboard"`.
+            Supported platforms are `"visualdl"`/`"wandb"`/`"tensorboard"`/`"swanlab"`.
             `"none"` for no integrations.
         ddp_find_unused_parameters (`bool`, *optional*):
             When using distributed training, the value of the flag `find_unused_parameters` passed to
diff --git a/requirements-dev.txt b/requirements-dev.txt
index a69c22200908..7d3d3d56ebca 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -31,4 +31,5 @@ wget
 huggingface_hub>=0.19.2
 tiktoken
 tokenizers<=0.20.3; python_version<="3.8"
-tokenizers>=0.21,<0.22; python_version>"3.8"
\ No newline at end of file
+tokenizers>=0.21,<0.22; python_version>"3.8"
+swanlab[dashboard]
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 7f4e42796606..a561ffb28eb1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,3 +30,4 @@ ml_dtypes
 tokenizers<=0.20.3; python_version<="3.8"
 tokenizers>=0.21,<0.22; python_version>"3.8"
 omegaconf
+swanlab[dashboard]
\ No newline at end of file