Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 142 additions & 1 deletion paddlenlp/trainer/integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ def is_ray_available():
return importlib.util.find_spec("ray.air") is not None


def is_swanlab_available():
    """Return ``True`` when the ``swanlab`` package can be imported."""
    spec = importlib.util.find_spec("swanlab")
    return spec is not None


def get_available_reporting_integrations():
integrations = []
if is_visualdl_available():
Expand All @@ -55,7 +59,8 @@ def get_available_reporting_integrations():
integrations.append("wandb")
if is_tensorboardX_available():
integrations.append("tensorboard")

if is_swanlab_available():
integrations.append("swanlab")
return integrations


Expand Down Expand Up @@ -410,11 +415,147 @@ def on_evaluate(self, args, state, control, **kwargs):
self.session.report(metrics)


class SwanLabCallback(TrainerCallback):
    """
    A [`TrainerCallback`] that logs metrics, media, model checkpoints to [SwanLab](https://swanlab.cn/).
    """

    def __init__(self):
        if not is_swanlab_available():
            raise RuntimeError("SwanLabCallback requires swanlab to be installed. Run `pip install swanlab`.")
        # Imported lazily so the module can be loaded without swanlab installed.
        import swanlab

        self._swanlab = swanlab
        self._initialized = False
        # Checkpoint logging is not supported yet; a non-None value only triggers a warning.
        self._log_model = os.getenv("SWANLAB_LOG_MODEL", None)

    def _warn_log_model_unsupported(self, state):
        # Shared warning for on_train_end/on_save: SWANLAB_LOG_MODEL is accepted but inert.
        if self._log_model is not None and self._initialized and state.is_world_process_zero:
            logger.warning(
                "SwanLab does not currently support the save mode functionality. "
                "This feature will be available in a future release."
            )

    def setup(self, args, state, model, **kwargs):
        """
        Setup the optional SwanLab (*swanlab*) integration.

        One can subclass and override this method to customize the setup if needed. Find more information
        [here](https://docs.swanlab.cn/guide_cloud/integration/integration-huggingface-transformers.html).

        You can also override the following environment variables. Find more information about environment
        variables [here](https://docs.swanlab.cn/en/api/environment-variable.html#environment-variables)

        Environment:
            - **SWANLAB_API_KEY** (`str`, *optional*, defaults to `None`):
                Cloud API Key. During login, this environment variable is checked first. If it doesn't exist, the system
                checks if the user is already logged in. If not, the login process is initiated.

                - If a string is passed to the login interface, this environment variable is ignored.
                - If the user is already logged in, this environment variable takes precedence over locally stored
                login information.

            - **SWANLAB_PROJECT** (`str`, *optional*, defaults to `None`):
                Set this to a custom string to store results in a different project. If not specified, the name of the current
                running directory is used.

            - **SWANLAB_LOG_DIR** (`str`, *optional*, defaults to `swanlog`):
                This environment variable specifies the storage path for log files when running in local mode.
                By default, logs are saved in a folder named swanlog under the working directory.

            - **SWANLAB_MODE** (`Literal["local", "cloud", "disabled"]`, *optional*, defaults to `cloud`):
                SwanLab's parsing mode, which involves callbacks registered by the operator. Currently, there are three modes:
                local, cloud, and disabled. Note: Case-sensitive. Find more information
                [here](https://docs.swanlab.cn/en/api/py-init.html#swanlab-init)

            - **SWANLAB_LOG_MODEL** (`str`, *optional*, defaults to `None`):
                SwanLab does not currently support the save mode functionality. This feature will be available in a future
                release.

            - **SWANLAB_WEB_HOST** (`str`, *optional*, defaults to `None`):
                Web address for the SwanLab cloud environment for private version (it's free)

            - **SWANLAB_API_HOST** (`str`, *optional*, defaults to `None`):
                API address for the SwanLab cloud environment for private version (it's free)

        """
        self._initialized = True

        # Only the main process talks to SwanLab; other ranks just mark themselves initialized.
        if state.is_world_process_zero:
            logger.info('Automatic SwanLab logging enabled, to disable set os.environ["SWANLAB_MODE"] = "disabled"')
            combined_dict = {**args.to_dict()}

            # Model/LoRA configs are merged in, with TrainingArguments taking precedence on key clashes.
            if hasattr(model, "config") and model.config is not None:
                model_config = model.config if isinstance(model.config, dict) else model.config.to_dict()
                combined_dict = {**model_config, **combined_dict}
            if hasattr(model, "lora_config") and model.lora_config is not None:
                lora_config = model.lora_config if isinstance(model.lora_config, dict) else model.lora_config.to_dict()
                combined_dict = {**{"lora_config": lora_config}, **combined_dict}
            trial_name = state.trial_name
            init_args = {}
            # Experiment name: "<run_name>-<trial_name>" when both exist, otherwise whichever is set.
            if trial_name is not None and args.run_name is not None:
                init_args["experiment_name"] = f"{args.run_name}-{trial_name}"
            elif args.run_name is not None:
                init_args["experiment_name"] = args.run_name
            elif trial_name is not None:
                init_args["experiment_name"] = trial_name
            init_args["project"] = os.getenv("SWANLAB_PROJECT", "PaddleNLP")
            if args.logging_dir is not None:
                # SWANLAB_LOG_DIR overrides the trainer's logging_dir when set.
                init_args["logdir"] = os.getenv("SWANLAB_LOG_DIR", args.logging_dir)

            if self._swanlab.get_run() is None:
                self._swanlab.init(
                    **init_args,
                )
            # show paddlenlp logo!
            self._swanlab.config["FRAMEWORK"] = "paddlenlp"
            # add config parameters (run may have been created manually)
            self._swanlab.config.update(combined_dict)

    def on_train_begin(self, args, state, control, model=None, **kwargs):
        if not self._initialized:
            self.setup(args, state, model, **kwargs)

    def on_train_end(self, args, state, control, model=None, processing_class=None, **kwargs):
        self._warn_log_model_unsupported(state)

    def on_log(self, args, state, control, model=None, logs=None, **kwargs):
        # These keys are one-off summary scalars; everything else is a per-step series.
        single_value_scalars = [
            "train_runtime",
            "train_samples_per_second",
            "train_steps_per_second",
            "train_loss",
            "total_flos",
        ]

        if not self._initialized:
            self.setup(args, state, model)
        if state.is_world_process_zero:
            # Guard against a missing payload: the parameter defaults to None.
            if logs is None:
                return
            for k, v in logs.items():
                if k in single_value_scalars:
                    self._swanlab.log({f"single_value/{k}": v}, step=state.global_step)
            non_scalar_logs = {k: v for k, v in logs.items() if k not in single_value_scalars}
            non_scalar_logs = rewrite_logs(non_scalar_logs)
            self._swanlab.log({**non_scalar_logs, "train/global_step": state.global_step}, step=state.global_step)

    def on_save(self, args, state, control, **kwargs):
        self._warn_log_model_unsupported(state)

    def on_predict(self, args, state, control, metrics, **kwargs):
        if not self._initialized:
            # Pass `model` explicitly: `setup` requires it positionally, and it is not
            # guaranteed to be present in **kwargs for every trainer event.
            self.setup(args, state, kwargs.pop("model", None), **kwargs)
        if state.is_world_process_zero:
            metrics = rewrite_logs(metrics)
            self._swanlab.log(metrics)


# Maps each supported `report_to` integration name to the TrainerCallback
# subclass that implements it; used to instantiate callbacks from the
# string names listed in `TrainingArguments.report_to`.
INTEGRATION_TO_CALLBACK = {
    "visualdl": VisualDLCallback,
    "autonlp": AutoNLPCallback,
    "wandb": WandbCallback,
    "tensorboard": TensorBoardCallback,
    "swanlab": SwanLabCallback,
}


Expand Down
2 changes: 1 addition & 1 deletion paddlenlp/trainer/training_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ class TrainingArguments:
instance of `Dataset`.
report_to (`str` or `List[str]`, *optional*, defaults to `"visualdl"`):
The list of integrations to report the results and logs to.
Supported platforms are `"visualdl"`/`"wandb"`/`"tensorboard"`.
Supported platforms are `"visualdl"`/`"wandb"`/`"tensorboard"`/`"swanlab"`.
`"none"` for no integrations.
ddp_find_unused_parameters (`bool`, *optional*):
When using distributed training, the value of the flag `find_unused_parameters` passed to
Expand Down
3 changes: 2 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ wget
huggingface_hub>=0.19.2
tiktoken
tokenizers<=0.20.3; python_version<="3.8"
tokenizers>=0.21,<0.22; python_version>"3.8"
tokenizers>=0.21,<0.22; python_version>"3.8"
swanlab[dashboard]
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ ml_dtypes
tokenizers<=0.20.3; python_version<="3.8"
tokenizers>=0.21,<0.22; python_version>"3.8"
omegaconf
swanlab[dashboard]
Loading