diff --git a/.github/workflows/unix-openmpi.yml b/.github/workflows/unix-openmpi.yml index 56c9fa28..d2ee4748 100644 --- a/.github/workflows/unix-openmpi.yml +++ b/.github/workflows/unix-openmpi.yml @@ -33,6 +33,8 @@ jobs: pip install .[test] - shell: bash -l {0} name: Run unit tests with openMPI + env: + WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }} run: | python -m pytest tests/ mpirun -np 3 --oversubscribe python -m pytest --with-mpi tests/test_grid_sampling_mpi.py diff --git a/.github/workflows/unix.yml b/.github/workflows/unix.yml index 203b5cbd..2b4dcaf3 100644 --- a/.github/workflows/unix.yml +++ b/.github/workflows/unix.yml @@ -33,6 +33,8 @@ jobs: pip install .[test] - shell: bash -l {0} name: Run unit tests with MPICH + env: + WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }} run: | python -m pytest tests/ mpirun -np 3 python -m pytest --with-mpi tests/test_grid_sampling_mpi.py diff --git a/doc/environment.yaml b/doc/environment.yaml index 3b244d42..b14ac1c7 100644 --- a/doc/environment.yaml +++ b/doc/environment.yaml @@ -15,3 +15,4 @@ dependencies: - sphinx-copybutton - sphinx-design - sphinx-gallery + - wandb diff --git a/doc/source/api/index.rst b/doc/source/api/index.rst index a95b5780..84ca8b7d 100644 --- a/doc/source/api/index.rst +++ b/doc/source/api/index.rst @@ -11,4 +11,5 @@ This reference manual details all classes included in optimas. evaluators exploration diagnostics + loggers utils diff --git a/doc/source/api/loggers.rst b/doc/source/api/loggers.rst new file mode 100644 index 00000000..7736c80e --- /dev/null +++ b/doc/source/api/loggers.rst @@ -0,0 +1,9 @@ +Loggers +======= + +.. currentmodule:: optimas.loggers + +.. 
autosummary:: + :toctree: _autosummary + + WandBLogger diff --git a/doc/source/user_guide/advanced_usage/log_to_wandb.rst b/doc/source/user_guide/advanced_usage/log_to_wandb.rst new file mode 100644 index 00000000..a6582486 --- /dev/null +++ b/doc/source/user_guide/advanced_usage/log_to_wandb.rst @@ -0,0 +1,155 @@ +Log an ``Exploration`` to Weights and Biases +============================================ + +`Weights and Biases <https://wandb.ai/>`_ (W&B) is a powerful tool for +tracking and visualizing +machine learning experiments. Optimas has built-in support for logging to W&B, +allowing users to easily track and compare the performance of different +optimization runs. + +This documentation provides a guide on how to use the +:class:`~optimas.loggers.WandBLogger` class +within Optimas to log an :class:`~optimas.explorations.Exploration` +to Weights and Biases. + + +Basic example +------------- + +To log an :class:`~optimas.explorations.Exploration` to Weights and Biases, +you first need to instantiate +a :class:`~optimas.loggers.WandBLogger` object. This object requires several +parameters, including +your W&B API key, the project name, and optionally, a run name, run ID, +data types for specific parameters, and a user-defined function for +custom logs. For example: + +.. code-block:: python + + from optimas.loggers import WandBLogger + + logger = WandBLogger( + api_key="your_wandb_api_key", + project="your_project_name", + run="example_run", # optional + ) + +This logger can then be passed to an ``Exploration``, such as in the example +below: + +.. 
code-block:: python + + from optimas.explorations import Exploration + from optimas.generators import RandomSamplingGenerator + from optimas.evaluators import FunctionEvaluator + from optimas.loggers import WandBLogger + from optimas.core import VaryingParameter, Objective + + + # Define the function to be optimized + def objective_function(inputs, outputs): + x = inputs["x"] + y = inputs["y"] + outputs["result"] = x**2 + y**2 + + + # Define the evaluator + evaluator = FunctionEvaluator(objective_function) + + # Define the generator + generator = RandomSamplingGenerator( + varying_parameters=[ + VaryingParameter(name="x", lower_bound=-10, upper_bound=10), + VaryingParameter(name="y", lower_bound=-10, upper_bound=10), + ], + objectives=[Objective(name="result", minimize=True)], + ) + + # Instantiate the WandBLogger + logger = WandBLogger( + api_key="your_wandb_api_key", + project="your_project_name", + run="example_run", + ) + + # Create the Exploration and pass the logger and evaluator + exploration = Exploration( + generator=generator, evaluator=evaluator, logger=logger + ) + + # Run the exploration + exploration.run(n_evals=100) + + +Customizing the data type of the logger arguments +------------------------------------------------- + +The ``data_types`` argument allows you to specify the W&B +`data type <https://docs.wandb.ai/ref/python/data-types/>`_ for specific +parameters when logging to Weights and Biases. This is useful for ensuring +that your data is logged in the desired format. The ``data_types`` should be +a dictionary where the keys are the names of the parameters you wish to +log, and the values are dictionaries containing the ``type`` and +``type_kwargs`` for each parameter. + +For example, if you have defined two analyzed parameters called +``"parameter_1"`` and ``"parameter_2"`` that at each evaluation store +an image or matplotlib +figure and a numpy array, respectively, you can tell the logger to log the +first one as an image, and the second as a histogram: + +.. 
code-block:: python + + data_types = { + "parameter_1": {"type": wandb.Image, "type_kwargs": {}}, + "parameter_2": {"type": wandb.Histogram, "type_kwargs": {}}, + } + + logger = WandBLogger( + api_key="your_wandb_api_key", + project="your_project_name", + data_types=data_types, + # Other parameters... + ) + + +Defining custom logs +-------------------- + +By default, the ``WandBLogger`` will log the varying parameters, objectives +and analyzed parameters of the ``Exploration``. +If you want to include your own custom logs, you can provide a +``custom_logs`` function that generates them. +This function will be called every time a trial evaluation finishes. + +The ``custom_logs`` function should take two arguments, which correspond to the +most +recently evaluated :class:`~optimas.core.Trial` and the currently active +``Generator``. +You do not need to use them, but they are there for convenience. +The function must then +return a dictionary with the appropriate shape to be given to ``wandb.log``. + +Here's an example of how to define a ``custom_logs`` function: + +.. code-block:: python + + def custom_logs(trial, generator): + # Example: Log the best score so far + best_score = None + trials = generator.completed_trials + for trial in trials: + score = trial.data["result"] + if best_score is None: + best_score = score + elif score < best_score: + best_score = score + return {"Best Score": best_score} + + + logger = WandBLogger( + api_key="your_wandb_api_key", + project="your_project_name", + custom_logs=custom_logs, + # Other parameters... + ) diff --git a/doc/source/user_guide/index.rst b/doc/source/user_guide/index.rst index da3cc7e3..1edab3a7 100644 --- a/doc/source/user_guide/index.rst +++ b/doc/source/user_guide/index.rst @@ -27,6 +27,7 @@ User guide :caption: Advanced usage advanced_usage/build_gp_surrogates + advanced_usage/log_to_wandb .. 
toctree:: :maxdepth: 1 diff --git a/optimas/core/trial.py b/optimas/core/trial.py index d3815d6b..3ae55f4c 100644 --- a/optimas/core/trial.py +++ b/optimas/core/trial.py @@ -152,6 +152,21 @@ def evaluated(self) -> bool: """Determine whether the trial has been evaluated.""" return self.completed or self.failed + @property + def data(self) -> Dict: + """Get a dictionary with all the trial data.""" + vp_dict = self.parameters_as_dict() + ap_dict = self.analyzed_parameters_as_dict() + ob_dict = self.objectives_as_dict() + # Do not report uncertainty. We haven't yet decided about how to + # report it in the history. + for key, val in ob_dict.items(): + ob_dict[key] = val[0] + for key, val in ap_dict.items(): + ap_dict[key] = val[0] + data = {**vp_dict, **ob_dict, **ap_dict} + return data + def mark_as(self, status) -> None: """Set trial status. diff --git a/optimas/explorations/base.py b/optimas/explorations/base.py index 24250cba..ecef2828 100644 --- a/optimas/explorations/base.py +++ b/optimas/explorations/base.py @@ -21,6 +21,7 @@ from optimas.evaluators.function_evaluator import FunctionEvaluator from optimas.utils.logger import get_logger from optimas.utils.other import convert_to_dataframe +from optimas.loggers.base import Logger logger = get_logger(__name__) @@ -78,6 +79,10 @@ class Exploration: manager and ``N-1`` simulation workers. In this case, the ``sim_workers`` parameter is ignored. By default, ``'local'`` mode is used. + logger : Logger, optional + A custom logger that is informed of every completed trial and can + report on the results. Currently, a Weights and Biases logger is + available. """ @@ -93,6 +98,7 @@ def __init__( exploration_dir_path: Optional[str] = "./exploration", resume: Optional[bool] = False, libe_comms: Optional[Literal["local", "threads", "mpi"]] = "local", + logger: Optional[Logger] = None, ) -> None: # For backward compatibility, check for old threading name. 
if libe_comms == "local_threading": @@ -125,6 +131,10 @@ def __init__( self._libe_history = self._create_libe_history() self._load_history(history, resume) self._is_manager = self._set_manager(self.libe_comms, self.libE_specs) + self._logger = logger + if self._logger is not None: + self._logger.initialize(self) + self.generator._set_logger(self._logger) @property def is_manager(self): @@ -194,7 +204,7 @@ def run(self, n_evals: Optional[int] = None) -> None: # Get gen_specs and sim_specs. run_params = self.evaluator.get_run_params() gen_specs = self.generator.get_gen_specs( - self.sim_workers, run_params, sim_max + self.sim_workers, run_params, sim_max, self.libe_comms ) sim_specs = self.evaluator.get_sim_specs( self.generator.varying_parameters, @@ -417,7 +427,10 @@ def attach_evaluations( # Fill in new rows. for field in fields: if field in history_new.dtype.names: - history_new[field] = evaluation_data[field] + # Converting to a list prevents the error + # "ValueError: setting an array element with a sequence" + # when the field contains an array. 
+ history_new[field] = evaluation_data[field].to_list() if not is_history: current_time = time.time() @@ -507,7 +520,7 @@ def _create_libe_history(self) -> History: """Initialize an empty libEnsemble history.""" run_params = self.evaluator.get_run_params() gen_specs = self.generator.get_gen_specs( - self.sim_workers, run_params, None + self.sim_workers, run_params, None, self.libe_comms ) sim_specs = self.evaluator.get_sim_specs( self.generator.varying_parameters, diff --git a/optimas/generators/ax/developer/multitask.py b/optimas/generators/ax/developer/multitask.py index de4c76ab..e5962e4e 100644 --- a/optimas/generators/ax/developer/multitask.py +++ b/optimas/generators/ax/developer/multitask.py @@ -131,11 +131,17 @@ def __init__( self._experiment = self._create_experiment() def get_gen_specs( - self, sim_workers: int, run_params: Dict, sim_max: int + self, + sim_workers: int, + run_params: Dict, + max_evals: int, + libe_comms: str, ) -> Dict: """Get the libEnsemble gen_specs.""" # Get base specs. - gen_specs = super().get_gen_specs(sim_workers, run_params, sim_max) + gen_specs = super().get_gen_specs( + sim_workers, run_params, max_evals, libe_comms + ) # Add task to output parameters. max_length = max([len(self.lofi_task.name), len(self.hifi_task.name)]) gen_specs["out"].append(("task", str, max_length)) diff --git a/optimas/generators/base.py b/optimas/generators/base.py index a54b6f48..0d79233b 100644 --- a/optimas/generators/base.py +++ b/optimas/generators/base.py @@ -3,7 +3,7 @@ from __future__ import annotations import os from copy import deepcopy -from typing import List, Dict, Optional, Union +from typing import List, Dict, Optional, Union, TYPE_CHECKING import numpy as np import pandas as pd @@ -21,6 +21,9 @@ TrialStatus, ) +if TYPE_CHECKING: + from optimas.loggers.base import Logger + logger = get_logger(__name__) @@ -114,6 +117,7 @@ def __init__( self._queued_trials = [] # Trials queued to be given for evaluation. 
self._trial_count = 0 self._check_parameters(self._varying_parameters) + self._logger = None @property def varying_parameters(self) -> List[VaryingParameter]: @@ -150,6 +154,11 @@ def dedicated_resources(self) -> bool: """Get whether the generator has dedicated resources allocated.""" return self._dedicated_resources + @property + def completed_trials(self) -> List[Trial]: + """Get list of completed trials.""" + return [trial for trial in self._given_trials if trial.completed] + @property def n_queued_trials(self) -> int: """Get the number of trials queued for evaluation.""" @@ -266,6 +275,8 @@ def tell( else: log_msg = f"Failed to evaluate trial {trial.index}." logger.info(log_msg) + if self._logger is not None: + self._logger.log_trial(trial, self) if allow_saving_model and self._save_model: self.save_model_to_file() @@ -510,7 +521,11 @@ def save_model_to_file(self) -> None: ) def get_gen_specs( - self, sim_workers: int, run_params: Dict, max_evals: int + self, + sim_workers: int, + run_params: Dict, + max_evals: int, + libe_comms: str, ) -> Dict: """Get the libEnsemble gen_specs. @@ -523,6 +538,10 @@ def get_gen_specs( required. max_evals : int Maximum number of evaluations to generate. + libe_comms : {'local', 'threads', 'mpi'} + The communication mode for libEnsemble. Used to determine whether + the generator is running on a thread (and therefore in shared + memory). """ gen_specs = { @@ -613,3 +632,7 @@ def _check_parameters(self, parameters: List[VaryingParameter]): f"{self.__class__.__name__} does not support fixing " "the value of a VaryingParameter." 
) + + def _set_logger(self, logger: Logger) -> None: + """Set the generator logger.""" + self._logger = logger diff --git a/optimas/loggers/__init__.py b/optimas/loggers/__init__.py new file mode 100644 index 00000000..7a10ea22 --- /dev/null +++ b/optimas/loggers/__init__.py @@ -0,0 +1,4 @@ +from .wandb_logger import WandBLogger + + +__all__ = ["WandBLogger"] diff --git a/optimas/loggers/base.py b/optimas/loggers/base.py new file mode 100644 index 00000000..4b4e921c --- /dev/null +++ b/optimas/loggers/base.py @@ -0,0 +1,50 @@ +"""Contains the definition of the base Logger class.""" + +from __future__ import annotations +from typing import TYPE_CHECKING +from abc import ABC, abstractmethod + + +if TYPE_CHECKING: + from optimas.core import Trial + from optimas.generators.base import Generator + from optimas.explorations import Exploration + + +class Logger(ABC): + """Base class for all loggers.""" + + def initialize(self, exploration: Exploration): + """Initialize logger. + + Called in `Exploration.__init__`. + + Parameters + ---------- + exploration : Exploration + The exploration instance to which the logger was attached. + """ + pass + + @abstractmethod + def log_trial(self, trial: Trial, generator: Generator): + """Log a trial. + + Called every time an evaluated trial is given back + to the generator. + + Parameters + ---------- + trial : Trial + The last trial that has been evaluated. + generator : Generator + The currently active generator. + """ + pass + + def finish(self): + """Finish logging. + + Meant to be called when the exploration is finished. 
+ """ + pass diff --git a/optimas/loggers/wandb_logger.py b/optimas/loggers/wandb_logger.py new file mode 100644 index 00000000..d0185583 --- /dev/null +++ b/optimas/loggers/wandb_logger.py @@ -0,0 +1,178 @@ +"""Contains the definition of the class for logging to Weights and Biases.""" + +from __future__ import annotations +import pathlib +from typing import TYPE_CHECKING, Optional, Callable, Dict + +from matplotlib.figure import Figure + +try: + import wandb + + wandb_installed = True +except ImportError: + wandb_installed = False + +from .base import Logger + +if TYPE_CHECKING: + from optimas.core import Trial + from optimas.generators.base import Generator + from optimas.explorations import Exploration + + +class WandBLogger(Logger): + r"""Weights and Biases logger class. + + Parameters + ---------- + api_key : str + The API key used to log into Weights and Biases. + project : str + Project name. + run : str, optional + Run name. If not given, a random name will be assigned by W&B. + run_id : str, optional + A unique ID for this run, used for resuming. It must + be unique in the project, and if you delete a run you can't reuse + the ID. Use the ``run`` field for a short descriptive name, or + `config` (passed in the ``init_kwargs``) + for saving hyperparameters to compare across runs. The ID cannot + contain the following special characters: ``/\#?%:``. + See the `W&B guide to resuming runs <https://docs.wandb.ai/guides/runs/resuming>`_. + data_types : Dict, optional + A dictionary of the shape + ``{"name": {"type": DataType, "type_kwargs": {}}}``, + where ``name`` is the + name of a varying parameter, objective or other analyzed parameter, + ``DataType`` is a W&B `DataType <https://docs.wandb.ai/ref/python/data-types/>`_ + and ``type_kwargs`` can include additional arguments to pass to the + data type. + If provided, the given parameters will be converted to the specified + data types when logging. + custom_logs : Callable, optional + A user-defined function for creating custom logs. 
This function must + be of the shape `custom_logs(trial, generator)`, where ``trial`` is + the most recently evaluated trial and ``generator`` is the currently + active generator. The function must return a dictionary with the + appropriate shape so that it can be given to `wandb.log`. + login_kwargs : Dict, optional + Additional arguments to pass to ``wandb.login``. + init_kwargs : Dict, optional + Additional arguments to pass to ``wandb.init``. + """ + + def __init__( + self, + api_key: str, + project: str, + run: Optional[str] = None, + run_id: Optional[str] = None, + data_types: Optional[Dict] = None, + custom_logs: Optional[Callable] = None, + login_kwargs: Optional[Dict] = None, + init_kwargs: Optional[Dict] = None, + ) -> None: + if not wandb_installed: + raise ImportError( + "Logging to Weights and Biases requires `wandb` to be " + "installed. Please install it by running " + "`pip install wandb`." + ) + self._api_key = api_key + self._project = project + self._run_name = run + self._run_id = run_id + self._data_types = {} if data_types is None else data_types + self._user_function = custom_logs + self._login_kwargs = {} if login_kwargs is None else login_kwargs + self._init_kwargs = {} if init_kwargs is None else init_kwargs + self._run = None + self._dir = None + + def initialize(self, exploration: Exploration): + """Initialize the W&B logger. + + This method logs into WandB and creates a new run using the output + directory of the exploration. + + Parameters + ---------- + exploration : Exploration + The exploration instance to which the logger was attached. + """ + # Create dir if it doesn't exist. + # We need to do this because the logger is typically initialized + # before the exploration runs and, thus, before the exploration dir + # has been created. + dir = exploration.exploration_dir_path + pathlib.Path(dir).mkdir(parents=True, exist_ok=True) + self._dir = dir + + # Login and initialize run. 
+ wandb.login(key=self._api_key, **self._login_kwargs) + if self._run is None: + self._run = wandb.init( + project=self._project, + name=self._run_name, + resume=True, + id=self._run_id, + dir=self._dir, + **self._init_kwargs, + ) + if self._run_id is None: + self._run_id = self._run.id + + def log_trial(self, trial: Trial, generator: Generator): + """Log a trial. + + This method is called every time an evaluated trial is given back + to the generator. + + Parameters + ---------- + trial : Trial + The last trial that has been evaluated. + generator : Generator + The currently active generator. + """ + # Get and process trial data. + logs = trial.data + for key in list(logs.keys()): + # Apply user-provided wandb types. + if key in self._data_types: + logs[key] = self._data_types[key]["type"]( + logs[key], **self._data_types[key]["type_kwargs"] + ) + # By default, convert matplotlib figures to images. + elif isinstance(logs[key], Figure): + logs[key] = wandb.Image(logs[key]) + # By default, only log scalars. + elif hasattr(logs[key], "__len__"): + del logs[key] + + # Organize in sections. + for par in generator.varying_parameters: + if par.name in logs: + logs[f"Varying parameters/{par.name}"] = logs.pop(par.name) + for par in generator.objectives: + if par.name in logs: + logs[f"Objectives/{par.name}"] = logs.pop(par.name) + for par in generator.analyzed_parameters: + if par.name in logs: + logs[f"Analyzed parameters/{par.name}"] = logs.pop(par.name) + + # Add custom user-defined logs. + if self._user_function is not None: + custom_logs = self._user_function(trial, generator) + logs = {**logs, **custom_logs} + + # Log data. + self._run.log(logs) + + def finish(self): + """Finish logging. + + Call this method to finish the current run on W&B. 
+ """ + self._run.finish() diff --git a/pyproject.toml b/pyproject.toml index 5c0632b6..fb2e893b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ test = [ 'pytest-mpi', 'ax-platform >= 0.4.0', 'matplotlib', + 'wandb', ] all = [ 'ax-platform >= 0.4.0', diff --git a/tests/test_wandb_logger.py b/tests/test_wandb_logger.py new file mode 100644 index 00000000..1300ca17 --- /dev/null +++ b/tests/test_wandb_logger.py @@ -0,0 +1,107 @@ +import os + +import wandb +import numpy as np +import matplotlib.pyplot as plt +from copy import deepcopy + +from optimas.explorations import Exploration +from optimas.generators import RandomSamplingGenerator +from optimas.evaluators import FunctionEvaluator +from optimas.core import VaryingParameter, Objective, Parameter +from optimas.loggers import WandBLogger + + +def eval_func(input_params, output_params): + """Evaluation function used for testing""" + x0 = input_params["x0"] + x1 = input_params["x1"] + result = -(x0 + 10 * np.cos(x0)) * (x1 + 5 * np.cos(x1)) + output_params["f"] = result + output_params["p0"] = np.array([[1, 2, 3, 4], [2, 6, 7, 4]]) + + plt.figure() + plt.plot(output_params["p0"][0], output_params["p0"][1]) + output_params["fig"] = deepcopy(plt.gcf()) + plt.figure() + plt.imshow(output_params["p0"]) + output_params["p1"] = deepcopy(plt.gcf()) + + +def custom_logs(last_trial, generator: RandomSamplingGenerator): + """Make and log a cumulative plot of all trials.""" + all_trials = generator.completed_trials + n_trials = len(all_trials) + shape_1 = np.array(all_trials[0].data["p0"]).shape[1] + history = np.zeros((n_trials, shape_1)) + for i, trial in enumerate(all_trials): + history[i] = np.array(trial.data["p0"]).sum(axis=0) + fig, ax = plt.subplots(figsize=(8, 4)) + ax.imshow(history.T, aspect="auto") + return {"history": wandb.Image(fig)} + + +def test_wandb_logger(): + """Test an exploration with a Weights and Biases logger. 
+ + In addition to the varying parameters and objectives, three analyzed + parameters of different type are added: an array and two objects. One + of the objects will store a matplotlib figure. + """ + # Define variables and objectives. + var1 = VaryingParameter("x0", -50.0, 5.0) + var2 = VaryingParameter("x1", -5.0, 15.0) + obj = Objective("f", minimize=False) + # Test also more complex analyzed parameters. + p0 = Parameter("p0", dtype=(float, (2, 4))) + p1 = Parameter("p1", dtype="O") + p2 = Parameter("fig", dtype="O") + + # Create generator. + gen = RandomSamplingGenerator( + varying_parameters=[var1, var2], + objectives=[obj], + analyzed_parameters=[p0, p1, p2], + ) + + # Create function evaluator. + ev = FunctionEvaluator(function=eval_func) + + # Create exploration. + exploration = Exploration( + generator=gen, + evaluator=ev, + max_evals=10, + sim_workers=1, + exploration_dir_path="./tests_output/test_wandb_logger", + logger=WandBLogger( + api_key=os.getenv("WANDB_API_KEY"), + project="GitHub actions", + run="WandB test", + data_types={ + "p0": {"type": wandb.Histogram, "type_kwargs": {}}, + }, + custom_logs=custom_logs, + ), + ) + + # Test also more exotic use cases where the first step is to attach + # manual evaluations. + exploration.attach_evaluations( + { + "x0": [1.0], + "x1": [2.0], + "f": [0.0], + "p0": [np.array([[1, 2, 3, 4], [2, 6, 7, 4]])], + "p1": [plt.figure()], + "fig": [plt.figure()], + } + ) + + # Run exploration in two steps. + exploration.run(3) + exploration.run() + + +if __name__ == "__main__": + test_wandb_logger()