From c240f6a781c810dcf94f2c06aca6037be270b250 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 9 May 2025 13:45:30 -0500 Subject: [PATCH 001/109] Prepare experiment load to handle async out of order trial completion --- .../mlos_bench/schedulers/base_scheduler.py | 202 ++++++++++++++---- mlos_bench/mlos_bench/storage/base_storage.py | 20 ++ .../mlos_bench/storage/sql/experiment.py | 45 +++- 3 files changed, 219 insertions(+), 48 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index eaa5527c6d..338f36ffa8 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -100,8 +100,9 @@ def __init__( # pylint: disable=too-many-arguments self._optimizer = optimizer self._storage = storage self._root_env_config = root_env_config - self._last_trial_id = -1 + self._longest_finished_trial_sequence_id = -1 self._ran_trials: list[Storage.Trial] = [] + self._registered_trial_ids: set[int] = set() _LOG.debug("Scheduler instantiated: %s :: %s", self, config) @@ -240,7 +241,6 @@ def __exit__( self._in_context = False return False # Do not suppress exceptions - @abstractmethod def start(self) -> None: """Start the scheduling loop.""" assert self.experiment is not None @@ -255,19 +255,62 @@ def start(self) -> None: if self._config_id > 0: tunables = self.load_tunable_config(self._config_id) - self.schedule_trial(tunables) + # If a config_id is provided, assume it is expected to be run immediately. + self.add_trial_to_queue(tunables, ts_start=datetime.now(UTC)) + + is_warm_up: bool = self.optimizer.supports_preload + if not is_warm_up: + _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) + + not_done: bool = True + while not_done: + _LOG.info( + "Optimization loop: Longest finished trial sequence ID: %d", + self._longest_finished_trial_sequence_id, + ) + self.run_schedule(is_warm_up) + self.wait_for_trial_runners() + not_done = self.add_new_optimizer_suggestions() + self.assign_trial_runners( + self.experiment.pending_trials( + datetime.now(UTC), + running=False, + trial_runner_assigned=False, + ) + ) + is_warm_up = False + self.wait_for_trial_runners(wait_all=True) + + @abstractmethod + def wait_for_trial_runners(self, wait_all: bool = False) -> None: + """ + Wait for (enough) TrialRunners to finish. + + This is a blocking call that waits for enough of the the TrialRunners to finish. + The base class implementation waits for all of the TrialRunners to finish. + However this can be overridden in subclasses to implement a more asynchronous behavior. + + Parameters + ---------- + wait_all : bool + If True, wait for all TrialRunners to finish. + If False, wait for "enough" TrialRunners to finish (which for the + base class is all of them). + """ def teardown(self) -> None: """ Tear down the TrialRunners/Environment(s). - Call it after the completion of the `.start()` in the scheduler context. + Call it after the completion of the :py:meth:`Scheduler.start` in the + Scheduler context. 
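
        For illustration only, the intended lifecycle is roughly the following
        sketch (hypothetical driver code; the ``scheduler`` instance and its
        construction are assumed, not part of this patch)::

            with scheduler:           # enter the Scheduler (and Storage) context
                scheduler.start()     # run the optimization loop
                scheduler.teardown()  # tear down the TrialRunners/Environments
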
""" assert self.experiment is not None if self._do_teardown: for trial_runner in self._trial_runners.values(): assert not trial_runner.is_running - trial_runner.teardown() + with trial_runner: + trial_runner.teardown() def get_best_observation(self) -> tuple[dict[str, float] | None, TunableGroups | None]: """Get the best observation from the optimizer.""" @@ -287,54 +330,112 @@ def load_tunable_config(self, config_id: int) -> TunableGroups: _LOG.debug("Config %d ::\n%s", config_id, json.dumps(tunable_values, indent=2)) return tunables.copy() - def _schedule_new_optimizer_suggestions(self) -> bool: + def add_new_optimizer_suggestions(self) -> bool: """ Optimizer part of the loop. - Load the results of the executed trials into the optimizer, suggest new - configurations, and add them to the queue. Return True if optimization is not - over, False otherwise. + Load the results of the executed trials into the + :py:class:`~.Optimizer`, suggest new configurations, and add them to the + queue. + + Returns + ------- + bool + The return value indicates whether the optimization process should + continue to get suggestions from the Optimizer or not. + See Also: :py:meth:`~.Scheduler.not_done`. """ assert self.experiment is not None - (trial_ids, configs, scores, status) = self.experiment.load(self._last_trial_id) + # Load the results of the trials that have been run since the last time + # we queried the Optimizer. + # Note: We need to handle the case of straggler trials that finish out of order. + (trial_ids, configs, scores, status) = self.experiment.load( + last_trial_id=self._longest_finished_trial_sequence_id, + omit_registered_trial_ids=self._registered_trial_ids, + ) _LOG.info("QUEUE: Update the optimizer with trial results: %s", trial_ids) self.optimizer.bulk_register(configs, scores, status) - self._last_trial_id = max(trial_ids, default=self._last_trial_id) + # Mark those trials as registered so we don't load them again. + self._registered_trial_ids.update(trial_ids) + # Update the longest finished trial sequence ID. + self._longest_finished_trial_sequence_id = max( + [ + self.experiment.get_longest_prefix_finished_trial_id(), + self._longest_finished_trial_sequence_id, + ], + default=self._longest_finished_trial_sequence_id, + ) + # Remove trial ids that are older than the longest finished trial sequence ID. + # This is an optimization to avoid a long list of trial ids to omit from + # the load() operation or a long list of trial ids to maintain in memory. + self._registered_trial_ids = { + trial_id + for trial_id in self._registered_trial_ids + if trial_id > self._longest_finished_trial_sequence_id + } + # Check if the optimizer has converged or not. not_done = self.not_done() if not_done: + # TODO: Allow scheduling multiple configs at once (e.g., in the case of idle workers). tunables = self.optimizer.suggest() - self.schedule_trial(tunables) - + self.add_trial_to_queue(tunables) return not_done - def schedule_trial(self, tunables: TunableGroups) -> None: - """Add a configuration to the queue of trials.""" - # TODO: Alternative scheduling policies may prefer to expand repeats over - # time as well as space, or adjust the number of repeats (budget) of a given - # trial based on whether initial results are promising. + def add_trial_to_queue( + self, + tunables: TunableGroups, + ts_start: datetime | None = None, + ) -> None: + """ + Add a configuration to the queue of trials 1 or more times. 
+ + (e.g., according to the :py:attr:`~.Scheduler.trial_config_repeat_count`) + + Parameters + ---------- + tunables : TunableGroups + The tunable configuration to add to the queue. + + ts_start : datetime | None + Optional timestamp to use to start the trial. + + Notes + ----- + Alternative scheduling policies may prefer to expand repeats over + time as well as space, or adjust the number of repeats (budget) of a given + trial based on whether initial results are promising. + """ for repeat_i in range(1, self._trial_config_repeat_count + 1): self._add_trial_to_queue( tunables, - config={ - # Add some additional metadata to track for the trial such as the - # optimizer config used. - # Note: these values are unfortunately mutable at the moment. - # Consider them as hints of what the config was the trial *started*. - # It is possible that the experiment configs were changed - # between resuming the experiment (since that is not currently - # prevented). - "optimizer": self.optimizer.name, - "repeat_i": repeat_i, - "is_defaults": tunables.is_defaults(), - **{ - f"opt_{key}_{i}": val - for (i, opt_target) in enumerate(self.optimizer.targets.items()) - for (key, val) in zip(["target", "direction"], opt_target) - }, - }, + ts_start=ts_start, + config=self._augment_trial_config_metadata(tunables, repeat_i), ) + def _augment_trial_config_metadata( + self, + tunables: TunableGroups, + repeat_i: int, + ) -> dict[str, Any]: + return { + # Add some additional metadata to track for the trial such as the + # optimizer config used. + # Note: these values are unfortunately mutable at the moment. + # Consider them as hints of what the config was the trial *started*. + # It is possible that the experiment configs were changed + # between resuming the experiment (since that is not currently + # prevented). + "optimizer": self.optimizer.name, + "repeat_i": repeat_i, + "is_defaults": tunables.is_defaults(), + **{ + f"opt_{key}_{i}": val + for (i, opt_target) in enumerate(self.optimizer.targets.items()) + for (key, val) in zip(["target", "direction"], opt_target) + }, + } + def _add_trial_to_queue( self, tunables: TunableGroups, @@ -352,10 +453,10 @@ def _add_trial_to_queue( def assign_trial_runners(self, trials: Iterable[Storage.Trial]) -> None: """ - Assigns TrialRunners to the given Trial in batch. + Assigns :py:class:`~.TrialRunner`s to the given :py:class:`~.Trial`s in batch. - The base class implements a simple round-robin scheduling algorithm for each - Trial in sequence. + The base class implements a simple round-robin scheduling algorithm for + each Trial in sequence. Subclasses can override this method to implement a more sophisticated policy. For instance:: @@ -375,6 +476,11 @@ def assign_trial_runners( trial.set_trial_runner(trial_runner) ... + Notes + ----- + Subclasses are *not* required to assign a TrialRunner to the Trial + (e.g., if the Trial should be deferred to a later time). + Parameters ---------- trials : Iterable[Storage.Trial] @@ -411,7 +517,8 @@ def assign_trial_runners( def get_trial_runner(self, trial: Storage.Trial) -> TrialRunner: """ - Gets the TrialRunner associated with the given Trial. + Gets the :py:class:`~.TrialRunner` associated with the given + :py:class:`~.Storage.Trial`. 
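
        For illustration, a rough usage sketch (assuming a Trial that has
        already been assigned a runner)::

            trial_runner = self.get_trial_runner(trial)
            assert trial_runner.trial_runner_id == trial.trial_runner_id
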
Parameters ---------- @@ -434,25 +541,30 @@ def get_trial_runner(self, trial: Storage.Trial) -> TrialRunner: assert trial_runner.trial_runner_id == trial.trial_runner_id return trial_runner - def _run_schedule(self, running: bool = False) -> None: + def run_schedule(self, running: bool = False) -> None: """ - Scheduler part of the loop. + Runs the current schedule of trials. - Check for pending trials in the queue and run them. + Check for :py:class:`.Trial`s with `:py:attr:`.Status.PENDING` and an + assigned :py:attr:`~.Trial.trial_runner_id` in the queue and run them + with :py:meth:`~.Scheduler.run_trial`. """ assert self.experiment is not None - # Make sure that any pending trials have a TrialRunner assigned. pending_trials = list(self.experiment.pending_trials(datetime.now(UTC), running=running)) - self.assign_trial_runners(pending_trials) for trial in pending_trials: + if trial.trial_runner_id is None: + logging.warning("Trial %s has no TrialRunner assigned yet.") + continue self.run_trial(trial) def not_done(self) -> bool: """ Check the stopping conditions. - By default, stop when the optimizer converges or max limit of trials reached. + By default, stop when the :py:class:`.Optimizer` converges or the limit + of :py:attr:`~.Scheduler.max_trials` is reached. """ + # TODO: Add more stopping conditions: https://github.com/microsoft/MLOS/issues/427 return self.optimizer.not_converged() and ( self._trial_count < self._max_trials or self._max_trials <= 0 ) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index f2d393994f..f5b5d5a43a 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -284,10 +284,20 @@ def load_telemetry(self, trial_id: int) -> list[tuple[datetime, str, Any]]: Telemetry data. """ + @abstractmethod + def get_longest_prefix_finished_trial_id(self) -> int: + """ + Calculate the last trial ID for the experiment. + + This is used to determine the last trial ID that finished (failed or + successful) such that all Trials before it are also finished. + """ + @abstractmethod def load( self, last_trial_id: int = -1, + omit_registered_trial_ids: Iterable[int] | None = None, ) -> tuple[list[int], list[dict], list[dict[str, Any] | None], list[Status]]: """ Load (tunable values, benchmark scores, status) to warm-up the optimizer. @@ -296,10 +306,20 @@ def load( that were scheduled *after* the given trial ID. Otherwise, return data from ALL merged-in experiments and attempt to impute the missing tunable values. + Additionally, if `omit_registered_trial_ids` is provided, omit the + trials matching those ids. + + The parameters together allow us to efficiently load data from + finished trials that we haven't registered with the Optimizer yet + for bulk registering. + Parameters ---------- last_trial_id : int (Optional) Trial ID to start from. + omit_registered_trial_ids : Iterable[int] | None = None, + (Optional) List of trial IDs to omit. If None, load all trials. 
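
        For illustration, a scheduler might combine these parameters roughly as
        in the following sketch (adapted from ``add_new_optimizer_suggestions()``
        above; the ``experiment``, ``optimizer``, and watermark variables are
        assumed to be in scope)::

            # Load only trials that finished after the watermark and that have
            # not already been registered with the optimizer.
            (trial_ids, configs, scores, status) = experiment.load(
                last_trial_id=longest_finished_trial_sequence_id,
                omit_registered_trial_ids=registered_trial_ids,
            )
            optimizer.bulk_register(configs, scores, status)
            registered_trial_ids.update(trial_ids)
            # Advance the watermark to the end of the contiguous prefix of
            # finished trials, then drop the ids it has subsumed.
            longest_finished_trial_sequence_id = max(
                experiment.get_longest_prefix_finished_trial_id(),
                longest_finished_trial_sequence_id,
            )
            registered_trial_ids = {
                trial_id
                for trial_id in registered_trial_ids
                if trial_id > longest_finished_trial_sequence_id
            }
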
+ Returns ------- diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index eb47de7d71..0dd881f3d8 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -8,7 +8,7 @@ import hashlib import logging -from collections.abc import Iterator +from collections.abc import Iterable, Iterator from datetime import datetime from typing import Any, Literal @@ -153,13 +153,43 @@ def load_telemetry(self, trial_id: int) -> list[tuple[datetime, str, Any]]: for row in cur_telemetry.fetchall() ] + # TODO: Add a test for this method. + def get_longest_prefix_finished_trial_id(self) -> int: + with self._engine.connect() as conn: + # Get the first (minimum) trial ID with an unfinished status. + first_unfinished_trial_id_stmt = ( + self._schema.trial.select() + .with_only_columns( + func.min(self._schema.trial.c.trial_id), + ) + .where( + self._schema.trial.c.exp_id == self._experiment_id, + func.not_( + self._schema.trial.c.status.in_( + [ + Status.SUCCEEDED.name, + Status.FAILED.name, + Status.TIMED_OUT.name, + ] + ), + ), + ) + ) + + max_trial_id = conn.execute(first_unfinished_trial_id_stmt).scalar() + if max_trial_id is None: + return -1 + # Return one less than the first unfinished trial ID - it should be + # finished (or not exist, which is fine as a limit). + return int(max_trial_id) - 1 + def load( self, last_trial_id: int = -1, + omit_registered_trial_ids: Iterable[int] | None = None, ) -> tuple[list[int], list[dict], list[dict[str, Any] | None], list[Status]]: - with self._engine.connect() as conn: - cur_trials = conn.execute( + stmt = ( self._schema.trial.select() .with_only_columns( self._schema.trial.c.trial_id, @@ -182,6 +212,15 @@ def load( ) ) + # TODO: Add a test for this parameter. + + # Note: if we have a very large number of trials, this may encounter + # SQL text length limits, so we may need to chunk this. + if omit_registered_trial_ids is not None: + stmt = stmt.where(self._schema.trial.c.trial_id.notin_(omit_registered_trial_ids)) + + cur_trials = conn.execute(stmt) + trial_ids: list[int] = [] configs: list[dict[str, Any]] = [] scores: list[dict[str, Any] | None] = [] From 4c767f075a891a1c04d9e3ace170b1e943c3030c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 9 May 2025 16:09:32 -0500 Subject: [PATCH 002/109] Rename some methods and move some to the base class in preparation for some reuse by ParallelTrialScheduler --- .../mlos_bench/schedulers/base_scheduler.py | 168 +++++++++++++----- .../mlos_bench/schedulers/sync_scheduler.py | 18 +- .../mlos_bench/schedulers/trial_runner.py | 12 +- 3 files changed, 137 insertions(+), 61 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index eaa5527c6d..d64f4e812d 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -240,7 +240,6 @@ def __exit__( self._in_context = False return False # Do not suppress exceptions - @abstractmethod def start(self) -> None: """Start the scheduling loop.""" assert self.experiment is not None @@ -255,13 +254,55 @@ def start(self) -> None: if self._config_id > 0: tunables = self.load_tunable_config(self._config_id) - self.schedule_trial(tunables) + # If a config_id is provided, assume it is expected to be run immediately. 
+ self.add_trial_to_queue(tunables, ts_start=datetime.now(UTC)) + + is_warm_up: bool = self.optimizer.supports_preload + if not is_warm_up: + _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) + + not_done: bool = True + while not_done: + _LOG.info( + "Optimization loop: Longest finished trial sequence ID: %d", + self._longest_finished_trial_sequence_id, + ) + self.run_schedule(is_warm_up) + self.wait_for_trial_runners() + not_done = self.add_new_optimizer_suggestions() + self.assign_trial_runners( + self.experiment.pending_trials( + datetime.now(UTC), + running=False, + trial_runner_assigned=False, + ) + ) + is_warm_up = False + self.wait_for_trial_runners(wait_all=True) + + @abstractmethod + def wait_for_trial_runners(self, wait_all: bool = False) -> None: + """ + Wait for (enough) TrialRunners to finish. + + This is a blocking call that waits for enough of the the TrialRunners to finish. + The base class implementation waits for all of the TrialRunners to finish. + However this can be overridden in subclasses to implement a more asynchronous behavior. + + Parameters + ---------- + wait_all : bool + If True, wait for all TrialRunners to finish. + If False, wait for "enough" TrialRunners to finish (which for the + base class is all of them). + """ def teardown(self) -> None: """ Tear down the TrialRunners/Environment(s). - Call it after the completion of the `.start()` in the scheduler context. + Call it after the completion of the :py:meth:`Scheduler.start` in the + Scheduler context. """ assert self.experiment is not None if self._do_teardown: @@ -287,13 +328,20 @@ def load_tunable_config(self, config_id: int) -> TunableGroups: _LOG.debug("Config %d ::\n%s", config_id, json.dumps(tunable_values, indent=2)) return tunables.copy() - def _schedule_new_optimizer_suggestions(self) -> bool: + def add_new_optimizer_suggestions(self) -> bool: """ Optimizer part of the loop. - Load the results of the executed trials into the optimizer, suggest new - configurations, and add them to the queue. Return True if optimization is not - over, False otherwise. + Load the results of the executed trials into the + :py:class:`~.Optimizer`, suggest new configurations, and add them to the + queue. + + Returns + ------- + bool + The return value indicates whether the optimization process should + continue to get suggestions from the Optimizer or not. + See Also: :py:meth:`~.Scheduler.not_done`. """ assert self.experiment is not None (trial_ids, configs, scores, status) = self.experiment.load(self._last_trial_id) @@ -301,40 +349,67 @@ def _schedule_new_optimizer_suggestions(self) -> bool: self.optimizer.bulk_register(configs, scores, status) self._last_trial_id = max(trial_ids, default=self._last_trial_id) + # Check if the optimizer has converged or not. not_done = self.not_done() if not_done: tunables = self.optimizer.suggest() - self.schedule_trial(tunables) - + self.add_trial_to_queue(tunables) return not_done - def schedule_trial(self, tunables: TunableGroups) -> None: - """Add a configuration to the queue of trials.""" - # TODO: Alternative scheduling policies may prefer to expand repeats over - # time as well as space, or adjust the number of repeats (budget) of a given - # trial based on whether initial results are promising. + def add_trial_to_queue( + self, + tunables: TunableGroups, + ts_start: datetime | None = None, + ) -> None: + """ + Add a configuration to the queue of trials 1 or more times. 
+ + (e.g., according to the :py:attr:`~.Scheduler.trial_config_repeat_count`) + + Parameters + ---------- + tunables : TunableGroups + The tunable configuration to add to the queue. + + ts_start : datetime | None + Optional timestamp to use to start the trial. + + Notes + ----- + Alternative scheduling policies may prefer to expand repeats over + time as well as space, or adjust the number of repeats (budget) of a given + trial based on whether initial results are promising. + """ for repeat_i in range(1, self._trial_config_repeat_count + 1): self._add_trial_to_queue( tunables, - config={ - # Add some additional metadata to track for the trial such as the - # optimizer config used. - # Note: these values are unfortunately mutable at the moment. - # Consider them as hints of what the config was the trial *started*. - # It is possible that the experiment configs were changed - # between resuming the experiment (since that is not currently - # prevented). - "optimizer": self.optimizer.name, - "repeat_i": repeat_i, - "is_defaults": tunables.is_defaults(), - **{ - f"opt_{key}_{i}": val - for (i, opt_target) in enumerate(self.optimizer.targets.items()) - for (key, val) in zip(["target", "direction"], opt_target) - }, - }, + ts_start=ts_start, + config=self._augment_trial_config_metadata(tunables, repeat_i), ) + def _augment_trial_config_metadata( + self, + tunables: TunableGroups, + repeat_i: int, + ) -> dict[str, Any]: + return { + # Add some additional metadata to track for the trial such as the + # optimizer config used. + # Note: these values are unfortunately mutable at the moment. + # Consider them as hints of what the config was the trial *started*. + # It is possible that the experiment configs were changed + # between resuming the experiment (since that is not currently + # prevented). + "optimizer": self.optimizer.name, + "repeat_i": repeat_i, + "is_defaults": tunables.is_defaults(), + **{ + f"opt_{key}_{i}": val + for (i, opt_target) in enumerate(self.optimizer.targets.items()) + for (key, val) in zip(["target", "direction"], opt_target) + }, + } + def _add_trial_to_queue( self, tunables: TunableGroups, @@ -352,10 +427,10 @@ def _add_trial_to_queue( def assign_trial_runners(self, trials: Iterable[Storage.Trial]) -> None: """ - Assigns TrialRunners to the given Trial in batch. + Assigns :py:class:`~.TrialRunner`s to the given :py:class:`~.Trial`s in batch. - The base class implements a simple round-robin scheduling algorithm for each - Trial in sequence. + The base class implements a simple round-robin scheduling algorithm for + each Trial in sequence. Subclasses can override this method to implement a more sophisticated policy. For instance:: @@ -375,6 +450,11 @@ def assign_trial_runners( trial.set_trial_runner(trial_runner) ... + Notes + ----- + Subclasses are *not* required to assign a TrialRunner to the Trial + (e.g., if the Trial should be deferred to a later time). + Parameters ---------- trials : Iterable[Storage.Trial] @@ -411,7 +491,8 @@ def assign_trial_runners( def get_trial_runner(self, trial: Storage.Trial) -> TrialRunner: """ - Gets the TrialRunner associated with the given Trial. + Gets the :py:class:`~.TrialRunner` associated with the given + :py:class:`~.Storage.Trial`. 
Parameters ---------- @@ -434,25 +515,30 @@ def get_trial_runner(self, trial: Storage.Trial) -> TrialRunner: assert trial_runner.trial_runner_id == trial.trial_runner_id return trial_runner - def _run_schedule(self, running: bool = False) -> None: + def run_schedule(self, running: bool = False) -> None: """ - Scheduler part of the loop. + Runs the current schedule of trials. - Check for pending trials in the queue and run them. + Check for :py:class:`.Trial`s with `:py:attr:`.Status.PENDING` and an + assigned :py:attr:`~.Trial.trial_runner_id` in the queue and run them + with :py:meth:`~.Scheduler.run_trial`. """ assert self.experiment is not None - # Make sure that any pending trials have a TrialRunner assigned. pending_trials = list(self.experiment.pending_trials(datetime.now(UTC), running=running)) - self.assign_trial_runners(pending_trials) for trial in pending_trials: + if trial.trial_runner_id is None: + logging.warning("Trial %s has no TrialRunner assigned yet.") + continue self.run_trial(trial) def not_done(self) -> bool: """ Check the stopping conditions. - By default, stop when the optimizer converges or max limit of trials reached. + By default, stop when the :py:class:`.Optimizer` converges or the limit + of :py:attr:`~.Scheduler.max_trials` is reached. """ + # TODO: Add more stopping conditions: https://github.com/microsoft/MLOS/issues/427 return self.optimizer.not_converged() and ( self._trial_count < self._max_trials or self._max_trials <= 0 ) diff --git a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py index 4b864942dc..36ba2973d5 100644 --- a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py @@ -15,24 +15,10 @@ class SyncScheduler(Scheduler): """A simple single-threaded synchronous optimization loop implementation.""" - def start(self) -> None: - """Start the optimization loop.""" - super().start() - - is_warm_up = self.optimizer.supports_preload - if not is_warm_up: - _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) - - not_done = True - while not_done: - _LOG.info("Optimization loop: Last trial ID: %d", self._last_trial_id) - self._run_schedule(is_warm_up) - not_done = self._schedule_new_optimizer_suggestions() - is_warm_up = False - def run_trial(self, trial: Storage.Trial) -> None: """ - Set up and run a single trial. + Set up and run a single :py:class:`~.Storage.Trial` on its + :py:class:`~.TrialRunner`. Save the results in the storage. """ diff --git a/mlos_bench/mlos_bench/schedulers/trial_runner.py b/mlos_bench/mlos_bench/schedulers/trial_runner.py index 80eb696bc6..63c15a0e1f 100644 --- a/mlos_bench/mlos_bench/schedulers/trial_runner.py +++ b/mlos_bench/mlos_bench/schedulers/trial_runner.py @@ -20,6 +20,7 @@ from mlos_bench.services.types import SupportsConfigLoading from mlos_bench.storage.base_storage import Storage from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.tunables.tunable_types import TunableValue _LOG = logging.getLogger(__name__) @@ -168,7 +169,7 @@ def run_trial( self, trial: Storage.Trial, global_config: dict[str, Any] | None = None, - ) -> None: + ) -> tuple[Status, datetime, dict[str, TunableValue] | None]: """ Run a single trial on this TrialRunner's Environment and stores the results in the backend Trial Storage. 
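
With this signature change, a caller can act on the trial outcome directly.
A rough usage sketch (hypothetical caller code, not part of this patch)::

    (status, timestamp, results) = trial_runner.run_trial(trial, global_config)
    if status == Status.SUCCEEDED:
        _LOG.info("Trial %s succeeded at %s :: %s", trial, timestamp, results)
    else:
        _LOG.warning("Trial %s ended with status %s", trial, status)
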
@@ -198,9 +199,10 @@ def run_trial( if not self.environment.setup(trial.tunables, trial.config(global_config)): _LOG.warning("Setup failed: %s :: %s", self.environment, trial.tunables) # FIXME: Use the actual timestamp from the environment. - _LOG.info("TrialRunner: Update trial results: %s :: %s", trial, Status.FAILED) - trial.update(Status.FAILED, datetime.now(UTC)) - return + (status, timestamp, results) = (Status.FAILED, datetime.now(UTC), None) + _LOG.info("TrialRunner: Update trial results: %s :: %s", trial, status) + trial.update(status, timestamp) + return (status, timestamp, results) # TODO: start background status polling of the environments in the event loop. @@ -221,6 +223,8 @@ def run_trial( self._is_running = False + return (status, timestamp, results) + def teardown(self) -> None: """ Tear down the Environment. From 0261125e10168c38916409c43a17ba021113a1ea Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 9 May 2025 16:21:35 -0500 Subject: [PATCH 003/109] remove wait_for_trial_runners for now --- .../mlos_bench/schedulers/base_scheduler.py | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index d64f4e812d..5bbbedfbca 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -263,12 +263,8 @@ def start(self) -> None: not_done: bool = True while not_done: - _LOG.info( - "Optimization loop: Longest finished trial sequence ID: %d", - self._longest_finished_trial_sequence_id, - ) + _LOG.info("Optimization loop: Last trial ID: %d", self._last_trial_id) self.run_schedule(is_warm_up) - self.wait_for_trial_runners() not_done = self.add_new_optimizer_suggestions() self.assign_trial_runners( self.experiment.pending_trials( @@ -278,24 +274,6 @@ def start(self) -> None: ) ) is_warm_up = False - self.wait_for_trial_runners(wait_all=True) - - @abstractmethod - def wait_for_trial_runners(self, wait_all: bool = False) -> None: - """ - Wait for (enough) TrialRunners to finish. - - This is a blocking call that waits for enough of the the TrialRunners to finish. - The base class implementation waits for all of the TrialRunners to finish. - However this can be overridden in subclasses to implement a more asynchronous behavior. - - Parameters - ---------- - wait_all : bool - If True, wait for all TrialRunners to finish. - If False, wait for "enough" TrialRunners to finish (which for the - base class is all of them). 
- """ def teardown(self) -> None: """ From 5db8fb2a5c3f043152a9410017718a3f4ea1b5ef Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 9 May 2025 16:21:52 -0500 Subject: [PATCH 004/109] remove vscode setting --- .vscode/settings.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 23e43fed68..9be9b50917 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -146,8 +146,7 @@ "isort.check": true, "[python]": { "editor.codeActionsOnSave": { - "source.organizeImports": "explicit", - "source.unusedImports": "explicit" + "source.organizeImports": "explicit" }, "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true, From 540cbcef63a9e769c79d6d366f9eb35001e826cd Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 9 May 2025 16:23:03 -0500 Subject: [PATCH 005/109] comments --- mlos_bench/mlos_bench/storage/base_storage.py | 17 +++++++-- .../mlos_bench/storage/sql/experiment.py | 37 ++++++++++++------- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index f2d393994f..460a8ce488 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -313,18 +313,27 @@ def pending_trials( timestamp: datetime, *, running: bool, + trial_runner_assigned: bool | None = None, ) -> Iterator["Storage.Trial"]: """ - Return an iterator over the pending trials that are scheduled to run on or - before the specified timestamp. + Return an iterator over the :py:class:`~.Storage.Trial`s that are + :py:attr:`~.Status.PENDING` and have a scheduled + :py:attr:`~.Storage.Trial.ts_start` time to run on or before the specified + timestamp. Parameters ---------- timestamp : datetime.datetime - The time in UTC to check for scheduled trials. + The time in UTC to check for scheduled Trials. running : bool - If True, include the trials that are already running. + If True, include the Trials that are also + :py:attr:`~.Status.RUNNING` or :py:attr:`~.Status.READY`. Otherwise, return only the scheduled trials. + trial_runner_assigned : bool | None + If True, include the Trials that are assigned to a + :py:class:`~.TrialRunner`. If False, return only the trials + that are not assigned to any :py:class:`~.TrialRunner`. + If None, return all trials regardless of their assignment. Returns ------- diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index eb47de7d71..30ce3a11a3 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -235,25 +235,36 @@ def _get_key_val(conn: Connection, table: Table, field: str, **kwargs: Any) -> d row._tuple() for row in cur_result.fetchall() # pylint: disable=protected-access ) - def pending_trials(self, timestamp: datetime, *, running: bool) -> Iterator[Storage.Trial]: + # TODO: Add tests for trial_runner_assigned filtering. 
+ def pending_trials( + self, + timestamp: datetime, + *, + running: bool = False, + trial_runner_assigned: bool | None = None, + ) -> Iterator[Storage.Trial]: timestamp = utcify_timestamp(timestamp, origin="local") _LOG.info("Retrieve pending trials for: %s @ %s", self._experiment_id, timestamp) if running: - pending_status = [Status.PENDING.name, Status.READY.name, Status.RUNNING.name] + statuses = [Status.PENDING, Status.READY, Status.RUNNING] else: - pending_status = [Status.PENDING.name] + statuses = [Status.PENDING] with self._engine.connect() as conn: - cur_trials = conn.execute( - self._schema.trial.select().where( - self._schema.trial.c.exp_id == self._experiment_id, - ( - self._schema.trial.c.ts_start.is_(None) - | (self._schema.trial.c.ts_start <= timestamp) - ), - self._schema.trial.c.ts_end.is_(None), - self._schema.trial.c.status.in_(pending_status), - ) + stmt = self._schema.trial.select().where( + self._schema.trial.c.exp_id == self._experiment_id, + ( + self._schema.trial.c.ts_start.is_(None) + | (self._schema.trial.c.ts_start <= timestamp) + ), + self._schema.trial.c.ts_end.is_(None), + self._schema.trial.c.status.in_([s.name for s in statuses]), ) + if trial_runner_assigned: + stmt.where(self._schema.trial.c.trial_runner_id.isnot(None)) + elif trial_runner_assigned is False: + stmt.where(self._schema.trial.c.trial_runner_id.is_(None)) + # else: # No filtering by trial_runner_id + cur_trials = conn.execute(stmt) for trial in cur_trials.fetchall(): tunables = self._get_key_val( conn, From a10529a058a7a729cf458c208553dc986769918a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 9 May 2025 16:31:22 -0500 Subject: [PATCH 006/109] doc improvements --- .../mlos_bench/schedulers/base_scheduler.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 5bbbedfbca..c3706e2d10 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -500,9 +500,24 @@ def run_schedule(self, running: bool = False) -> None: Check for :py:class:`.Trial`s with `:py:attr:`.Status.PENDING` and an assigned :py:attr:`~.Trial.trial_runner_id` in the queue and run them with :py:meth:`~.Scheduler.run_trial`. + + Subclasses can override this method to implement a more sophisticated + scheduling policy. + + Parameters + ---------- + running : bool + If True, run the trials that are already in a "running" state (e.g., to resume them). + If False (default), run the trials that are pending. 
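
        For instance, the default loop in :py:meth:`~.Scheduler.start` pairs
        this with runner assignment roughly as follows (simplified sketch)::

            self.run_schedule(is_warm_up)
            not_done = self.add_new_optimizer_suggestions()
            self.assign_trial_runners(
                self.experiment.pending_trials(datetime.now(UTC), running=False)
            )
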
""" assert self.experiment is not None - pending_trials = list(self.experiment.pending_trials(datetime.now(UTC), running=running)) + pending_trials = list( + self.experiment.pending_trials( + datetime.now(UTC), + running=running, + trial_runner_assigned=True, + ) + ) for trial in pending_trials: if trial.trial_runner_id is None: logging.warning("Trial %s has no TrialRunner assigned yet.") From b4352236f7cedb4874916656f06d598ef20dea90 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 9 May 2025 16:42:32 -0500 Subject: [PATCH 007/109] tweaks --- .../mlos_bench/schedulers/base_scheduler.py | 19 ------------------- .../mlos_bench/schedulers/sync_scheduler.py | 15 --------------- 2 files changed, 34 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 338f36ffa8..1f918afec4 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -269,7 +269,6 @@ def start(self) -> None: self._longest_finished_trial_sequence_id, ) self.run_schedule(is_warm_up) - self.wait_for_trial_runners() not_done = self.add_new_optimizer_suggestions() self.assign_trial_runners( self.experiment.pending_trials( @@ -279,24 +278,6 @@ def start(self) -> None: ) ) is_warm_up = False - self.wait_for_trial_runners(wait_all=True) - - @abstractmethod - def wait_for_trial_runners(self, wait_all: bool = False) -> None: - """ - Wait for (enough) TrialRunners to finish. - - This is a blocking call that waits for enough of the the TrialRunners to finish. - The base class implementation waits for all of the TrialRunners to finish. - However this can be overridden in subclasses to implement a more asynchronous behavior. - - Parameters - ---------- - wait_all : bool - If True, wait for all TrialRunners to finish. - If False, wait for "enough" TrialRunners to finish (which for the - base class is all of them). - """ def teardown(self) -> None: """ diff --git a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py index 4b864942dc..24a43ec806 100644 --- a/mlos_bench/mlos_bench/schedulers/sync_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/sync_scheduler.py @@ -15,21 +15,6 @@ class SyncScheduler(Scheduler): """A simple single-threaded synchronous optimization loop implementation.""" - def start(self) -> None: - """Start the optimization loop.""" - super().start() - - is_warm_up = self.optimizer.supports_preload - if not is_warm_up: - _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) - - not_done = True - while not_done: - _LOG.info("Optimization loop: Last trial ID: %d", self._last_trial_id) - self._run_schedule(is_warm_up) - not_done = self._schedule_new_optimizer_suggestions() - is_warm_up = False - def run_trial(self, trial: Storage.Trial) -> None: """ Set up and run a single trial. 
From 71d3f2445aa0ae148de043501f49670b751b905b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 9 May 2025 16:58:02 -0500 Subject: [PATCH 008/109] fixups --- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 2 +- mlos_bench/mlos_bench/storage/base_storage.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 1f918afec4..4583810c12 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -28,6 +28,7 @@ class Scheduler(ContextManager, metaclass=ABCMeta): # pylint: disable=too-many-instance-attributes + # pylint: disable=too-many-public-methods """Base class for the optimization loop scheduling policies.""" def __init__( # pylint: disable=too-many-arguments @@ -274,7 +275,6 @@ def start(self) -> None: self.experiment.pending_trials( datetime.now(UTC), running=False, - trial_runner_assigned=False, ) ) is_warm_up = False diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index f5b5d5a43a..f588745592 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -24,7 +24,7 @@ import logging from abc import ABCMeta, abstractmethod -from collections.abc import Iterator, Mapping +from collections.abc import Iterable, Iterator, Mapping from contextlib import AbstractContextManager as ContextManager from datetime import datetime from types import TracebackType From 263452ae83b15fc2e12f73ec538cd00fd18b44fa Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 9 May 2025 17:23:41 -0500 Subject: [PATCH 009/109] doc fixups --- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 11 ++++++----- mlos_bench/mlos_bench/storage/base_storage.py | 7 +++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index c3706e2d10..e872be4ff1 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -349,7 +349,7 @@ def add_trial_to_queue( tunables : TunableGroups The tunable configuration to add to the queue. - ts_start : datetime | None + ts_start : datetime.datetime | None Optional timestamp to use to start the trial. Notes @@ -405,7 +405,8 @@ def _add_trial_to_queue( def assign_trial_runners(self, trials: Iterable[Storage.Trial]) -> None: """ - Assigns :py:class:`~.TrialRunner`s to the given :py:class:`~.Trial`s in batch. + Assigns a :py:class:`~.TrialRunner` to each :py:class:`~.Storage.Trial` in + the batch. The base class implements a simple round-robin scheduling algorithm for each Trial in sequence. @@ -497,9 +498,9 @@ def run_schedule(self, running: bool = False) -> None: """ Runs the current schedule of trials. - Check for :py:class:`.Trial`s with `:py:attr:`.Status.PENDING` and an - assigned :py:attr:`~.Trial.trial_runner_id` in the queue and run them - with :py:meth:`~.Scheduler.run_trial`. + Check for :py:class:`~.Storage.Trial` instances with `:py:attr:`.Status.PENDING` + and an assigned :py:attr:`~.Storage.Trial.trial_runner_id` in the queue and run + them with :py:meth:`~.Scheduler.run_trial`. Subclasses can override this method to implement a more sophisticated scheduling policy. 
diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 460a8ce488..c64ab5457c 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -316,10 +316,9 @@ def pending_trials( trial_runner_assigned: bool | None = None, ) -> Iterator["Storage.Trial"]: """ - Return an iterator over the :py:class:`~.Storage.Trial`s that are - :py:attr:`~.Status.PENDING` and have a scheduled - :py:attr:`~.Storage.Trial.ts_start` time to run on or before the specified - timestamp. + Return an iterator over :py:attr:`~.Status.PENDING` + :py:class:`~.Storage.Trial` instances that have a scheduled start + time to run on or before the specified timestamp. Parameters ---------- From 9398b1817e6582c0c949d57e1b671413658a472c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 9 May 2025 22:24:04 +0000 Subject: [PATCH 010/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 4 ++-- mlos_bench/mlos_bench/storage/base_storage.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index e872be4ff1..58b3675397 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -405,8 +405,8 @@ def _add_trial_to_queue( def assign_trial_runners(self, trials: Iterable[Storage.Trial]) -> None: """ - Assigns a :py:class:`~.TrialRunner` to each :py:class:`~.Storage.Trial` in - the batch. + Assigns a :py:class:`~.TrialRunner` to each :py:class:`~.Storage.Trial` in the + batch. The base class implements a simple round-robin scheduling algorithm for each Trial in sequence. diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index c64ab5457c..52ecf93b7d 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -317,8 +317,8 @@ def pending_trials( ) -> Iterator["Storage.Trial"]: """ Return an iterator over :py:attr:`~.Status.PENDING` - :py:class:`~.Storage.Trial` instances that have a scheduled start - time to run on or before the specified timestamp. + :py:class:`~.Storage.Trial` instances that have a scheduled start time to + run on or before the specified timestamp. Parameters ---------- From 281dbf465f71623a4101ea9f49f6660120181c51 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 9 May 2025 17:53:10 -0500 Subject: [PATCH 011/109] add tests and fixes --- .../mlos_bench/storage/sql/experiment.py | 5 +- .../tests/storage/trial_schedule_test.py | 47 +++++++++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 30ce3a11a3..2d7db8e34e 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -235,7 +235,6 @@ def _get_key_val(conn: Connection, table: Table, field: str, **kwargs: Any) -> d row._tuple() for row in cur_result.fetchall() # pylint: disable=protected-access ) - # TODO: Add tests for trial_runner_assigned filtering. 
def pending_trials( self, timestamp: datetime, @@ -260,9 +259,9 @@ def pending_trials( self._schema.trial.c.status.in_([s.name for s in statuses]), ) if trial_runner_assigned: - stmt.where(self._schema.trial.c.trial_runner_id.isnot(None)) + stmt = stmt.where(self._schema.trial.c.trial_runner_id.isnot(None)) elif trial_runner_assigned is False: - stmt.where(self._schema.trial.c.trial_runner_id.is_(None)) + stmt = stmt.where(self._schema.trial.c.trial_runner_id.is_(None)) # else: # No filtering by trial_runner_id cur_trials = conn.execute(stmt) for trial in cur_trials.fetchall(): diff --git a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py index aaf545c787..faefe4998b 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py @@ -61,6 +61,53 @@ def test_schedule_trial( trial_now2_data = exp_data.trials[trial_now2.trial_id] assert trial_now2_data.trial_runner_id == trial_now2.trial_runner_id + # --- Test the trial_runner_assigned parameter --- + # At this point: + # - trial_now1: no trial_runner assigned + # - trial_now2: trial_runner assigned + # - trial_1h, trial_2h: no trial_runner assigned + + # All pending trials (should include all 4) + all_pending = _trial_ids( + exp_storage.pending_trials( + timestamp + timedelta_1hr * 3, + running=False, + trial_runner_assigned=None, + ) + ) + assert all_pending == { + trial_now1.trial_id, + trial_now2.trial_id, + trial_1h.trial_id, + trial_2h.trial_id, + }, f"Expected all pending trials, got {all_pending}" + + # Only those with a trial_runner assigned + assigned_pending = _trial_ids( + exp_storage.pending_trials( + timestamp + timedelta_1hr * 3, + running=False, + trial_runner_assigned=True, + ) + ) + assert assigned_pending == { + trial_now2.trial_id + }, f"Expected only trials with a runner assigned, got {assigned_pending}" + + # Only those without a trial_runner assigned + unassigned_pending = _trial_ids( + exp_storage.pending_trials( + timestamp + timedelta_1hr * 3, + running=False, + trial_runner_assigned=False, + ) + ) + assert unassigned_pending == { + trial_now1.trial_id, + trial_1h.trial_id, + trial_2h.trial_id, + }, f"Expected only trials without a runner assigned, got {unassigned_pending}" + # Scheduler side: get trials ready to run at certain timestamps: # Pretend 1 minute has passed, get trials scheduled to run: From defeb2848689883336739a6ad0c8e4944f43e67e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 10 May 2025 10:06:22 -0500 Subject: [PATCH 012/109] fixup suggested --- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 58b3675397..e2403333bb 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -520,9 +520,9 @@ def run_schedule(self, running: bool = False) -> None: ) ) for trial in pending_trials: - if trial.trial_runner_id is None: - logging.warning("Trial %s has no TrialRunner assigned yet.") - continue + assert ( + trial.trial_runner_id is not None + ), f"Trial {trial} has no TrialRunner assigned yet." 
self.run_trial(trial) def not_done(self) -> bool: From c7886db5e0622a9f1b8d18640b94d1f62210dcb4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 21:14:07 +0000 Subject: [PATCH 013/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../mlos_bench/schedulers/base_scheduler.py | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index bf3c96819c..a6eb893e1b 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -437,43 +437,43 @@ def _add_trial_to_queue( def assign_trial_runners(self, trials: Iterable[Storage.Trial]) -> None: """ -<<<<<<< HEAD - Assigns :py:class:`~.TrialRunner`s to the given :py:class:`~.Trial`s in batch. -======= - Assigns a :py:class:`~.TrialRunner` to each :py:class:`~.Storage.Trial` in the - batch. ->>>>>>> refactor/rename-reorg-scheduler-methods - - The base class implements a simple round-robin scheduling algorithm for - each Trial in sequence. - - Subclasses can override this method to implement a more sophisticated policy. - For instance:: - - def assign_trial_runners( - self, - trials: Iterable[Storage.Trial], - ) -> TrialRunner: - trial_runners_map = {} - # Implement a more sophisticated policy here. - # For example, to assign the Trial to the TrialRunner with the least - # number of running Trials. - # Or assign the Trial to the TrialRunner that hasn't executed this - # TunableValues Config yet. - for (trial, trial_runner) in trial_runners_map: - # Call the base class method to assign the TrialRunner in the Trial's metadata. - trial.set_trial_runner(trial_runner) - ... - - Notes - ----- - Subclasses are *not* required to assign a TrialRunner to the Trial - (e.g., if the Trial should be deferred to a later time). - - Parameters - ---------- - trials : Iterable[Storage.Trial] - The trial to assign a TrialRunner to. + <<<<<<< HEAD + Assigns :py:class:`~.TrialRunner`s to the given :py:class:`~.Trial`s in batch. + ======= + Assigns a :py:class:`~.TrialRunner` to each :py:class:`~.Storage.Trial` in the + batch. + >>>>>>> refactor/rename-reorg-scheduler-methods + + The base class implements a simple round-robin scheduling algorithm for + each Trial in sequence. + + Subclasses can override this method to implement a more sophisticated policy. + For instance:: + + def assign_trial_runners( + self, + trials: Iterable[Storage.Trial], + ) -> TrialRunner: + trial_runners_map = {} + # Implement a more sophisticated policy here. + # For example, to assign the Trial to the TrialRunner with the least + # number of running Trials. + # Or assign the Trial to the TrialRunner that hasn't executed this + # TunableValues Config yet. + for (trial, trial_runner) in trial_runners_map: + # Call the base class method to assign the TrialRunner in the Trial's metadata. + trial.set_trial_runner(trial_runner) + ... + + Notes + ----- + Subclasses are *not* required to assign a TrialRunner to the Trial + (e.g., if the Trial should be deferred to a later time). + + Parameters + ---------- + trials : Iterable[Storage.Trial] + The trial to assign a TrialRunner to. 
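
        The default round-robin policy is roughly equivalent to the following
        sketch (illustrative only; it assumes ``self._trial_runners`` is an
        ordered mapping of runner ids to TrialRunners)::

            runner_ids = list(self._trial_runners)
            for (i, trial) in enumerate(trials):
                if trial.trial_runner_id is None:
                    trial.set_trial_runner(
                        self._trial_runners[runner_ids[i % len(runner_ids)]]
                    )
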
""" for trial in trials: if trial.trial_runner_id is not None: From 287f8151d26d53922a3ae3f5f90a8a80749d08ff Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 12 May 2025 16:14:39 -0500 Subject: [PATCH 014/109] slight reorg --- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index a6eb893e1b..658a83fb43 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -101,8 +101,8 @@ def __init__( # pylint: disable=too-many-arguments self._optimizer = optimizer self._storage = storage self._root_env_config = root_env_config - self._longest_finished_trial_sequence_id = -1 self._ran_trials: list[Storage.Trial] = [] + self._longest_finished_trial_sequence_id = -1 self._registered_trial_ids: set[int] = set() _LOG.debug("Scheduler instantiated: %s :: %s", self, config) From 0811b7ab32871e5a10641b85caee3a10da65d5ca Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 12 May 2025 16:16:19 -0500 Subject: [PATCH 015/109] fix bad merge --- .../mlos_bench/schedulers/base_scheduler.py | 70 +++++++++---------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 658a83fb43..d840132731 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -437,43 +437,39 @@ def _add_trial_to_queue( def assign_trial_runners(self, trials: Iterable[Storage.Trial]) -> None: """ - <<<<<<< HEAD - Assigns :py:class:`~.TrialRunner`s to the given :py:class:`~.Trial`s in batch. - ======= - Assigns a :py:class:`~.TrialRunner` to each :py:class:`~.Storage.Trial` in the - batch. - >>>>>>> refactor/rename-reorg-scheduler-methods - - The base class implements a simple round-robin scheduling algorithm for - each Trial in sequence. - - Subclasses can override this method to implement a more sophisticated policy. - For instance:: - - def assign_trial_runners( - self, - trials: Iterable[Storage.Trial], - ) -> TrialRunner: - trial_runners_map = {} - # Implement a more sophisticated policy here. - # For example, to assign the Trial to the TrialRunner with the least - # number of running Trials. - # Or assign the Trial to the TrialRunner that hasn't executed this - # TunableValues Config yet. - for (trial, trial_runner) in trial_runners_map: - # Call the base class method to assign the TrialRunner in the Trial's metadata. - trial.set_trial_runner(trial_runner) - ... - - Notes - ----- - Subclasses are *not* required to assign a TrialRunner to the Trial - (e.g., if the Trial should be deferred to a later time). - - Parameters - ---------- - trials : Iterable[Storage.Trial] - The trial to assign a TrialRunner to. + Assigns a :py:class:`~.TrialRunner` to each :py:class:`~.Storage.Trial` in the + batch. + + The base class implements a simple round-robin scheduling algorithm for + each Trial in sequence. + + Subclasses can override this method to implement a more sophisticated policy. + For instance:: + + def assign_trial_runners( + self, + trials: Iterable[Storage.Trial], + ) -> TrialRunner: + trial_runners_map = {} + # Implement a more sophisticated policy here. + # For example, to assign the Trial to the TrialRunner with the least + # number of running Trials. 
+ # Or assign the Trial to the TrialRunner that hasn't executed this + # TunableValues Config yet. + for (trial, trial_runner) in trial_runners_map: + # Call the base class method to assign the TrialRunner in the Trial's metadata. + trial.set_trial_runner(trial_runner) + ... + + Notes + ----- + Subclasses are *not* required to assign a TrialRunner to the Trial + (e.g., if the Trial should be deferred to a later time). + + Parameters + ---------- + trials : Iterable[Storage.Trial] + The trial to assign a TrialRunner to. """ for trial in trials: if trial.trial_runner_id is not None: From c26aaf132faca8a7cf25cbd1fe8c8664b4c1f26f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 12 May 2025 16:17:14 -0500 Subject: [PATCH 016/109] fix doc syntax --- mlos_bench/mlos_bench/storage/base_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 6a79afa60d..e0c70e5887 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -306,7 +306,7 @@ def load( that were scheduled *after* the given trial ID. Otherwise, return data from ALL merged-in experiments and attempt to impute the missing tunable values. - Additionally, if `omit_registered_trial_ids` is provided, omit the + Additionally, if ``omit_registered_trial_ids`` is provided, omit the trials matching those ids. The parameters together allow us to efficiently load data from From 74bfb043df006446e39f68cd3e7840d6b01a5c7e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 10:47:03 -0500 Subject: [PATCH 017/109] Add tests for Status type conversion --- mlos_bench/mlos_bench/environments/status.py | 7 +- .../tests/environments/test_status.py | 82 +++++++++++++++++++ 2 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/environments/test_status.py diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index 0341698d34..70107f3f84 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -23,15 +23,18 @@ class Status(enum.Enum): TIMED_OUT = 7 @staticmethod - def from_str(status_str: str) -> "Status": + def from_str(status_str: str | int) -> "Status": """Convert a string to a Status enum.""" + if not isinstance(status_str, str): + _LOG.warning("Expected type %s for status: %s", type(status_str), status_str) + status_str = str(status_str) if status_str.isdigit(): try: return Status(int(status_str)) except ValueError: _LOG.warning("Unknown status: %d", int(status_str)) try: - status_str = status_str.upper() + status_str = status_str.upper().strip() return Status[status_str] except KeyError: _LOG.warning("Unknown status: %s", status_str) diff --git a/mlos_bench/mlos_bench/tests/environments/test_status.py b/mlos_bench/mlos_bench/tests/environments/test_status.py new file mode 100644 index 0000000000..2d5d2a27d4 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/environments/test_status.py @@ -0,0 +1,82 @@ +""" +Unit tests for the :py:class:`mlos_bench.environments.status.Status` class. + +Tests the :py:meth:`mlos_bench.environments.status.Status.from_str` static method +for correct parsing of both numeric and string representations of each Status, +as well as handling of invalid input. 
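
For example, the parsing under test behaves roughly like this (illustrative
doctest-style lines, assuming the import shown)::

    >>> from mlos_bench.environments.status import Status
    >>> Status.from_str("SUCCEEDED")
    <Status.SUCCEEDED: 4>
    >>> Status.from_str("4")
    <Status.SUCCEEDED: 4>
    >>> Status.from_str(" timed_out ")
    <Status.TIMED_OUT: 7>
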
+""" + +from typing import Any + +import pytest + +from mlos_bench.environments.status import Status + +@pytest.mark.parametrize( + ["input_str", "expected_status"], + [ + ("UNKNOWN", Status.UNKNOWN), + ("0", Status.UNKNOWN), + ("PENDING", Status.PENDING), + ("1", Status.PENDING), + ("READY", Status.READY), + ("2", Status.READY), + ("RUNNING", Status.RUNNING), + ("3", Status.RUNNING), + ("SUCCEEDED", Status.SUCCEEDED), + ("4", Status.SUCCEEDED), + ("CANCELED", Status.CANCELED), + ("5", Status.CANCELED), + ("FAILED", Status.FAILED), + ("6", Status.FAILED), + ("TIMED_OUT", Status.TIMED_OUT), + ("7", Status.TIMED_OUT), + (" TIMED_OUT ", Status.TIMED_OUT), + ] +) +def test_status_from_str_valid(input_str: str, expected_status: Status) -> None: + """ + Test :py:meth:`Status.from_str` with valid string and numeric representations. + + Parameters + ---------- + input_str : str + String representation of the status. + expected_status : Status + Expected Status enum value. + """ + assert ( + Status.from_str(input_str) == expected_status + ), f"Expected {expected_status} for input: {input_str}" + # Check lowercase representation + assert ( + Status.from_str(input_str.lower()) == expected_status + ), f"Expected {expected_status} for input: {input_str.lower()}" + if input_str.isdigit(): + # Also test the numeric representation + assert ( + Status.from_str(int(input_str)) == expected_status # type: ignore + ), f"Expected {expected_status} for input: {int(input_str)}" + + +@pytest.mark.parametrize( + "invalid_input", + [ + "UNKNOWABLE", + "8", + "-1", + "successful", + "", + None, + 123, + [], + {}, + ] +) +def test_status_from_str_invalid(invalid_input: Any) -> None: + """ + Test :py:meth:`Status.from_str` raises ValueError for invalid input. + """ + assert ( + Status.from_str(invalid_input) == Status.UNKNOWN + ), f"Expected Status.UNKNOWN for invalid input: {invalid_input}" From c77f31ba90634825c1e0282784f9d84cea7db878 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 15:51:58 +0000 Subject: [PATCH 018/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../mlos_bench/tests/environments/test_status.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/environments/test_status.py b/mlos_bench/mlos_bench/tests/environments/test_status.py index 2d5d2a27d4..234ee6dad5 100644 --- a/mlos_bench/mlos_bench/tests/environments/test_status.py +++ b/mlos_bench/mlos_bench/tests/environments/test_status.py @@ -1,3 +1,7 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# """ Unit tests for the :py:class:`mlos_bench.environments.status.Status` class. @@ -12,6 +16,7 @@ from mlos_bench.environments.status import Status + @pytest.mark.parametrize( ["input_str", "expected_status"], [ @@ -32,7 +37,7 @@ ("TIMED_OUT", Status.TIMED_OUT), ("7", Status.TIMED_OUT), (" TIMED_OUT ", Status.TIMED_OUT), - ] + ], ) def test_status_from_str_valid(input_str: str, expected_status: Status) -> None: """ @@ -71,12 +76,10 @@ def test_status_from_str_valid(input_str: str, expected_status: Status) -> None: 123, [], {}, - ] + ], ) def test_status_from_str_invalid(invalid_input: Any) -> None: - """ - Test :py:meth:`Status.from_str` raises ValueError for invalid input. 
-    """
+    """Test :py:meth:`Status.from_str` returns ``Status.UNKNOWN`` for invalid input."""
     assert (
         Status.from_str(invalid_input) == Status.UNKNOWN
     ), f"Expected Status.UNKNOWN for invalid input: {invalid_input}"

From 42258629f099468cc35ac47e944b5a656ff7c108 Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Tue, 13 May 2025 11:41:28 -0500
Subject: [PATCH 019/109] improvements on docstring instructions

---
 .github/instructions/python.instructions.md | 8 ++++++++
 mlos_bench/mlos_bench/environments/status.py | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/.github/instructions/python.instructions.md
b/.github/instructions/python.instructions.md index df4f92b5ec..0453691a62 100644 --- a/.github/instructions/python.instructions.md +++ b/.github/instructions/python.instructions.md @@ -8,6 +8,14 @@ applyTo: '**/*.py' - All functions, methods, classes, and attributes should have docstrings. +- Docstrings should be formatted using the [NumPy style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html) format. + + - The docstring should include a summary of the function or method's purpose, followed by a description of its parameters and return values. + + - The docstring should also include any exceptions that may be raised by the function or method. + + - Where possible docstring should include an executable example of how to use the function or method, including any important details about its usage. + - Docstrings should include Sphinx style crossref directives for functions, methods, classes, attributes, and data whenever possible using `:py:class:` or `:py:func:` or `:py:meth:` or `:py:attr:` or `:py:data` syntax, respectively, See Also From 34137a44ba0a1c52a994330ede691c75c40c529a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 11:55:46 -0500 Subject: [PATCH 022/109] Revert "improvements on docstring instructions" This reverts commit 42258629f099468cc35ac47e944b5a656ff7c108. --- .github/instructions/python.instructions.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/instructions/python.instructions.md b/.github/instructions/python.instructions.md index 0453691a62..df4f92b5ec 100644 --- a/.github/instructions/python.instructions.md +++ b/.github/instructions/python.instructions.md @@ -8,14 +8,6 @@ applyTo: '**/*.py' - All functions, methods, classes, and attributes should have docstrings. -- Docstrings should be formatted using the [NumPy style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html) format. - - - The docstring should include a summary of the function or method's purpose, followed by a description of its parameters and return values. - - - The docstring should also include any exceptions that may be raised by the function or method. - - - Where possible docstring should include an executable example of how to use the function or method, including any important details about its usage. 
- - Docstrings should include Sphinx style crossref directives for functions, methods, classes, attributes, and data whenever possible using `:py:class:` or `:py:func:` or `:py:meth:` or `:py:attr:` or `:py:data` syntax, respectively, See Also From 8098ed6df0912b39f84e6c514ffe2b9504071dd3 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 12:29:22 -0500 Subject: [PATCH 023/109] new test for get longest idle prefix --- .../tests/storage/trial_schedule_test.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py index faefe4998b..7edca09fd2 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py @@ -6,6 +6,7 @@ from collections.abc import Iterator from datetime import datetime, timedelta +from random import random from pytz import UTC from mlos_bench.environments.status import Status @@ -220,3 +221,39 @@ def test_rr_scheduling(exp_data: ExperimentData) -> None: assert ( trial.trial_runner_id == expected_runner_id ), f"Expected trial_runner_id {expected_runner_id} for {trial}" + + +def test_get_longest_prefix_finished_trial_id( + exp_storage: Storage.Experiment, + tunable_groups: TunableGroups, +) -> None: + """ + Test that the longest prefix of finished trials is returned correctly. + + See Also + -------- + Storage.get_longest_prefix_finished_trial_id + """ + timestamp = datetime.now(UTC) + config = {} + metrics = {metric: random() for metric in exp_storage.opt_targets} + + # Create several trials + trials = [exp_storage.new_trial(tunable_groups, config=config) for _ in range(0, 10)] + + # Mark some trials at the beginning and end as finished + trials[0].update(Status.SUCCEEDED, timestamp + timedelta(minutes=1), metrics=metrics) + trials[1].update(Status.FAILED, timestamp + timedelta(minutes=2), metrics=metrics) + trials[2].update(Status.TIMED_OUT, timestamp + timedelta(minutes=3), metrics=metrics) + # Leave trials[3] to trials[7] as PENDING + trials[8].update(Status.CANCELED, timestamp + timedelta(minutes=4), metrics=metrics) + trials[9].update(Status.SUCCEEDED, timestamp + timedelta(minutes=5), metrics=metrics) + + # Retrieve the longest prefix of finished trials starting from trial_id 1 + longest_prefix_id = exp_storage.get_longest_prefix_finished_trial_id() + + # Assert that the longest prefix includes only the first three trials + assert longest_prefix_id == trials[2].trial_id, ( + f"Expected longest prefix to end at trial_id {trials[2].trial_id}, " + f"but got {longest_prefix_id}" + ) From 4757ac4f8a84b08a1cad02976946364b3095c11a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 12:29:29 -0500 Subject: [PATCH 024/109] pylint --- mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py index 7edca09fd2..29fa83f605 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py @@ -30,7 +30,7 @@ def test_schedule_trial( exp_storage: Storage.Experiment, tunable_groups: TunableGroups, ) -> None: - # pylint: disable=too-many-locals + # pylint: disable=too-many-locals,too-many-statements """Schedule several trials for future execution and retrieve them later at certain timestamps. 
""" From ca5c9b22dfa6f952b68362f48955403fe21f8d22 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 17:29:54 +0000 Subject: [PATCH 025/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py index 29fa83f605..118d895daa 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py @@ -5,8 +5,8 @@ """Unit tests for scheduling trials for some future time.""" from collections.abc import Iterator from datetime import datetime, timedelta - from random import random + from pytz import UTC from mlos_bench.environments.status import Status From 6f78eeadd51ba1052bfafb71905292f0db6dc083 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 12:31:38 -0500 Subject: [PATCH 026/109] fixups --- mlos_bench/mlos_bench/storage/sql/experiment.py | 2 ++ mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 5839ec78d9..43a7bf2016 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -170,6 +170,7 @@ def get_longest_prefix_finished_trial_id(self) -> int: Status.SUCCEEDED.name, Status.FAILED.name, Status.TIMED_OUT.name, + Status.CANCELED.name, ] ), ), @@ -204,6 +205,7 @@ def load( Status.SUCCEEDED.name, Status.FAILED.name, Status.TIMED_OUT.name, + Status.CANCELED.name, ] ), ) diff --git a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py index 118d895daa..d6df0d468d 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py @@ -245,15 +245,15 @@ def test_get_longest_prefix_finished_trial_id( trials[0].update(Status.SUCCEEDED, timestamp + timedelta(minutes=1), metrics=metrics) trials[1].update(Status.FAILED, timestamp + timedelta(minutes=2), metrics=metrics) trials[2].update(Status.TIMED_OUT, timestamp + timedelta(minutes=3), metrics=metrics) + trials[3].update(Status.CANCELED, timestamp + timedelta(minutes=4), metrics=metrics) # Leave trials[3] to trials[7] as PENDING - trials[8].update(Status.CANCELED, timestamp + timedelta(minutes=4), metrics=metrics) trials[9].update(Status.SUCCEEDED, timestamp + timedelta(minutes=5), metrics=metrics) # Retrieve the longest prefix of finished trials starting from trial_id 1 longest_prefix_id = exp_storage.get_longest_prefix_finished_trial_id() # Assert that the longest prefix includes only the first three trials - assert longest_prefix_id == trials[2].trial_id, ( - f"Expected longest prefix to end at trial_id {trials[2].trial_id}, " + assert longest_prefix_id == trials[3].trial_id, ( + f"Expected longest prefix to end at trial_id {trials[3].trial_id}, " f"but got {longest_prefix_id}" ) From 6b90301bd0a361c951da1ba7aa6dfc513c93b947 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 14:31:18 -0500 Subject: [PATCH 027/109] stash completed statuses in a more reusable way --- mlos_bench/mlos_bench/environments/status.py | 
24 ++++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index 0341698d34..1f756fae5f 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -46,16 +46,17 @@ def is_good(self) -> bool: Status.SUCCEEDED, } + # Class based accessor method to avoid circular import + @staticmethod + def completed_statuses() -> set["Status"]: + """Get the set of :py:data:`.COMPLETED_STATUSES`.""" + return COMPLETED_STATUSES + def is_completed(self) -> bool: """Check if the status of the benchmark/environment Trial or Experiment is one - of {SUCCEEDED, CANCELED, FAILED, TIMED_OUT}. + of :py:data:`.COMPLETED_STATUSES`. """ - return self in { - Status.SUCCEEDED, - Status.CANCELED, - Status.FAILED, - Status.TIMED_OUT, - } + return self in COMPLETED_STATUSES def is_pending(self) -> bool: """Check if the status of the benchmark/environment Trial or Experiment is @@ -92,3 +93,12 @@ def is_timed_out(self) -> bool: TIMED_OUT. """ return self == Status.TIMED_OUT + + +COMPLETED_STATUSES = { + Status.SUCCEEDED, + Status.CANCELED, + Status.FAILED, + Status.TIMED_OUT, +} +"""The set of completed statuses.""" From b80e5e173f4cfce9fe4ab1ae4687736dbd34b050 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 14:37:11 -0500 Subject: [PATCH 028/109] more tests and status tweaks --- mlos_bench/mlos_bench/environments/status.py | 6 + .../tests/environments/test_status.py | 257 ++++++++++++++++++ 2 files changed, 263 insertions(+) diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index aff4a25631..7da72e72bd 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -74,6 +74,12 @@ def is_ready(self) -> bool: """ return self == Status.READY + def is_running(self) -> bool: + """Check if the status of the benchmark/environment Trial or Experiment is + RUNNING. + """ + return self == Status.RUNNING + def is_succeeded(self) -> bool: """Check if the status of the benchmark/environment Trial or Experiment is SUCCEEDED. diff --git a/mlos_bench/mlos_bench/tests/environments/test_status.py b/mlos_bench/mlos_bench/tests/environments/test_status.py index 234ee6dad5..56d26030fc 100644 --- a/mlos_bench/mlos_bench/tests/environments/test_status.py +++ b/mlos_bench/mlos_bench/tests/environments/test_status.py @@ -83,3 +83,260 @@ def test_status_from_str_invalid(invalid_input: Any) -> None: assert ( Status.from_str(invalid_input) == Status.UNKNOWN ), f"Expected Status.UNKNOWN for invalid input: {invalid_input}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, True), + (Status.READY, True), + (Status.RUNNING, True), + (Status.SUCCEEDED, True), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_good(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_good` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_good method. 
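+
+    Examples
+    --------
+    A minimal spot-check of the table encoded above:
+
+    >>> Status.SUCCEEDED.is_good()
+    True
+    >>> Status.FAILED.is_good()
+    False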
+ """ + assert status.is_good() == expected_result, f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, True), + (Status.CANCELED, True), + (Status.FAILED, True), + (Status.TIMED_OUT, True), + ], +) +def test_status_is_completed(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_completed` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_completed method. + """ + assert ( + status.is_completed() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, True), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, False), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_pending(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_pending` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_pending method. + """ + assert ( + status.is_pending() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, True), + (Status.RUNNING, False), + (Status.SUCCEEDED, False), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_ready(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_ready` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_ready method. + """ + assert status.is_ready() == expected_result, f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, True), + (Status.SUCCEEDED, False), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_running(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_running` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_running method. + """ + assert ( + status.is_running() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, True), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_succeeded(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_succeeded` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_succeeded method. 
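+
+    Examples
+    --------
+    Illustrative use (values taken from the parametrization above):
+
+    >>> Status.SUCCEEDED.is_succeeded()
+    True
+    >>> Status.RUNNING.is_succeeded()
+    False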
+ """ + assert ( + status.is_succeeded() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, False), + (Status.CANCELED, True), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_canceled(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_canceled` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_canceled method. + """ + assert ( + status.is_canceled() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, False), + (Status.CANCELED, False), + (Status.FAILED, True), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_failed(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_failed` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_failed method. + """ + assert ( + status.is_failed() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, False), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, True), + ], +) +def test_status_is_timed_out(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_timed_out` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_timed_out method. + """ + assert ( + status.is_timed_out() == expected_result + ), f"Expected {expected_result} for status: {status}" From fbf754847a9bae1783bcb2d92ed30cfbcbd68bfe Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 14:38:21 -0500 Subject: [PATCH 029/109] wip tests and other fixups --- mlos_bench/mlos_bench/storage/base_storage.py | 12 +++--- .../mlos_bench/storage/sql/experiment.py | 38 +++++++++++-------- .../mlos_bench/tests/storage/sql/fixtures.py | 9 +++++ .../tests/storage/trial_schedule_test.py | 24 ++++++++++++ 4 files changed, 62 insertions(+), 21 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 0541ce5dbb..5267e21910 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -342,9 +342,11 @@ def load( """ Load (tunable values, benchmark scores, status) to warm-up the optimizer. - If `last_trial_id` is present, load only the data from the (completed) trials - that were scheduled *after* the given trial ID. Otherwise, return data from ALL - merged-in experiments and attempt to impute the missing tunable values. + If `last_trial_id` is present, load only the data from the + (:py:meth:`completed `) trials that were + added *after* the given trial ID. Otherwise, return data from + ALL merged-in experiments and attempt to impute the missing tunable + values. 
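+
+        A rough sketch of the polling pattern these parameters enable
+        (variable names here are illustrative, not part of this API)::
+
+            last_trial_id = exp.get_longest_prefix_finished_trial_id()
+            (trial_ids, configs, scores, status) = exp.load(
+                last_trial_id=last_trial_id,
+                omit_registered_trial_ids=already_registered_ids,
+            )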
Additionally, if ``omit_registered_trial_ids`` is provided, omit the trials matching those ids. @@ -358,8 +360,8 @@ def load( last_trial_id : int (Optional) Trial ID to start from. omit_registered_trial_ids : Iterable[int] | None = None, - (Optional) List of trial IDs to omit. If None, load all trials. - + (Optional) List of trial IDs to omit. If None, load all trials + after ``last_trial_id``. Returns ------- diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 43a7bf2016..5c75ad52a2 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -164,25 +164,31 @@ def get_longest_prefix_finished_trial_id(self) -> int: ) .where( self._schema.trial.c.exp_id == self._experiment_id, - func.not_( - self._schema.trial.c.status.in_( - [ - Status.SUCCEEDED.name, - Status.FAILED.name, - Status.TIMED_OUT.name, - Status.CANCELED.name, - ] - ), - ), + func.not_(self._schema.trial.c.status.in_(Status.completed_statuses())), ) ) - max_trial_id = conn.execute(first_unfinished_trial_id_stmt).scalar() - if max_trial_id is None: - return -1 - # Return one less than the first unfinished trial ID - it should be - # finished (or not exist, which is fine as a limit). - return int(max_trial_id) - 1 + if max_trial_id is not None: + # Return one less than the first unfinished trial ID - it should be + # finished (or not exist, which is fine as a limit). + return int(max_trial_id) - 1 + + # No unfinished trials, so get the largest completed trial ID. + last_finished_trial_id = ( + self._schema.trial.select() + .with_only_columns( + func.max(self._schema.trial.c.trial_id), + ) + .where( + self._schema.trial.c.exp_id == self._experiment_id, + self._schema.trial.c.status.in_(Status.completed_statuses()), + ) + ) + max_trial_id = conn.execute(last_finished_trial_id).scalar() + if max_trial_id is not None: + return int(max_trial_id) + # Else no trial exist. + return -1 def load( self, diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index cb83bffd4f..4be3f6e14e 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -197,6 +197,15 @@ def _dummy_run_exp( return storage.experiments[exp.experiment_id] +@pytest.fixture +def empty_exp_data( + storage: SqlStorage, + exp_storage: SqlStorage.Experiment, +) -> ExperimentData: + """Test fixture for ExperimentData.""" + return storage.experiments[exp_storage.experiment_id] + + @pytest.fixture def exp_data( storage: SqlStorage, diff --git a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py index d6df0d468d..7e15fd4f1e 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py @@ -223,6 +223,30 @@ def test_rr_scheduling(exp_data: ExperimentData) -> None: ), f"Expected trial_runner_id {expected_runner_id} for {trial}" +def test_empty_get_longest_prefix_finished_trial_id( + storage: Storage, + exp_storage: Storage.Experiment, +) -> None: + """ + Test that the longest prefix of finished trials is empty when no trials are present. + + See Also + -------- + Storage.get_longest_prefix_finished_trial_id + """ + assert not storage.experiments[ + exp_storage.experiment_id + ].trials, "Expected no trials in the experiment." 
+ + # Retrieve the longest prefix of finished trials when no trials are present + longest_prefix_id = exp_storage.get_longest_prefix_finished_trial_id() + + # Assert that the longest prefix is empty + assert ( + longest_prefix_id == -1 + ), f"Expected longest prefix to be -1, but got {longest_prefix_id}" + + def test_get_longest_prefix_finished_trial_id( exp_storage: Storage.Experiment, tunable_groups: TunableGroups, From eebb21454b14b93e6e72b8ec1b7bb9a934cdf86f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 14:31:18 -0500 Subject: [PATCH 030/109] stash completed statuses in a more reusable way --- mlos_bench/mlos_bench/environments/status.py | 24 ++++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index f2b7d83f52..aff4a25631 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -50,16 +50,17 @@ def is_good(self) -> bool: Status.SUCCEEDED, } + # Class based accessor method to avoid circular import + @staticmethod + def completed_statuses() -> set["Status"]: + """Get the set of :py:data:`.COMPLETED_STATUSES`.""" + return COMPLETED_STATUSES + def is_completed(self) -> bool: """Check if the status of the benchmark/environment Trial or Experiment is one - of {SUCCEEDED, CANCELED, FAILED, TIMED_OUT}. + of :py:data:`.COMPLETED_STATUSES`. """ - return self in { - Status.SUCCEEDED, - Status.CANCELED, - Status.FAILED, - Status.TIMED_OUT, - } + return self in COMPLETED_STATUSES def is_pending(self) -> bool: """Check if the status of the benchmark/environment Trial or Experiment is @@ -96,3 +97,12 @@ def is_timed_out(self) -> bool: TIMED_OUT. """ return self == Status.TIMED_OUT + + +COMPLETED_STATUSES = { + Status.SUCCEEDED, + Status.CANCELED, + Status.FAILED, + Status.TIMED_OUT, +} +"""The set of completed statuses.""" From 15abbb5d561c1ddfacef9e06479e0cd78330b597 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 14:37:11 -0500 Subject: [PATCH 031/109] more tests and status tweaks --- mlos_bench/mlos_bench/environments/status.py | 6 + .../tests/environments/test_status.py | 257 ++++++++++++++++++ 2 files changed, 263 insertions(+) diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index aff4a25631..7da72e72bd 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -74,6 +74,12 @@ def is_ready(self) -> bool: """ return self == Status.READY + def is_running(self) -> bool: + """Check if the status of the benchmark/environment Trial or Experiment is + RUNNING. + """ + return self == Status.RUNNING + def is_succeeded(self) -> bool: """Check if the status of the benchmark/environment Trial or Experiment is SUCCEEDED. 
diff --git a/mlos_bench/mlos_bench/tests/environments/test_status.py b/mlos_bench/mlos_bench/tests/environments/test_status.py index 234ee6dad5..56d26030fc 100644 --- a/mlos_bench/mlos_bench/tests/environments/test_status.py +++ b/mlos_bench/mlos_bench/tests/environments/test_status.py @@ -83,3 +83,260 @@ def test_status_from_str_invalid(invalid_input: Any) -> None: assert ( Status.from_str(invalid_input) == Status.UNKNOWN ), f"Expected Status.UNKNOWN for invalid input: {invalid_input}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, True), + (Status.READY, True), + (Status.RUNNING, True), + (Status.SUCCEEDED, True), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_good(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_good` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_good method. + """ + assert status.is_good() == expected_result, f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, True), + (Status.CANCELED, True), + (Status.FAILED, True), + (Status.TIMED_OUT, True), + ], +) +def test_status_is_completed(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_completed` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_completed method. + """ + assert ( + status.is_completed() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, True), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, False), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_pending(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_pending` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_pending method. + """ + assert ( + status.is_pending() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, True), + (Status.RUNNING, False), + (Status.SUCCEEDED, False), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_ready(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_ready` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_ready method. 
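+
+    Examples
+    --------
+    A quick illustration of the expected behavior:
+
+    >>> Status.READY.is_ready()
+    True
+    >>> Status.PENDING.is_ready()
+    False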
+ """ + assert status.is_ready() == expected_result, f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, True), + (Status.SUCCEEDED, False), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_running(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_running` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_running method. + """ + assert ( + status.is_running() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, True), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_succeeded(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_succeeded` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_succeeded method. + """ + assert ( + status.is_succeeded() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, False), + (Status.CANCELED, True), + (Status.FAILED, False), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_canceled(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_canceled` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_canceled method. + """ + assert ( + status.is_canceled() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, False), + (Status.CANCELED, False), + (Status.FAILED, True), + (Status.TIMED_OUT, False), + ], +) +def test_status_is_failed(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_failed` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_failed method. + """ + assert ( + status.is_failed() == expected_result + ), f"Expected {expected_result} for status: {status}" + + +@pytest.mark.parametrize( + ["status", "expected_result"], + [ + (Status.UNKNOWN, False), + (Status.PENDING, False), + (Status.READY, False), + (Status.RUNNING, False), + (Status.SUCCEEDED, False), + (Status.CANCELED, False), + (Status.FAILED, False), + (Status.TIMED_OUT, True), + ], +) +def test_status_is_timed_out(status: Status, expected_result: bool) -> None: + """ + Test :py:meth:`Status.is_timed_out` for various statuses. + + Parameters + ---------- + status : Status + The Status enum value to test. + expected_result : bool + Expected result of the is_timed_out method. 
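+
+    Examples
+    --------
+    For instance (mirroring the parametrized cases above):
+
+    >>> Status.TIMED_OUT.is_timed_out()
+    True
+    >>> Status.SUCCEEDED.is_timed_out()
+    False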
+ """ + assert ( + status.is_timed_out() == expected_result + ), f"Expected {expected_result} for status: {status}" From 6fd08d8df2f4dafcfa46173064b9b0ba05125dcd Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 15:36:23 -0500 Subject: [PATCH 032/109] reorg more functions in scheduler for easier overloading --- .../mlos_bench/schedulers/base_scheduler.py | 59 +++++++++++++++---- mlos_bench/mlos_bench/storage/base_storage.py | 6 ++ 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index d840132731..d72ec171ac 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -272,6 +272,7 @@ def start(self) -> None: self._longest_finished_trial_sequence_id, ) self.run_schedule(is_warm_up) + self.bulk_register_completed_trials() not_done = self.add_new_optimizer_suggestions() self.assign_trial_runners( self.experiment.pending_trials( @@ -314,20 +315,27 @@ def load_tunable_config(self, config_id: int) -> TunableGroups: _LOG.debug("Config %d ::\n%s", config_id, json.dumps(tunable_values, indent=2)) return tunables.copy() - def add_new_optimizer_suggestions(self) -> bool: + def bulk_register_completed_trials(self) -> None: """ - Optimizer part of the loop. - - Load the results of the executed trials into the - :py:class:`~.Optimizer`, suggest new configurations, and add them to the - queue. + Bulk register the most recent completed Trials in the Storage. - Returns - ------- - bool - The return value indicates whether the optimization process should - continue to get suggestions from the Optimizer or not. - See Also: :py:meth:`~.Scheduler.not_done`. + Notes + ----- + This method is called after the Trials have been run (or started) and + the results have been recorded in the Storage by the TrialRunner(s). + + It has logic to handle straggler Trials that finish out of order so + should be usable by both + :py:class:`~mlos_bench.schedulers.SyncScheduler` and async Schedulers. + + See Also + -------- + Scheduler.start + The main loop of the Scheduler. + Storage.Experiment.load + Load the results of the Trials based on some filtering criteria. + Optimizer.bulk_register + Register the results of the Trials in the Optimizer. """ assert self.experiment is not None # Load the results of the trials that have been run since the last time @@ -358,6 +366,33 @@ def add_new_optimizer_suggestions(self) -> bool: if trial_id > self._longest_finished_trial_sequence_id } + def add_new_optimizer_suggestions(self) -> bool: + """ + Optimizer part of the loop. + + Asks the :py:class:`~.Optimizer` for new suggestions and adds them to + the queue. This method is called after the trials have been run and the + results have been loaded into the optimizer. + + Load the results of the executed trials into the + :py:class:`~.Optimizer`, suggest new configurations, and add them to the + queue. + + Returns + ------- + bool + The return value indicates whether the optimization process should + continue to get suggestions from the Optimizer or not. + See Also: :py:meth:`~.Scheduler.not_done`. + + See Also + -------- + Scheduler.not_done + The stopping conditions for the optimization process. + + Scheduler.bulk_register_completed_trials + Bulk register the most recent completed trials in the storage. + """ # Check if the optimizer has converged or not. 
not_done = self.not_done() if not_done: diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 5267e21910..81e8148d26 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -367,6 +367,12 @@ def load( ------- (trial_ids, configs, scores, status) : ([int], [dict], [dict] | None, [Status]) Trial ids, Tunable values, benchmark scores, and status of the trials. + + See Also + -------- + Storage.Experiment.get_longest_prefix_finished_trial_id : + Get the last (registered) trial ID for the experiment. + Scheduler.add_new_optimizer_suggestions """ @abstractmethod From de92c39c2b58a48678bd6466cdd55943483f4869 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 15:37:31 -0500 Subject: [PATCH 033/109] comments --- mlos_bench/mlos_bench/schedulers/base_scheduler.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index d72ec171ac..161a1c5404 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -385,6 +385,11 @@ def add_new_optimizer_suggestions(self) -> bool: continue to get suggestions from the Optimizer or not. See Also: :py:meth:`~.Scheduler.not_done`. + Notes + ----- + Subclasses can override this method to implement a more sophisticated + scheduling policy using the information obtained from the Optimizer. + See Also -------- Scheduler.not_done From 9cc55af847b8740f050848d0f81ad10fc2d2eb07 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 15:37:46 -0500 Subject: [PATCH 034/109] test fixups --- .../mlos_bench/tests/storage/exp_load_test.py | 268 +++++++++++++++++- .../mlos_bench/tests/storage/sql/fixtures.py | 9 - .../tests/storage/trial_schedule_test.py | 60 ---- 3 files changed, 267 insertions(+), 70 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/exp_load_test.py b/mlos_bench/mlos_bench/tests/storage/exp_load_test.py index e07cf80c70..cd8133fe04 100644 --- a/mlos_bench/mlos_bench/tests/storage/exp_load_test.py +++ b/mlos_bench/mlos_bench/tests/storage/exp_load_test.py @@ -3,8 +3,11 @@ # Licensed under the MIT License. # """Unit tests for the storage subsystem.""" -from datetime import datetime, tzinfo +from atexit import register +from datetime import datetime, timedelta, tzinfo +from random import random +from more_itertools import last import pytest from pytz import UTC @@ -157,3 +160,266 @@ def test_exp_trial_pending_3( assert status == [Status.FAILED, Status.SUCCEEDED] assert tunable_groups.copy().assign(configs[0]).reset() == trial_fail.tunables assert tunable_groups.copy().assign(configs[1]).reset() == trial_succ.tunables + + +def test_empty_get_longest_prefix_finished_trial_id( + storage: Storage, + exp_storage: Storage.Experiment, +) -> None: + """ + Test that the longest prefix of finished trials is empty when no trials are present. + """ + assert not storage.experiments[ + exp_storage.experiment_id + ].trials, "Expected no trials in the experiment." 
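+    # Note: -1 is the sentinel returned when the experiment has no trials at
+    # all; it matches the Scheduler's initial
+    # ``_longest_finished_trial_sequence_id`` value (see the assertion below).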
+
+    # Retrieve the longest prefix of finished trials when no trials are present
+    longest_prefix_id = exp_storage.get_longest_prefix_finished_trial_id()
+
+    # Assert that the longest prefix is empty
+    assert (
+        longest_prefix_id == -1
+    ), f"Expected longest prefix to be -1, but got {longest_prefix_id}"
+
+
+def test_sync_success_get_longest_prefix_finished_trial_id(
+    exp_storage: Storage.Experiment,
+    tunable_groups: TunableGroups,
+) -> None:
+    """
+    Test that the longest prefix of finished trials is returned correctly when
+    all trials are finished.
+    """
+    timestamp = datetime.now(UTC)
+    config: dict = {}
+    metrics = {metric: random() for metric in exp_storage.opt_targets}
+
+    # Create several trials
+    trials = [exp_storage.new_trial(tunable_groups, config=config) for _ in range(0, 4)]
+
+    # Mark all of the trials as finished (in order)
+    trials[0].update(Status.SUCCEEDED, timestamp + timedelta(minutes=1), metrics=metrics)
+    trials[1].update(Status.FAILED, timestamp + timedelta(minutes=2), metrics=metrics)
+    trials[2].update(Status.TIMED_OUT, timestamp + timedelta(minutes=3), metrics=metrics)
+    trials[3].update(Status.CANCELED, timestamp + timedelta(minutes=4), metrics=metrics)
+
+    # Retrieve the longest prefix of finished trials starting from trial_id 1
+    longest_prefix_id = exp_storage.get_longest_prefix_finished_trial_id()
+
+    # Assert that the longest prefix includes all four trials
+    assert longest_prefix_id == trials[3].trial_id, (
+        f"Expected longest prefix to end at trial_id {trials[3].trial_id}, "
+        f"but got {longest_prefix_id}"
+    )
+
+
+def test_async_get_longest_prefix_finished_trial_id(
+    exp_storage: Storage.Experiment,
+    tunable_groups: TunableGroups,
+) -> None:
+    """
+    Test that the longest prefix of finished trials is returned correctly when
+    trials finish out of order.
+    """
+    timestamp = datetime.now(UTC)
+    config: dict = {}
+    metrics = {metric: random() for metric in exp_storage.opt_targets}
+
+    # Create several trials
+    trials = [exp_storage.new_trial(tunable_groups, config=config) for _ in range(0, 10)]
+
+    # Mark some trials at the beginning and end as finished
+    trials[0].update(Status.SUCCEEDED, timestamp + timedelta(minutes=1), metrics=metrics)
+    trials[1].update(Status.FAILED, timestamp + timedelta(minutes=2), metrics=metrics)
+    trials[2].update(Status.TIMED_OUT, timestamp + timedelta(minutes=3), metrics=metrics)
+    trials[3].update(Status.CANCELED, timestamp + timedelta(minutes=4), metrics=metrics)
+    # Leave trials[4] to trials[8] as PENDING
+    trials[9].update(Status.SUCCEEDED, timestamp + timedelta(minutes=5), metrics=metrics)
+
+    # Retrieve the longest prefix of finished trials starting from trial_id 1
+    longest_prefix_id = exp_storage.get_longest_prefix_finished_trial_id()
+
+    # Assert that the longest prefix includes only the first four trials
+    assert longest_prefix_id == trials[3].trial_id, (
+        f"Expected longest prefix to end at trial_id {trials[3].trial_id}, "
+        f"but got {longest_prefix_id}"
+    )
+
+
+# TODO: Can we simplify this to use something like SyncScheduler and
+# bulk_register_completed_trials?
+def test_exp_load_async(
+    exp_storage: Storage.Experiment,
+    tunable_groups: TunableGroups,
+) -> None:
+    """
+    Test the `omit_registered_trial_ids` argument of the `Experiment.load()` method.
+
+    Create several trials with mixed statuses (PENDING and completed).
+    Verify that completed trials included in a local set of registered configs
+    are omitted from the `load` operation.
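+
+    The bookkeeping pattern simulated below (mirroring what
+    ``Scheduler.add_new_optimizer_suggestions`` does between optimizer
+    registrations) is roughly::
+
+        last_trial_id = exp_storage.get_longest_prefix_finished_trial_id()
+        registered_trial_ids |= set(loaded_trial_ids)
+        registered_trial_ids = {i for i in registered_trial_ids if i > last_trial_id}
+
+    (``loaded_trial_ids`` here stands for whatever ``load()`` just returned.)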
+ """ + # pylint: disable=too-many-locals,too-many-statements + + last_trial_id = exp_storage.get_longest_prefix_finished_trial_id() + assert last_trial_id == -1, "Expected no trials in the experiment." + registered_trial_ids: set[int] = set() + + # Load trials, omitting registered ones + trial_ids, configs, scores, status = exp_storage.load( + last_trial_id=last_trial_id, + omit_registered_trial_ids=registered_trial_ids, + ) + + assert trial_ids == [] + assert configs == [] + assert scores == [] + assert status == [] + + # Create trials with mixed statuses + trial_1_success = exp_storage.new_trial(tunable_groups) + trial_2_failed = exp_storage.new_trial(tunable_groups) + trial_3_pending = exp_storage.new_trial(tunable_groups) + trial_4_timedout = exp_storage.new_trial(tunable_groups) + trial_5_pending = exp_storage.new_trial(tunable_groups) + + # Update statuses for completed trials + trial_1_success.update(Status.SUCCEEDED, datetime.now(UTC), {"score": 95.0}) + trial_2_failed.update(Status.FAILED, datetime.now(UTC), {"score": -1}) + trial_4_timedout.update(Status.TIMED_OUT, datetime.now(UTC), {"score": -1}) + + # Now evaluate some different sequences of loading trials by simulating what + # we expect a Scheduler to do. + # See Also: Scheduler.add_new_optimizer_suggestions() + + trial_ids, configs, scores, status = exp_storage.load( + last_trial_id=last_trial_id, + omit_registered_trial_ids=registered_trial_ids, + ) + + # Verify that all completed trials are returned. + completed_trials = [ + trial_1_success, + trial_2_failed, + trial_4_timedout, + ] + assert trial_ids == [trial.trial_id for trial in completed_trials] + assert len(configs) == len(completed_trials) + assert status == [trial.status for trial in completed_trials] + + last_trial_id = exp_storage.get_longest_prefix_finished_trial_id() + assert last_trial_id == trial_2_failed.trial_id, ( + f"Expected longest prefix to end at trial_id {trial_2_failed.trial_id}, " + f"but got {last_trial_id}" + ) + registered_trial_ids |= {completed_trial.trial_id for completed_trial in completed_trials} + registered_trial_ids = {i for i in registered_trial_ids if i > last_trial_id} + + # Create some more trials and update their statuses. + # Note: we are leaving some trials in the middle in the PENDING state. + trial_6_canceled = exp_storage.new_trial(tunable_groups) + trial_7_success2 = exp_storage.new_trial(tunable_groups) + trial_6_canceled.update(Status.CANCELED, datetime.now(UTC), {"score": -1}) + trial_7_success2.update(Status.SUCCEEDED, datetime.now(UTC), {"score": 90.0}) + + # Load trials, omitting registered ones + trial_ids, configs, scores, status = exp_storage.load( + last_trial_id=last_trial_id, + omit_registered_trial_ids=registered_trial_ids, + ) + # Verify that only unregistered completed trials are returned + completed_trials = [ + trial_6_canceled, + trial_7_success2, + ] + assert trial_ids == [trial.trial_id for trial in completed_trials] + assert len(configs) == len(completed_trials) + assert status == [trial.status for trial in completed_trials] + + # Update our tracking of registered trials + last_trial_id = exp_storage.get_longest_prefix_finished_trial_id() + # Should still be the same as before since we haven't adjusted the PENDING + # trials at the beginning yet. 
+ assert last_trial_id == trial_2_failed.trial_id, ( + f"Expected longest prefix to end at trial_id {trial_2_failed.trial_id}, " + f"but got {last_trial_id}" + ) + registered_trial_ids |= {completed_trial.trial_id for completed_trial in completed_trials} + registered_trial_ids = {i for i in registered_trial_ids if i > last_trial_id} + + trial_ids, configs, scores, status = exp_storage.load( + last_trial_id=last_trial_id, + omit_registered_trial_ids=registered_trial_ids, + ) + + # Verify that only unregistered completed trials are returned + completed_trials = [] + assert trial_ids == [trial.trial_id for trial in completed_trials] + assert len(configs) == len(completed_trials) + assert status == [trial.status for trial in completed_trials] + + # Now update the PENDING trials to be TIMED_OUT. + trial_3_pending.update(Status.TIMED_OUT, datetime.now(UTC), {"score": -1}) + + # Load trials, omitting registered ones + trial_ids, configs, scores, status = exp_storage.load( + last_trial_id=last_trial_id, + omit_registered_trial_ids=registered_trial_ids, + ) + + # Verify that only unregistered completed trials are returned + completed_trials = [ + trial_3_pending, + ] + assert trial_ids == [trial.trial_id for trial in completed_trials] + assert len(configs) == len(completed_trials) + assert status == [trial.status for trial in completed_trials] + + # Update our tracking of registered trials + last_trial_id = exp_storage.get_longest_prefix_finished_trial_id() + assert last_trial_id == trial_4_timedout.trial_id, ( + f"Expected longest prefix to end at trial_id {trial_4_timedout.trial_id}, " + f"but got {last_trial_id}" + ) + registered_trial_ids |= {completed_trial.trial_id for completed_trial in completed_trials} + registered_trial_ids = {i for i in registered_trial_ids if i > last_trial_id} + + # Load trials, omitting registered ones + trial_ids, configs, scores, status = exp_storage.load( + last_trial_id=last_trial_id, + omit_registered_trial_ids=registered_trial_ids, + ) + # Verify that only unregistered completed trials are returned + completed_trials = [] + assert trial_ids == [trial.trial_id for trial in completed_trials] + assert len(configs) == len(completed_trials) + assert status == [trial.status for trial in completed_trials] + # And that the longest prefix is still the same. + assert last_trial_id == trial_4_timedout.trial_id, ( + f"Expected longest prefix to end at trial_id {trial_4_timedout.trial_id}, " + f"but got {last_trial_id}" + ) + + # Mark the last trial as finished. + trial_5_pending.update(Status.SUCCEEDED, datetime.now(UTC), {"score": 95.0}) + # Load trials, omitting registered ones + trial_ids, configs, scores, status = exp_storage.load( + last_trial_id=last_trial_id, + omit_registered_trial_ids=registered_trial_ids, + ) + # Verify that only unregistered completed trials are returned + completed_trials = [ + trial_5_pending, + ] + assert trial_ids == [trial.trial_id for trial in completed_trials] + assert len(configs) == len(completed_trials) + assert status == [trial.status for trial in completed_trials] + # And that the longest prefix is now the last trial. 
+ last_trial_id = exp_storage.get_longest_prefix_finished_trial_id() + assert last_trial_id == trial_7_success2.trial_id, ( + f"Expected longest prefix to end at trial_id {trial_7_success2.trial_id}, " + f"but got {last_trial_id}" + ) + registered_trial_ids |= {completed_trial.trial_id for completed_trial in completed_trials} + registered_trial_ids = {i for i in registered_trial_ids if i > last_trial_id} + assert registered_trial_ids == set() diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 4be3f6e14e..cb83bffd4f 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -197,15 +197,6 @@ def _dummy_run_exp( return storage.experiments[exp.experiment_id] -@pytest.fixture -def empty_exp_data( - storage: SqlStorage, - exp_storage: SqlStorage.Experiment, -) -> ExperimentData: - """Test fixture for ExperimentData.""" - return storage.experiments[exp_storage.experiment_id] - - @pytest.fixture def exp_data( storage: SqlStorage, diff --git a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py index 7e15fd4f1e..cabf48211d 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py @@ -221,63 +221,3 @@ def test_rr_scheduling(exp_data: ExperimentData) -> None: assert ( trial.trial_runner_id == expected_runner_id ), f"Expected trial_runner_id {expected_runner_id} for {trial}" - - -def test_empty_get_longest_prefix_finished_trial_id( - storage: Storage, - exp_storage: Storage.Experiment, -) -> None: - """ - Test that the longest prefix of finished trials is empty when no trials are present. - - See Also - -------- - Storage.get_longest_prefix_finished_trial_id - """ - assert not storage.experiments[ - exp_storage.experiment_id - ].trials, "Expected no trials in the experiment." - - # Retrieve the longest prefix of finished trials when no trials are present - longest_prefix_id = exp_storage.get_longest_prefix_finished_trial_id() - - # Assert that the longest prefix is empty - assert ( - longest_prefix_id == -1 - ), f"Expected longest prefix to be -1, but got {longest_prefix_id}" - - -def test_get_longest_prefix_finished_trial_id( - exp_storage: Storage.Experiment, - tunable_groups: TunableGroups, -) -> None: - """ - Test that the longest prefix of finished trials is returned correctly. 
- - See Also - -------- - Storage.get_longest_prefix_finished_trial_id - """ - timestamp = datetime.now(UTC) - config = {} - metrics = {metric: random() for metric in exp_storage.opt_targets} - - # Create several trials - trials = [exp_storage.new_trial(tunable_groups, config=config) for _ in range(0, 10)] - - # Mark some trials at the beginning and end as finished - trials[0].update(Status.SUCCEEDED, timestamp + timedelta(minutes=1), metrics=metrics) - trials[1].update(Status.FAILED, timestamp + timedelta(minutes=2), metrics=metrics) - trials[2].update(Status.TIMED_OUT, timestamp + timedelta(minutes=3), metrics=metrics) - trials[3].update(Status.CANCELED, timestamp + timedelta(minutes=4), metrics=metrics) - # Leave trials[3] to trials[7] as PENDING - trials[9].update(Status.SUCCEEDED, timestamp + timedelta(minutes=5), metrics=metrics) - - # Retrieve the longest prefix of finished trials starting from trial_id 1 - longest_prefix_id = exp_storage.get_longest_prefix_finished_trial_id() - - # Assert that the longest prefix includes only the first three trials - assert longest_prefix_id == trials[3].trial_id, ( - f"Expected longest prefix to end at trial_id {trials[3].trial_id}, " - f"but got {longest_prefix_id}" - ) From 65d7cbc69c781580c98f7593facd4250aad91a29 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 16:40:03 -0500 Subject: [PATCH 035/109] new config schemas for schedulers --- .../schedulers/base-scheduler-subschema.json | 69 ++++++++++++ .../schedulers/mock-scheduler-subschema.json | 59 ++++++++++ .../schemas/schedulers/scheduler-schema.json | 105 +++--------------- .../schedulers/sync-scheduler-subschema.json | 27 +++++ 4 files changed, 168 insertions(+), 92 deletions(-) create mode 100644 mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json create mode 100644 mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json create mode 100644 mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json new file mode 100644 index 0000000000..702da1eec3 --- /dev/null +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json @@ -0,0 +1,69 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json", + "title": "mlos_bench base Scheduler config schema definitions", + "description": "mlos_bench base Scheduler config schema definitions for all Scheduler types.", + + "$defs": { + "base_scheduler_config": { + "$comment": "config properties common to all Scheduler types.", + "description": "The scheduler-specific config.", + "type": "object", + "minProperties": 1, + "properties": { + "experiment_id": { + "$ref": "../cli/common-defs-subschemas.json#/$defs/experiment_id" + }, + "trial_id": { + "$ref": "../cli/common-defs-subschemas.json#/$defs/trial_id" + }, + "config_id": { + "$ref": "../cli/common-defs-subschemas.json#/$defs/config_id" + }, + "teardown": { + "description": "Whether to teardown the experiment after running it.", + "type": "boolean" + }, + "max_trials": { + "description": "Max. number of trials to run. 
Use -1 or 0 for unlimited.",
+          "type": "integer",
+          "minimum": -1,
+          "examples": [50, -1]
+        },
+        "trial_config_repeat_count": {
+          "description": "Number of times to repeat a config.",
+          "type": "integer",
+          "minimum": 1,
+          "examples": [3, 5]
+        }
+      }
+    }
+  },
+
+  "type": "object",
+  "properties": {
+    "$schema": {
+      "description": "The schema to use for validating the scheduler config (accepts both URLs and local paths).",
+      "type": "string",
+      "$comment": "This is optional, but if provided, should match the name of the root schema file.",
+      "pattern": "/schemas/schedulers/scheduler-schema.json$"
+    },
+
+    "description": {
+      "description": "Optional description of the config.",
+      "type": "string"
+    },
+
+    "class": {
+      "description": "The name of the scheduler class to use.",
+      "type": "string",
+      "$comment": "Exact matches are handled elsewhere.",
+      "pattern": "^mlos_bench[.]schedulers[.]"
+    },
+
+    "config": {
+      "$ref": "#/$defs/base_scheduler_config"
+    }
+  },
+  "required": ["class"]
+}
diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json
new file mode 100644
index 0000000000..ba3d25589a
--- /dev/null
+++ b/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json
@@ -0,0 +1,59 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json",
+  "title": "mlos_bench Mock Scheduler config",
+  "description": "config for an mlos_bench Mock Scheduler",
+  "type": "object",
+  "properties": {
+    "class": {
+      "enum": [
+        "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler"
+      ]
+    },
+    "config": {
+      "type": "object",
+      "allOf": [
+        {
+          "$ref": "../base-scheduler-subschema.json#/$defs/base_scheduler_config"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "mock_trial_data": {
+              "description": "A list of trial data to use for testing.",
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "trial_id": {
+                    "type": "string"
+                  },
+                  "status": {
+                    "type": "enum",
+                    "enum": [
+                      null,
+                      "UNKNOWN",
+                      "PENDING",
+                      "READY",
+                      "RUNNING",
+                      "SUCCEEDED",
+                      "CANCELED",
+                      "FAILED",
+                      "TIMED_OUT"
+                    ]
+                  },
+                  "score": { "type": "number" }
+                },
+                "required": ["trial_id", "status"]
+              }
+            }
+          },
+          "minProperties": 1
+        }
+      ],
+      "minProperties": 1,
+      "unevaluatedProperties": false
+    }
+  },
+  "required": ["class"]
+}
diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json
index 81b2e79754..99096a1c9f 100644
--- a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json
+++ b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json
@@ -2,105 +2,26 @@
   "$schema": "https://json-schema.org/draft/2020-12/schema",
   "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json",
   "title": "mlos_bench Scheduler config",
-
-  "$defs": {
-    "comment": {
-      "$comment": "This section contains reusable partial schema bits (or just split out for readability)"
-    },
-
-    "config_base_scheduler": {
-      "$comment": "config properties common to all Scheduler types.",
-      "type": "object",
-      "properties": {
-        "experiment_id": {
-          "$ref": "../cli/common-defs-subschemas.json#/$defs/experiment_id"
-        },
-        "trial_id": {
-          "$ref": "../cli/common-defs-subschemas.json#/$defs/trial_id"
-        
}, - "config_id": { - "$ref": "../cli/common-defs-subschemas.json#/$defs/config_id" - }, - "teardown": { - "description": "Whether to teardown the experiment after running it.", - "type": "boolean" - }, - "max_trials": { - "description": "Max. number of trials to run. Use -1 or 0 for unlimited.", - "type": "integer", - "minimum": -1, - "examples": [50, -1] - }, - "trial_config_repeat_count": { - "description": "Number of times to repeat a config.", - "type": "integer", - "minimum": 1, - "examples": [3, 5] - } - } - } - }, - "description": "config for the mlos_bench scheduler", "$comment": "top level schema document rules", - "type": "object", - "properties": { - "$schema": { - "description": "The schema to use for validating the scheduler config (accepts both URLs and local paths).", - "type": "string", - "$comment": "This is optional, but if provided, should match the name of this file.", - "pattern": "/schemas/schedulers/scheduler-schema.json$" - }, - - "description": { - "description": "Optional description of the config.", - "type": "string" - }, - "class": { - "description": "The name of the scheduler class to use.", - "$comment": "required", - "enum": [ - "mlos_bench.schedulers.SyncScheduler", - "mlos_bench.schedulers.sync_scheduler.SyncScheduler" - ] + "type": "object", + "allOf": [ + { + "$comment": "All scheduler subschemas support these base properties.", + "$ref": "../base-scheduler-subschema.json" }, - - "config": { - "description": "The scheduler-specific config.", - "$comment": "Stub for scheduler-specific config appended with condition statements below", - "type": "object", - "minProperties": 1 - } - }, - "required": ["class"], - - "oneOf": [ { - "$comment": "extensions to the 'config' object properties when synchronous scheduler is being used", - "if": { - "properties": { - "class": { - "enum": [ - "mlos_bench.schedulers.SyncScheduler", - "mlos_bench.schedulers.sync_scheduler.SyncScheduler" - ] - } + "$comment": "The set of known scheduler subschemas.", + "oneOf": [ + { + "$ref": "./sync-scheduler-subschema.json" }, - "required": ["class"] - }, - "then": { - "properties": { - "config": { - "type": "object", - "allOf": [{ "$ref": "#/$defs/config_base_scheduler" }], - "$comment": "disallow other properties", - "unevaluatedProperties": false - } + { + "$ref": "./mock-scheduler-subschema.json" } - }, - "else": false + ] } ], - "unevaluatedProperties": false + "required": ["class"] } diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json new file mode 100644 index 0000000000..609339521b --- /dev/null +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json", + "title": "mlos_bench SyncScheduler config", + "description": "config for an mlos_bench SyncScheduler", + "type": "object", + "properties": { + "class": { + "enum": [ + "mlos_bench.schedulers.SyncScheduler", + "mlos_bench.schedulers.sync_scheduler.SyncScheduler" + ] + }, + "config": { + "type": "object", + "$comment": "No extra properties supported by SyncScheduler.", + "allOf": [ + { + "$ref": "../base-scheduler-subschema.json#/$defs/base_scheduler_config" + } + ], + "minProperties": 1, + "unevaluatedProperties": false + } + }, + "required": ["class"] +} From 
25321da45286246a960411b4bb285c599428d714 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 16:40:31 -0500 Subject: [PATCH 036/109] starting to introduce a mock scheduler --- .../mlos_bench/tests/schedulers/__init__.py | 0 .../mlos_bench/tests/schedulers/conftest.py | 0 .../tests/schedulers/mock_scheduler.py | 69 +++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 mlos_bench/mlos_bench/tests/schedulers/__init__.py create mode 100644 mlos_bench/mlos_bench/tests/schedulers/conftest.py create mode 100644 mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py diff --git a/mlos_bench/mlos_bench/tests/schedulers/__init__.py b/mlos_bench/mlos_bench/tests/schedulers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mlos_bench/mlos_bench/tests/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/schedulers/conftest.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py new file mode 100644 index 0000000000..0be3645b6e --- /dev/null +++ b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py @@ -0,0 +1,69 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +A mock scheduler that returns predefined status and score for specific trial IDs. +""" + +import logging + +from datetime import datetime +from collections.abc import Iterable +from typing import Any + +from pytz import UTC + +from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.storage.base_storage import Storage +from mlos_bench.schedulers.base_scheduler import Optimizer +from mlos_bench.schedulers.trial_runner import TrialRunner + +_LOG = logging.getLogger(__name__) + + +class MockScheduler(Scheduler): + """ + A mock scheduler that returns predefined status and score for specific trial IDs. + """ + + def __init__( # pylint: disable=too-many-arguments + self, + *, + config: dict[str, Any], + global_config: dict[str, Any], + trial_runners: Iterable[TrialRunner], + optimizer: Optimizer, + storage: Storage, + root_env_config: str, + ) -> None: + super().__init__( + config=config, + global_config=global_config, + trial_runners=trial_runners, + optimizer=optimizer, + storage=storage, + root_env_config=root_env_config, + ) + self._mock_trial_data = config.get("mock_trial_data", []) + self._mock_trial_data = { + trial_info["trial_id"]: trial_info + for trial_info in self._mock_trial_data + } + + def run_trial(self, trial: Storage.Trial) -> None: + """ + Mock the execution of a trial. + + Parameters: + ---------- + trial : Storage.Trial + The trial to be executed. 
+ """ + trial_id = trial.trial_id + if trial_id not in self._mock_trial_data: + raise ValueError(f"Trial ID {trial_id} not found in mock trial data.") + + trial_info = self._mock_trial_data[trial_id] + _LOG.info("Running trial %d: %s", trial_id, trial_info) + trial.update(trial_info["status"], datetime.now(UTC), trial_info.get("score")) From 0f36a40efafb637168e6e225d2dc3640dbf02ea2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 16:40:38 -0500 Subject: [PATCH 037/109] comment --- mlos_bench/mlos_bench/storage/sql/experiment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 5c75ad52a2..037a99f1f2 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -156,6 +156,8 @@ def load_telemetry(self, trial_id: int) -> list[tuple[datetime, str, Any]]: # TODO: Add a test for this method. def get_longest_prefix_finished_trial_id(self) -> int: with self._engine.connect() as conn: + # TODO: Do this in a single query? + # Get the first (minimum) trial ID with an unfinished status. first_unfinished_trial_id_stmt = ( self._schema.trial.select() From cbdfbf1735d707111b496c3c9ac454100f904629 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 16:40:54 -0500 Subject: [PATCH 038/109] fixups --- .../mlos_bench/storage/sql/experiment.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 037a99f1f2..4fb560411d 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -166,7 +166,11 @@ def get_longest_prefix_finished_trial_id(self) -> int: ) .where( self._schema.trial.c.exp_id == self._experiment_id, - func.not_(self._schema.trial.c.status.in_(Status.completed_statuses())), + func.not_( + self._schema.trial.c.status.in_( + [status.name for status in Status.completed_statuses()] + ) + ), ) ) max_trial_id = conn.execute(first_unfinished_trial_id_stmt).scalar() @@ -175,7 +179,8 @@ def get_longest_prefix_finished_trial_id(self) -> int: # finished (or not exist, which is fine as a limit). return int(max_trial_id) - 1 - # No unfinished trials, so get the largest completed trial ID. + # No unfinished trials, so *all* trials are completed - get the + # largest completed trial ID. last_finished_trial_id = ( self._schema.trial.select() .with_only_columns( @@ -183,13 +188,15 @@ def get_longest_prefix_finished_trial_id(self) -> int: ) .where( self._schema.trial.c.exp_id == self._experiment_id, - self._schema.trial.c.status.in_(Status.completed_statuses()), + self._schema.trial.c.status.in_( + [status.name for status in Status.completed_statuses()] + ), ) ) max_trial_id = conn.execute(last_finished_trial_id).scalar() if max_trial_id is not None: return int(max_trial_id) - # Else no trial exist. + # Else no trials yet exist for this experiment. 
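+            # A return value of -1 matches the Scheduler's initial "no trials yet" state.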
return -1 def load( @@ -209,12 +216,7 @@ def load( self._schema.trial.c.exp_id == self._experiment_id, self._schema.trial.c.trial_id > last_trial_id, self._schema.trial.c.status.in_( - [ - Status.SUCCEEDED.name, - Status.FAILED.name, - Status.TIMED_OUT.name, - Status.CANCELED.name, - ] + [status.name for status in Status.completed_statuses()] ), ) .order_by( From 865d8539e77be95867a4a77241940cbf44fe450f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 16:41:05 -0500 Subject: [PATCH 039/109] rename a test for clarity --- .../mlos_bench/tests/storage/trial_schedule_test.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py index cabf48211d..0dccc7a292 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py @@ -25,14 +25,18 @@ def _trial_ids(trials: Iterator[Storage.Trial]) -> set[int]: return {t.trial_id for t in trials} -def test_schedule_trial( +def test_storage_schedule( storage: Storage, exp_storage: Storage.Experiment, tunable_groups: TunableGroups, ) -> None: # pylint: disable=too-many-locals,too-many-statements - """Schedule several trials for future execution and retrieve them later at certain - timestamps. + """Test some storage functions that schedule several trials for future + execution and retrieve them later at certain timestamps. + + Notes + ----- + This doesn't actually test the Scheduler. """ timestamp = datetime.now(UTC) timedelta_1min = timedelta(minutes=1) From 234d2911cdf72b3e5d07ff643538950e4d501e81 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 16:43:00 -0500 Subject: [PATCH 040/109] linting --- .../mlos_bench/tests/schedulers/__init__.py | 4 ++++ .../mlos_bench/tests/schedulers/conftest.py | 4 ++++ .../tests/schedulers/mock_scheduler.py | 22 ++++++++----------- .../mlos_bench/tests/storage/exp_load_test.py | 16 +++++--------- .../tests/storage/trial_schedule_test.py | 6 ++--- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/__init__.py b/mlos_bench/mlos_bench/tests/schedulers/__init__.py index e69de29bb2..8b9f1bdf79 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/__init__.py +++ b/mlos_bench/mlos_bench/tests/schedulers/__init__.py @@ -0,0 +1,4 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# diff --git a/mlos_bench/mlos_bench/tests/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/schedulers/conftest.py index e69de29bb2..8b9f1bdf79 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/schedulers/conftest.py @@ -0,0 +1,4 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# diff --git a/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py index 0be3645b6e..780b621046 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py @@ -2,29 +2,25 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -A mock scheduler that returns predefined status and score for specific trial IDs. 
-""" +"""A mock scheduler that returns predefined status and score for specific trial IDs.""" import logging - -from datetime import datetime from collections.abc import Iterable +from datetime import datetime from typing import Any from pytz import UTC -from mlos_bench.schedulers.base_scheduler import Scheduler -from mlos_bench.storage.base_storage import Storage -from mlos_bench.schedulers.base_scheduler import Optimizer +from mlos_bench.schedulers.base_scheduler import Optimizer, Scheduler from mlos_bench.schedulers.trial_runner import TrialRunner +from mlos_bench.storage.base_storage import Storage _LOG = logging.getLogger(__name__) class MockScheduler(Scheduler): - """ - A mock scheduler that returns predefined status and score for specific trial IDs. + """A mock scheduler that returns predefined status and score for specific trial + IDs. """ def __init__( # pylint: disable=too-many-arguments @@ -47,15 +43,14 @@ def __init__( # pylint: disable=too-many-arguments ) self._mock_trial_data = config.get("mock_trial_data", []) self._mock_trial_data = { - trial_info["trial_id"]: trial_info - for trial_info in self._mock_trial_data + trial_info["trial_id"]: trial_info for trial_info in self._mock_trial_data } def run_trial(self, trial: Storage.Trial) -> None: """ Mock the execution of a trial. - Parameters: + Parameters ---------- trial : Storage.Trial The trial to be executed. @@ -66,4 +61,5 @@ def run_trial(self, trial: Storage.Trial) -> None: trial_info = self._mock_trial_data[trial_id] _LOG.info("Running trial %d: %s", trial_id, trial_info) + # Don't run it - just update the status and optionally score. trial.update(trial_info["status"], datetime.now(UTC), trial_info.get("score")) diff --git a/mlos_bench/mlos_bench/tests/storage/exp_load_test.py b/mlos_bench/mlos_bench/tests/storage/exp_load_test.py index cd8133fe04..cae402faff 100644 --- a/mlos_bench/mlos_bench/tests/storage/exp_load_test.py +++ b/mlos_bench/mlos_bench/tests/storage/exp_load_test.py @@ -3,11 +3,9 @@ # Licensed under the MIT License. # """Unit tests for the storage subsystem.""" -from atexit import register from datetime import datetime, timedelta, tzinfo from random import random -from more_itertools import last import pytest from pytz import UTC @@ -166,8 +164,8 @@ def test_empty_get_longest_prefix_finished_trial_id( storage: Storage, exp_storage: Storage.Experiment, ) -> None: - """ - Test that the longest prefix of finished trials is empty when no trials are present. + """Test that the longest prefix of finished trials is empty when no trials are + present. """ assert not storage.experiments[ exp_storage.experiment_id @@ -186,9 +184,8 @@ def test_sync_success_get_longest_prefix_finished_trial_id( exp_storage: Storage.Experiment, tunable_groups: TunableGroups, ) -> None: - """ - Test that the longest prefix of finished trials is returned correctly when - all trial are finished. + """Test that the longest prefix of finished trials is returned correctly when all + trial are finished. """ timestamp = datetime.now(UTC) config = {} @@ -217,9 +214,8 @@ def test_async_get_longest_prefix_finished_trial_id( exp_storage: Storage.Experiment, tunable_groups: TunableGroups, ) -> None: - """ - Test that the longest prefix of finished trials is returned correctly when - trial finish out of order. + """Test that the longest prefix of finished trials is returned correctly when trial + finish out of order. 
""" timestamp = datetime.now(UTC) config = {} diff --git a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py index 0dccc7a292..90bf84e7bb 100644 --- a/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py +++ b/mlos_bench/mlos_bench/tests/storage/trial_schedule_test.py @@ -5,7 +5,6 @@ """Unit tests for scheduling trials for some future time.""" from collections.abc import Iterator from datetime import datetime, timedelta -from random import random from pytz import UTC @@ -31,8 +30,9 @@ def test_storage_schedule( tunable_groups: TunableGroups, ) -> None: # pylint: disable=too-many-locals,too-many-statements - """Test some storage functions that schedule several trials for future - execution and retrieve them later at certain timestamps. + """ + Test some storage functions that schedule several trials for future execution and + retrieve them later at certain timestamps. Notes ----- From 2010ec80a42c4275fe23b6a46daaae44a5042637 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 16:46:08 -0500 Subject: [PATCH 041/109] comments --- mlos_bench/mlos_bench/tests/schedulers/__init__.py | 1 + mlos_bench/mlos_bench/tests/schedulers/conftest.py | 1 + mlos_bench/mlos_bench/tests/storage/exp_load_test.py | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/__init__.py b/mlos_bench/mlos_bench/tests/schedulers/__init__.py index 8b9f1bdf79..f4b705771b 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/__init__.py +++ b/mlos_bench/mlos_bench/tests/schedulers/__init__.py @@ -2,3 +2,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # +"""mlos_bench.tests.schedulers""" diff --git a/mlos_bench/mlos_bench/tests/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/schedulers/conftest.py index 8b9f1bdf79..af3a82396c 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/schedulers/conftest.py @@ -2,3 +2,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # +"""pytest fixtures for mlos_bench.schedulers tests.""" diff --git a/mlos_bench/mlos_bench/tests/storage/exp_load_test.py b/mlos_bench/mlos_bench/tests/storage/exp_load_test.py index cae402faff..f82833dd64 100644 --- a/mlos_bench/mlos_bench/tests/storage/exp_load_test.py +++ b/mlos_bench/mlos_bench/tests/storage/exp_load_test.py @@ -244,12 +244,13 @@ def test_async_get_longest_prefix_finished_trial_id( # TODO: Can we simplify this to use something like SyncScheduler and # bulk_register_completed_trials? +# TODO: Update to use MockScheduler def test_exp_load_async( exp_storage: Storage.Experiment, tunable_groups: TunableGroups, ) -> None: """ - Test the `omit_registered_trial_ids` argument of the `Experiment.load()` method. + Test the ``omit_registered_trial_ids`` argument of the ``Experiment.load()`` method. Create several trials with mixed statuses (PENDING and completed). 
Verify that completed trials included in a local set of registered configs From dd5643fda521b85b23a48e38429bb8062ff897f0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 21:46:38 +0000 Subject: [PATCH 042/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/schedulers/__init__.py | 2 +- mlos_bench/mlos_bench/tests/schedulers/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/__init__.py b/mlos_bench/mlos_bench/tests/schedulers/__init__.py index f4b705771b..4bc0076079 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/__init__.py +++ b/mlos_bench/mlos_bench/tests/schedulers/__init__.py @@ -2,4 +2,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""mlos_bench.tests.schedulers""" +"""mlos_bench.tests.schedulers.""" diff --git a/mlos_bench/mlos_bench/tests/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/schedulers/conftest.py index af3a82396c..83a18783cc 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/schedulers/conftest.py @@ -2,4 +2,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""pytest fixtures for mlos_bench.schedulers tests.""" +"""Pytest fixtures for mlos_bench.schedulers tests.""" From 12ac36071d89fa9fc46a4000f8a56af1d2ac6b7d Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 13 May 2025 16:50:55 -0500 Subject: [PATCH 043/109] stub out a test for loading example configs --- .../test_load_scheduler_config_examples.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py new file mode 100644 index 0000000000..4632fbc6ae --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -0,0 +1,55 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""Tests for loading scheduler config examples.""" +import logging + +import pytest + +from mlos_bench.config.schemas.config_schemas import ConfigSchema +from mlos_bench.services.config_persistence import ConfigPersistenceService +from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.tests.config import locate_config_examples +from mlos_bench.util import get_class_from_name + +_LOG = logging.getLogger(__name__) +_LOG.setLevel(logging.DEBUG) + + +# Get the set of configs to test. 
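+# (CONFIG_TYPE below names the subdirectory of the builtin config examples to search.)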
+CONFIG_TYPE = "schedulers" + + +def filter_configs(configs_to_filter: list[str]) -> list[str]: + """If necessary, filter out json files that aren't for the module we're testing.""" + return configs_to_filter + + +configs = locate_config_examples( + ConfigPersistenceService.BUILTIN_CONFIG_PATH, + CONFIG_TYPE, + filter_configs, +) +assert configs + + +@pytest.mark.parametrize("config_path", configs) +def test_load_scheduler_config_examples( + config_loader_service: ConfigPersistenceService, + config_path: str, +) -> None: + """Tests loading a config example.""" + config = config_loader_service.load_config(config_path, ConfigSchema.SCHEDULER) + assert isinstance(config, dict) + # Skip schema loading that would require a database connection for this test. + config["config"]["lazy_schema_create"] = True + cls = get_class_from_name(config["class"]) + assert issubclass(cls, Scheduler) + # Make an instance of the class based on the config. + scheduler_inst = config_loader_service.build_scheduler( + config=config, + service=config_loader_service, + ) + assert scheduler_inst is not None + assert isinstance(scheduler_inst, cls) From d4f50583bbaa88057edbeeb1f70197ccc35aa74c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 21:51:26 +0000 Subject: [PATCH 044/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../config/schedulers/test_load_scheduler_config_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index 4632fbc6ae..9127bbbfca 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -8,8 +8,8 @@ import pytest from mlos_bench.config.schemas.config_schemas import ConfigSchema -from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.tests.config import locate_config_examples from mlos_bench.util import get_class_from_name From 535ead04e94f7fcff57b947b455ffa5087627c15 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 12:12:47 -0500 Subject: [PATCH 045/109] switch to frozenset --- mlos_bench/mlos_bench/environments/status.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index 7da72e72bd..6cafd6f7bb 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -105,10 +105,12 @@ def is_timed_out(self) -> bool: return self == Status.TIMED_OUT -COMPLETED_STATUSES = { - Status.SUCCEEDED, - Status.CANCELED, - Status.FAILED, - Status.TIMED_OUT, -} +COMPLETED_STATUSES = frozenset( + { + Status.SUCCEEDED, + Status.CANCELED, + Status.FAILED, + Status.TIMED_OUT, + } +) """The set of completed statuses.""" From b370787a4ed7f678d947e8a3e52462c4338ff9dd Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 12:13:04 -0500 Subject: [PATCH 046/109] tweak --- mlos_bench/mlos_bench/tests/environments/test_status.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/mlos_bench/mlos_bench/tests/environments/test_status.py b/mlos_bench/mlos_bench/tests/environments/test_status.py index 56d26030fc..351643c1f7 100644 --- a/mlos_bench/mlos_bench/tests/environments/test_status.py +++ b/mlos_bench/mlos_bench/tests/environments/test_status.py @@ -79,7 +79,10 @@ def test_status_from_str_valid(input_str: str, expected_status: Status) -> None: ], ) def test_status_from_str_invalid(invalid_input: Any) -> None: - """Test :py:meth:`Status.from_str` raises ValueError for invalid input.""" + """ + Test :py:meth:`Status.from_str` returns :py:attr:`Status.UNKNOWN` for + invalid input. + """ assert ( Status.from_str(invalid_input) == Status.UNKNOWN ), f"Expected Status.UNKNOWN for invalid input: {invalid_input}" From 18d41e5f92f45298930197d27bcd4688190aa21c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 17:13:31 +0000 Subject: [PATCH 047/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/environments/test_status.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/environments/test_status.py b/mlos_bench/mlos_bench/tests/environments/test_status.py index 351643c1f7..a8dfdd61b8 100644 --- a/mlos_bench/mlos_bench/tests/environments/test_status.py +++ b/mlos_bench/mlos_bench/tests/environments/test_status.py @@ -79,9 +79,8 @@ def test_status_from_str_valid(input_str: str, expected_status: Status) -> None: ], ) def test_status_from_str_invalid(invalid_input: Any) -> None: - """ - Test :py:meth:`Status.from_str` returns :py:attr:`Status.UNKNOWN` for - invalid input. + """Test :py:meth:`Status.from_str` returns :py:attr:`Status.UNKNOWN` for invalid + input. 
""" assert ( Status.from_str(invalid_input) == Status.UNKNOWN From a9f3b08b8c0062108662c8a1712c8c26c9ce9365 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 12:21:07 -0500 Subject: [PATCH 048/109] type fixups --- mlos_bench/mlos_bench/environments/status.py | 2 +- mlos_bench/mlos_bench/tests/environments/test_status.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index 6cafd6f7bb..6d76d7206c 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -52,7 +52,7 @@ def is_good(self) -> bool: # Class based accessor method to avoid circular import @staticmethod - def completed_statuses() -> set["Status"]: + def completed_statuses() -> frozenset["Status"]: """Get the set of :py:data:`.COMPLETED_STATUSES`.""" return COMPLETED_STATUSES diff --git a/mlos_bench/mlos_bench/tests/environments/test_status.py b/mlos_bench/mlos_bench/tests/environments/test_status.py index a8dfdd61b8..3c0a9bccf3 100644 --- a/mlos_bench/mlos_bench/tests/environments/test_status.py +++ b/mlos_bench/mlos_bench/tests/environments/test_status.py @@ -60,7 +60,7 @@ def test_status_from_str_valid(input_str: str, expected_status: Status) -> None: if input_str.isdigit(): # Also test the numeric representation assert ( - Status.from_str(int(input_str)) == expected_status # type: ignore + Status.from_str(int(input_str)) == expected_status ), f"Expected {expected_status} for input: {int(input_str)}" From e56f7c71ea2117636dfc8eb5e02e6566d86e3484 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 13:04:06 -0500 Subject: [PATCH 049/109] doc string tweaks --- mlos_bench/mlos_bench/environments/status.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index 6d76d7206c..d060a6b2f8 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -113,4 +113,15 @@ def is_timed_out(self) -> bool: Status.TIMED_OUT, } ) -"""The set of completed statuses.""" +""" +The set of completed statuses. + +Includes all statuses that indicate the trial or experiment has finished, either +successfully or not. +This set is used to determine if a trial or experiment has reached a final state. +This includes: +- :py:data:`.Status.SUCCEEDED`: The trial or experiment completed successfully. +- :py:data:`.Status.CANCELED`: The trial or experiment was canceled. +- :py:data:`.Status.FAILED`: The trial or experiment failed. +- :py:data:`.Status.TIMED_OUT`: The trial or experiment timed out. 
+""" From cd8b88f88af847f227a5898e542327cda866a23d Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 13:57:28 -0500 Subject: [PATCH 050/109] Start introducing MockScheduler for better Scheduler testing --- .../schedulers/base-scheduler-subschema.json | 69 ++++++++++++ .../schedulers/mock-scheduler-subschema.json | 59 ++++++++++ .../schemas/schedulers/scheduler-schema.json | 105 +++--------------- .../schedulers/sync-scheduler-subschema.json | 27 +++++ .../test_load_scheduler_config_examples.py | 55 +++++++++ .../mlos_bench/tests/schedulers/__init__.py | 5 + .../mlos_bench/tests/schedulers/conftest.py | 5 + .../tests/schedulers/mock_scheduler.py | 65 +++++++++++ 8 files changed, 298 insertions(+), 92 deletions(-) create mode 100644 mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json create mode 100644 mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json create mode 100644 mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json create mode 100644 mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py create mode 100644 mlos_bench/mlos_bench/tests/schedulers/__init__.py create mode 100644 mlos_bench/mlos_bench/tests/schedulers/conftest.py create mode 100644 mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json new file mode 100644 index 0000000000..702da1eec3 --- /dev/null +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json @@ -0,0 +1,69 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json", + "title": "mlos_bench base Scheduler config schema definitions", + "description": "mlos_bench base Scheduler config schema definitions for all Scheduler types.", + + "$defs": { + "base_scheduler_config": { + "$comment": "config properties common to all Scheduler types.", + "description": "The scheduler-specific config.", + "type": "object", + "minProperties": 1, + "properties": { + "experiment_id": { + "$ref": "../cli/common-defs-subschemas.json#/$defs/experiment_id" + }, + "trial_id": { + "$ref": "../cli/common-defs-subschemas.json#/$defs/trial_id" + }, + "config_id": { + "$ref": "../cli/common-defs-subschemas.json#/$defs/config_id" + }, + "teardown": { + "description": "Whether to teardown the experiment after running it.", + "type": "boolean" + }, + "max_trials": { + "description": "Max. number of trials to run. 
Use -1 or 0 for unlimited.",
+          "type": "integer",
+          "minimum": -1,
+          "examples": [50, -1]
+        },
+        "trial_config_repeat_count": {
+          "description": "Number of times to repeat a config.",
+          "type": "integer",
+          "minimum": 1,
+          "examples": [3, 5]
+        }
+      }
+    }
+  },
+
+  "type": "object",
+  "properties": {
+    "$schema": {
+      "description": "The schema to use for validating the scheduler config (accepts both URLs and local paths).",
+      "type": "string",
+      "$comment": "This is optional, but if provided, should match the name of the root schema file.",
+      "pattern": "/schemas/schedulers/scheduler-schema.json$"
+    },
+
+    "description": {
+      "description": "Optional description of the config.",
+      "type": "string"
+    },
+
+    "class": {
+      "description": "The name of the scheduler class to use.",
+      "type": "string",
+      "$comment": "Exact matches are handled elsewhere.",
+      "pattern": "^mlos_bench[.]schedulers[.]"
+    },
+
+    "config": {
+      "$ref": "#/$defs/base_scheduler_config"
+    }
+  },
+  "required": ["class"]
+}
diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json
new file mode 100644
index 0000000000..ba3d25589a
--- /dev/null
+++ b/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json
@@ -0,0 +1,59 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json",
+  "title": "mlos_bench Mock Scheduler config",
+  "description": "config for an mlos_bench Mock Scheduler",
+  "type": "object",
+  "properties": {
+    "class": {
+      "enum": [
+        "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler"
+      ]
+    },
+    "config": {
+      "type": "object",
+      "allOf": [
+        {
+          "$ref": "../base-scheduler-subschema.json#/$defs/base_scheduler_config"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "mock_trial_data": {
+              "description": "A list of trial data to use for testing.",
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "trial_id": {
+                    "type": "string"
+                  },
+                  "status": {
+                    "type": "enum",
+                    "enum": [
+                      null,
+                      "UNKNOWN",
+                      "PENDING",
+                      "READY",
+                      "RUNNING",
+                      "SUCCEEDED",
+                      "CANCELED",
+                      "FAILED",
+                      "TIMED_OUT"
+                    ]
+                  },
+                  "score": { "type": "number" }
+                },
+                "required": ["trial_id", "status"]
+              }
+            }
+          },
+          "minProperties": 1
+        }
+      ],
+      "minProperties": 1,
+      "unevaluatedProperties": false
+    }
+  },
+  "required": ["class"]
+}
diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json
index 81b2e79754..99096a1c9f 100644
--- a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json
+++ b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json
@@ -2,105 +2,26 @@
   "$schema": "https://json-schema.org/draft/2020-12/schema",
   "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json",
   "title": "mlos_bench Scheduler config",
-
-  "$defs": {
-    "comment": {
-      "$comment": "This section contains reusable partial schema bits (or just split out for readability)"
-    },
-
-    "config_base_scheduler": {
-      "$comment": "config properties common to all Scheduler types.",
-      "type": "object",
-      "properties": {
-        "experiment_id": {
-          "$ref": "../cli/common-defs-subschemas.json#/$defs/experiment_id"
-        },
-        "trial_id": {
-          "$ref": "../cli/common-defs-subschemas.json#/$defs/trial_id"
-        
}, - "config_id": { - "$ref": "../cli/common-defs-subschemas.json#/$defs/config_id" - }, - "teardown": { - "description": "Whether to teardown the experiment after running it.", - "type": "boolean" - }, - "max_trials": { - "description": "Max. number of trials to run. Use -1 or 0 for unlimited.", - "type": "integer", - "minimum": -1, - "examples": [50, -1] - }, - "trial_config_repeat_count": { - "description": "Number of times to repeat a config.", - "type": "integer", - "minimum": 1, - "examples": [3, 5] - } - } - } - }, - "description": "config for the mlos_bench scheduler", "$comment": "top level schema document rules", - "type": "object", - "properties": { - "$schema": { - "description": "The schema to use for validating the scheduler config (accepts both URLs and local paths).", - "type": "string", - "$comment": "This is optional, but if provided, should match the name of this file.", - "pattern": "/schemas/schedulers/scheduler-schema.json$" - }, - - "description": { - "description": "Optional description of the config.", - "type": "string" - }, - "class": { - "description": "The name of the scheduler class to use.", - "$comment": "required", - "enum": [ - "mlos_bench.schedulers.SyncScheduler", - "mlos_bench.schedulers.sync_scheduler.SyncScheduler" - ] + "type": "object", + "allOf": [ + { + "$comment": "All scheduler subschemas support these base properties.", + "$ref": "../base-scheduler-subschema.json" }, - - "config": { - "description": "The scheduler-specific config.", - "$comment": "Stub for scheduler-specific config appended with condition statements below", - "type": "object", - "minProperties": 1 - } - }, - "required": ["class"], - - "oneOf": [ { - "$comment": "extensions to the 'config' object properties when synchronous scheduler is being used", - "if": { - "properties": { - "class": { - "enum": [ - "mlos_bench.schedulers.SyncScheduler", - "mlos_bench.schedulers.sync_scheduler.SyncScheduler" - ] - } + "$comment": "The set of known scheduler subschemas.", + "oneOf": [ + { + "$ref": "./sync-scheduler-subschema.json" }, - "required": ["class"] - }, - "then": { - "properties": { - "config": { - "type": "object", - "allOf": [{ "$ref": "#/$defs/config_base_scheduler" }], - "$comment": "disallow other properties", - "unevaluatedProperties": false - } + { + "$ref": "./mock-scheduler-subschema.json" } - }, - "else": false + ] } ], - "unevaluatedProperties": false + "required": ["class"] } diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json new file mode 100644 index 0000000000..609339521b --- /dev/null +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json", + "title": "mlos_bench SyncScheduler config", + "description": "config for an mlos_bench SyncScheduler", + "type": "object", + "properties": { + "class": { + "enum": [ + "mlos_bench.schedulers.SyncScheduler", + "mlos_bench.schedulers.sync_scheduler.SyncScheduler" + ] + }, + "config": { + "type": "object", + "$comment": "No extra properties supported by SyncScheduler.", + "allOf": [ + { + "$ref": "../base-scheduler-subschema.json#/$defs/base_scheduler_config" + } + ], + "minProperties": 1, + "unevaluatedProperties": false + } + }, + "required": ["class"] +} diff 
--git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py new file mode 100644 index 0000000000..9127bbbfca --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -0,0 +1,55 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""Tests for loading scheduler config examples.""" +import logging + +import pytest + +from mlos_bench.config.schemas.config_schemas import ConfigSchema +from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.services.config_persistence import ConfigPersistenceService +from mlos_bench.tests.config import locate_config_examples +from mlos_bench.util import get_class_from_name + +_LOG = logging.getLogger(__name__) +_LOG.setLevel(logging.DEBUG) + + +# Get the set of configs to test. +CONFIG_TYPE = "schedulers" + + +def filter_configs(configs_to_filter: list[str]) -> list[str]: + """If necessary, filter out json files that aren't for the module we're testing.""" + return configs_to_filter + + +configs = locate_config_examples( + ConfigPersistenceService.BUILTIN_CONFIG_PATH, + CONFIG_TYPE, + filter_configs, +) +assert configs + + +@pytest.mark.parametrize("config_path", configs) +def test_load_scheduler_config_examples( + config_loader_service: ConfigPersistenceService, + config_path: str, +) -> None: + """Tests loading a config example.""" + config = config_loader_service.load_config(config_path, ConfigSchema.SCHEDULER) + assert isinstance(config, dict) + # Skip schema loading that would require a database connection for this test. + config["config"]["lazy_schema_create"] = True + cls = get_class_from_name(config["class"]) + assert issubclass(cls, Scheduler) + # Make an instance of the class based on the config. + scheduler_inst = config_loader_service.build_scheduler( + config=config, + service=config_loader_service, + ) + assert scheduler_inst is not None + assert isinstance(scheduler_inst, cls) diff --git a/mlos_bench/mlos_bench/tests/schedulers/__init__.py b/mlos_bench/mlos_bench/tests/schedulers/__init__.py new file mode 100644 index 0000000000..4bc0076079 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/schedulers/__init__.py @@ -0,0 +1,5 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""mlos_bench.tests.schedulers.""" diff --git a/mlos_bench/mlos_bench/tests/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/schedulers/conftest.py new file mode 100644 index 0000000000..83a18783cc --- /dev/null +++ b/mlos_bench/mlos_bench/tests/schedulers/conftest.py @@ -0,0 +1,5 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""Pytest fixtures for mlos_bench.schedulers tests.""" diff --git a/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py new file mode 100644 index 0000000000..780b621046 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py @@ -0,0 +1,65 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+# +"""A mock scheduler that returns predefined status and score for specific trial IDs.""" + +import logging +from collections.abc import Iterable +from datetime import datetime +from typing import Any + +from pytz import UTC + +from mlos_bench.schedulers.base_scheduler import Optimizer, Scheduler +from mlos_bench.schedulers.trial_runner import TrialRunner +from mlos_bench.storage.base_storage import Storage + +_LOG = logging.getLogger(__name__) + + +class MockScheduler(Scheduler): + """A mock scheduler that returns predefined status and score for specific trial + IDs. + """ + + def __init__( # pylint: disable=too-many-arguments + self, + *, + config: dict[str, Any], + global_config: dict[str, Any], + trial_runners: Iterable[TrialRunner], + optimizer: Optimizer, + storage: Storage, + root_env_config: str, + ) -> None: + super().__init__( + config=config, + global_config=global_config, + trial_runners=trial_runners, + optimizer=optimizer, + storage=storage, + root_env_config=root_env_config, + ) + self._mock_trial_data = config.get("mock_trial_data", []) + self._mock_trial_data = { + trial_info["trial_id"]: trial_info for trial_info in self._mock_trial_data + } + + def run_trial(self, trial: Storage.Trial) -> None: + """ + Mock the execution of a trial. + + Parameters + ---------- + trial : Storage.Trial + The trial to be executed. + """ + trial_id = trial.trial_id + if trial_id not in self._mock_trial_data: + raise ValueError(f"Trial ID {trial_id} not found in mock trial data.") + + trial_info = self._mock_trial_data[trial_id] + _LOG.info("Running trial %d: %s", trial_id, trial_info) + # Don't run it - just update the status and optionally score. + trial.update(trial_info["status"], datetime.now(UTC), trial_info.get("score")) From 2c9c968f94366a7316f356178c32193db36d6907 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 17:00:33 -0500 Subject: [PATCH 051/109] fixup scheduler schemas refactor --- .../config/schemas/schedulers/mock-scheduler-subschema.json | 3 +-- .../mlos_bench/config/schemas/schedulers/scheduler-schema.json | 2 +- .../config/schemas/schedulers/sync-scheduler-subschema.json | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json index ba3d25589a..a784145c69 100644 --- a/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json @@ -14,7 +14,7 @@ "type": "object", "allOf": [ { - "$ref": "../base-scheduler-subschema.json#/$defs/base_scheduler_config" + "$ref": "base-scheduler-subschema.json#/$defs/base_scheduler_config" }, { "type": "object", @@ -29,7 +29,6 @@ "type": "string" }, "status": { - "type": "enum", "enum": [ null, "UNKNOWN", diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json index 99096a1c9f..c3a79ef605 100644 --- a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json @@ -9,7 +9,7 @@ "allOf": [ { "$comment": "All scheduler subschemas support these base properties.", - "$ref": "../base-scheduler-subschema.json" + "$ref": "./base-scheduler-subschema.json" }, { "$comment": "The set of known scheduler subschemas.", diff --git 
a/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json index 609339521b..e7f0e40eb2 100644 --- a/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json @@ -16,7 +16,7 @@ "$comment": "No extra properties supported by SyncScheduler.", "allOf": [ { - "$ref": "../base-scheduler-subschema.json#/$defs/base_scheduler_config" + "$ref": "base-scheduler-subschema.json#/$defs/base_scheduler_config" } ], "minProperties": 1, From 2f4a82e25cc73b7e0ebfdab7e1feb76614540381 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 17:00:59 -0500 Subject: [PATCH 052/109] reorg optimizer fixtures for reuse --- .../mlos_bench/tests/optimizers/conftest.py | 180 ++---------------- .../mlos_bench/tests/optimizers/fixtures.py | 171 +++++++++++++++++ 2 files changed, 184 insertions(+), 167 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/optimizers/fixtures.py diff --git a/mlos_bench/mlos_bench/tests/optimizers/conftest.py b/mlos_bench/mlos_bench/tests/optimizers/conftest.py index aaa6b14753..f1c758bca6 100644 --- a/mlos_bench/mlos_bench/tests/optimizers/conftest.py +++ b/mlos_bench/mlos_bench/tests/optimizers/conftest.py @@ -2,170 +2,16 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Test fixtures for mlos_bench optimizers.""" - - -import pytest - -from mlos_bench.optimizers.manual_optimizer import ManualOptimizer -from mlos_bench.optimizers.mlos_core_optimizer import MlosCoreOptimizer -from mlos_bench.optimizers.mock_optimizer import MockOptimizer -from mlos_bench.tests import SEED -from mlos_bench.tunables.tunable_groups import TunableGroups - -# pylint: disable=redefined-outer-name - - -@pytest.fixture -def mock_configs() -> list[dict]: - """Mock configurations of earlier experiments.""" - return [ - { - "vmSize": "Standard_B4ms", - "idle": "halt", - "kernel_sched_migration_cost_ns": 50000, - "kernel_sched_latency_ns": 1000000, - }, - { - "vmSize": "Standard_B4ms", - "idle": "halt", - "kernel_sched_migration_cost_ns": 40000, - "kernel_sched_latency_ns": 2000000, - }, - { - "vmSize": "Standard_B4ms", - "idle": "mwait", - "kernel_sched_migration_cost_ns": -1, # Special value - "kernel_sched_latency_ns": 3000000, - }, - { - "vmSize": "Standard_B2s", - "idle": "mwait", - "kernel_sched_migration_cost_ns": 200000, - "kernel_sched_latency_ns": 4000000, - }, - ] - - -@pytest.fixture -def mock_opt_no_defaults(tunable_groups: TunableGroups) -> MockOptimizer: - """Test fixture for MockOptimizer that ignores the initial configuration.""" - return MockOptimizer( - tunables=tunable_groups, - service=None, - config={ - "optimization_targets": {"score": "min"}, - "max_suggestions": 5, - "start_with_defaults": False, - "seed": SEED, - }, - ) - - -@pytest.fixture -def mock_opt(tunable_groups: TunableGroups) -> MockOptimizer: - """Test fixture for MockOptimizer.""" - return MockOptimizer( - tunables=tunable_groups, - service=None, - config={"optimization_targets": {"score": "min"}, "max_suggestions": 5, "seed": SEED}, - ) - - -@pytest.fixture -def mock_opt_max(tunable_groups: TunableGroups) -> MockOptimizer: - """Test fixture for MockOptimizer.""" - return MockOptimizer( - tunables=tunable_groups, - service=None, - config={"optimization_targets": {"score": "max"}, "max_suggestions": 10, "seed": SEED}, - ) - - -@pytest.fixture -def flaml_opt(tunable_groups: 
TunableGroups) -> MlosCoreOptimizer: - """Test fixture for mlos_core FLAML optimizer.""" - return MlosCoreOptimizer( - tunables=tunable_groups, - service=None, - config={ - "optimization_targets": {"score": "min"}, - "max_suggestions": 15, - "optimizer_type": "FLAML", - "seed": SEED, - }, - ) - - -@pytest.fixture -def flaml_opt_max(tunable_groups: TunableGroups) -> MlosCoreOptimizer: - """Test fixture for mlos_core FLAML optimizer.""" - return MlosCoreOptimizer( - tunables=tunable_groups, - service=None, - config={ - "optimization_targets": {"score": "max"}, - "max_suggestions": 15, - "optimizer_type": "FLAML", - "seed": SEED, - }, - ) - - -# FIXME: SMAC's RF model can be non-deterministic at low iterations, which are -# normally calculated as a percentage of the max_suggestions and number of -# tunable dimensions, so for now we set the initial random samples equal to the -# number of iterations and control them with a seed. - -SMAC_ITERATIONS = 10 - - -@pytest.fixture -def smac_opt(tunable_groups: TunableGroups) -> MlosCoreOptimizer: - """Test fixture for mlos_core SMAC optimizer.""" - return MlosCoreOptimizer( - tunables=tunable_groups, - service=None, - config={ - "optimization_targets": {"score": "min"}, - "max_suggestions": SMAC_ITERATIONS, - "optimizer_type": "SMAC", - "seed": SEED, - "output_directory": None, - # See Above - "n_random_init": SMAC_ITERATIONS, - "max_ratio": 1.0, - }, - ) - - -@pytest.fixture -def smac_opt_max(tunable_groups: TunableGroups) -> MlosCoreOptimizer: - """Test fixture for mlos_core SMAC optimizer.""" - return MlosCoreOptimizer( - tunables=tunable_groups, - service=None, - config={ - "optimization_targets": {"score": "max"}, - "max_suggestions": SMAC_ITERATIONS, - "optimizer_type": "SMAC", - "seed": SEED, - "output_directory": None, - # See Above - "n_random_init": SMAC_ITERATIONS, - "max_ratio": 1.0, - }, - ) - - -@pytest.fixture -def manual_opt(tunable_groups: TunableGroups, mock_configs: list[dict]) -> ManualOptimizer: - """Test fixture for ManualOptimizer.""" - return ManualOptimizer( - tunables=tunable_groups, - service=None, - config={ - "max_cycles": 2, - "tunable_values_cycle": mock_configs, - }, - ) +"""Export test fixtures for mlos_bench optimizers.""" + +import mlos_bench.tests.optimizers.fixtures + +mock_configs = mlos_bench.tests.optimizers.fixtures.mock_configs +mock_opt_no_defaults = mlos_bench.tests.optimizers.fixtures.mock_opt_no_defaults +mock_opt = mlos_bench.tests.optimizers.fixtures.mock_opt +mock_opt_max = mlos_bench.tests.optimizers.fixtures.mock_opt_max +flaml_opt = mlos_bench.tests.optimizers.fixtures.flaml_opt +flaml_opt_max = mlos_bench.tests.optimizers.fixtures.flaml_opt_max +smac_opt = mlos_bench.tests.optimizers.fixtures.smac_opt +smac_opt_max = mlos_bench.tests.optimizers.fixtures.smac_opt_max +manual_opt = mlos_bench.tests.optimizers.fixtures.manual_opt diff --git a/mlos_bench/mlos_bench/tests/optimizers/fixtures.py b/mlos_bench/mlos_bench/tests/optimizers/fixtures.py new file mode 100644 index 0000000000..aaa6b14753 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/optimizers/fixtures.py @@ -0,0 +1,171 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+# +"""Test fixtures for mlos_bench optimizers.""" + + +import pytest + +from mlos_bench.optimizers.manual_optimizer import ManualOptimizer +from mlos_bench.optimizers.mlos_core_optimizer import MlosCoreOptimizer +from mlos_bench.optimizers.mock_optimizer import MockOptimizer +from mlos_bench.tests import SEED +from mlos_bench.tunables.tunable_groups import TunableGroups + +# pylint: disable=redefined-outer-name + + +@pytest.fixture +def mock_configs() -> list[dict]: + """Mock configurations of earlier experiments.""" + return [ + { + "vmSize": "Standard_B4ms", + "idle": "halt", + "kernel_sched_migration_cost_ns": 50000, + "kernel_sched_latency_ns": 1000000, + }, + { + "vmSize": "Standard_B4ms", + "idle": "halt", + "kernel_sched_migration_cost_ns": 40000, + "kernel_sched_latency_ns": 2000000, + }, + { + "vmSize": "Standard_B4ms", + "idle": "mwait", + "kernel_sched_migration_cost_ns": -1, # Special value + "kernel_sched_latency_ns": 3000000, + }, + { + "vmSize": "Standard_B2s", + "idle": "mwait", + "kernel_sched_migration_cost_ns": 200000, + "kernel_sched_latency_ns": 4000000, + }, + ] + + +@pytest.fixture +def mock_opt_no_defaults(tunable_groups: TunableGroups) -> MockOptimizer: + """Test fixture for MockOptimizer that ignores the initial configuration.""" + return MockOptimizer( + tunables=tunable_groups, + service=None, + config={ + "optimization_targets": {"score": "min"}, + "max_suggestions": 5, + "start_with_defaults": False, + "seed": SEED, + }, + ) + + +@pytest.fixture +def mock_opt(tunable_groups: TunableGroups) -> MockOptimizer: + """Test fixture for MockOptimizer.""" + return MockOptimizer( + tunables=tunable_groups, + service=None, + config={"optimization_targets": {"score": "min"}, "max_suggestions": 5, "seed": SEED}, + ) + + +@pytest.fixture +def mock_opt_max(tunable_groups: TunableGroups) -> MockOptimizer: + """Test fixture for MockOptimizer.""" + return MockOptimizer( + tunables=tunable_groups, + service=None, + config={"optimization_targets": {"score": "max"}, "max_suggestions": 10, "seed": SEED}, + ) + + +@pytest.fixture +def flaml_opt(tunable_groups: TunableGroups) -> MlosCoreOptimizer: + """Test fixture for mlos_core FLAML optimizer.""" + return MlosCoreOptimizer( + tunables=tunable_groups, + service=None, + config={ + "optimization_targets": {"score": "min"}, + "max_suggestions": 15, + "optimizer_type": "FLAML", + "seed": SEED, + }, + ) + + +@pytest.fixture +def flaml_opt_max(tunable_groups: TunableGroups) -> MlosCoreOptimizer: + """Test fixture for mlos_core FLAML optimizer.""" + return MlosCoreOptimizer( + tunables=tunable_groups, + service=None, + config={ + "optimization_targets": {"score": "max"}, + "max_suggestions": 15, + "optimizer_type": "FLAML", + "seed": SEED, + }, + ) + + +# FIXME: SMAC's RF model can be non-deterministic at low iterations, which are +# normally calculated as a percentage of the max_suggestions and number of +# tunable dimensions, so for now we set the initial random samples equal to the +# number of iterations and control them with a seed. 
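+# (Illustrative numbers, not SMAC's exact internals: with max_suggestions=10
+# and a default max_ratio around 0.25, the initial design could be capped at
+# only 2-3 random configs, which is where the run-to-run variance creeps in.)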
+ +SMAC_ITERATIONS = 10 + + +@pytest.fixture +def smac_opt(tunable_groups: TunableGroups) -> MlosCoreOptimizer: + """Test fixture for mlos_core SMAC optimizer.""" + return MlosCoreOptimizer( + tunables=tunable_groups, + service=None, + config={ + "optimization_targets": {"score": "min"}, + "max_suggestions": SMAC_ITERATIONS, + "optimizer_type": "SMAC", + "seed": SEED, + "output_directory": None, + # See Above + "n_random_init": SMAC_ITERATIONS, + "max_ratio": 1.0, + }, + ) + + +@pytest.fixture +def smac_opt_max(tunable_groups: TunableGroups) -> MlosCoreOptimizer: + """Test fixture for mlos_core SMAC optimizer.""" + return MlosCoreOptimizer( + tunables=tunable_groups, + service=None, + config={ + "optimization_targets": {"score": "max"}, + "max_suggestions": SMAC_ITERATIONS, + "optimizer_type": "SMAC", + "seed": SEED, + "output_directory": None, + # See Above + "n_random_init": SMAC_ITERATIONS, + "max_ratio": 1.0, + }, + ) + + +@pytest.fixture +def manual_opt(tunable_groups: TunableGroups, mock_configs: list[dict]) -> ManualOptimizer: + """Test fixture for ManualOptimizer.""" + return ManualOptimizer( + tunables=tunable_groups, + service=None, + config={ + "max_cycles": 2, + "tunable_values_cycle": mock_configs, + }, + ) From 24ccf5a893d875d99f7b2d7df18dfc220a7e6fbe Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 17:04:59 -0500 Subject: [PATCH 053/109] reorg files based storage fixture --- .../mlos_bench/tests/storage/conftest.py | 1 + .../mlos_bench/tests/storage/sql/fixtures.py | 37 ++++++ .../tests/storage/test_storage_pickling.py | 117 ++++++++---------- 3 files changed, 88 insertions(+), 67 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py index a143705282..c510793fac 100644 --- a/mlos_bench/mlos_bench/tests/storage/conftest.py +++ b/mlos_bench/mlos_bench/tests/storage/conftest.py @@ -12,6 +12,7 @@ # Expose some of those as local names so they can be picked up as fixtures by pytest. storage = sql_storage_fixtures.storage +sqlite_storage = sql_storage_fixtures.sqlite_storage exp_storage = sql_storage_fixtures.exp_storage exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage mixed_numerics_exp_storage = sql_storage_fixtures.mixed_numerics_exp_storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index cb83bffd4f..8cd4954bdd 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -7,6 +7,10 @@ from collections.abc import Generator from random import seed as rand_seed +import json +import os +import tempfile + import pytest from mlos_bench.optimizers.mock_optimizer import MockOptimizer @@ -15,6 +19,7 @@ from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.storage.storage_factory import from_config from mlos_bench.tests import SEED from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, @@ -26,6 +31,38 @@ # pylint: disable=redefined-outer-name +@pytest.fixture +def sqlite_storage() -> Generator[SqlStorage]: + """ + Fixture for file based SQLite storage in a temporary directory. + + Yields + ------ + Generator[SqlStorage] + + Notes + ----- + Can't be used in parallel tests on Windows. 
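+
+    Examples
+    --------
+    A minimal usage sketch (hypothetical test; pytest injects the fixture by
+    name, and a fresh database should start out with no experiments)::
+
+        def test_sqlite_storage_smoke(sqlite_storage: SqlStorage) -> None:
+            assert isinstance(sqlite_storage, SqlStorage)
+            assert not sqlite_storage.experiments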
+ """ + with tempfile.TemporaryDirectory() as tmpdir: + db_path = os.path.join(tmpdir, "mlos_bench.sqlite") + config_str = json.dumps( + { + "class": "mlos_bench.storage.sql.storage.SqlStorage", + "config": { + "drivername": "sqlite", + "database": db_path, + "lazy_schema_create": False, + }, + } + ) + + storage = from_config(config_str) + assert isinstance(storage, SqlStorage) + storage.update_schema() + yield storage + + @pytest.fixture def storage() -> SqlStorage: """Test fixture for in-memory SQLite3 storage.""" diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index 3d5053837b..7871e7f68c 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -3,11 +3,8 @@ # Licensed under the MIT License. # """Test pickling and unpickling of Storage, and restoring Experiment and Trial by id.""" -import json -import os import pickle import sys -import tempfile from datetime import datetime from typing import Literal @@ -16,7 +13,6 @@ from mlos_bench.environments.status import Status from mlos_bench.storage.sql.storage import SqlStorage -from mlos_bench.storage.storage_factory import from_config from mlos_bench.tunables.tunable_groups import TunableGroups @@ -26,72 +22,59 @@ sys.platform == "win32", reason="Windows doesn't support multiple processes accessing the same file.", ) -def test_storage_pickle_restore_experiment_and_trial(tunable_groups: TunableGroups) -> None: +def test_storage_pickle_restore_experiment_and_trial( + sqlite_storage: SqlStorage, + tunable_groups: TunableGroups, +) -> None: """Check that we can pickle and unpickle the Storage object, and restore Experiment and Trial by id. 
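+
+    Uses the file-backed ``sqlite_storage`` fixture (rather than an in-memory
+    database) so that the unpickled Storage object can reconnect to the same
+    file on disk.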
""" - # pylint: disable=too-many-locals - with tempfile.TemporaryDirectory() as tmpdir: - db_path = os.path.join(tmpdir, "mlos_bench.sqlite") - config_str = json.dumps( - { - "class": "mlos_bench.storage.sql.storage.SqlStorage", - "config": { - "drivername": "sqlite", - "database": db_path, - "lazy_schema_create": False, - }, - } - ) + storage = sqlite_storage + # Create an Experiment and a Trial + opt_targets: dict[str, Literal["min", "max"]] = {"metric": "min"} + experiment = storage.experiment( + experiment_id="experiment_id", + trial_id=0, + root_env_config="dummy_env.json", + description="Pickle test experiment", + tunables=tunable_groups, + opt_targets=opt_targets, + ) + with experiment: + trial = experiment.new_trial(tunable_groups) + trial_id_created = trial.trial_id + trial.set_trial_runner(1) + trial.update(Status.RUNNING, datetime.now(UTC)) - storage = from_config(config_str) - storage.update_schema() + # Pickle and unpickle the Storage object + pickled = pickle.dumps(storage) + restored_storage = pickle.loads(pickled) + assert isinstance(restored_storage, SqlStorage) - # Create an Experiment and a Trial - opt_targets: dict[str, Literal["min", "max"]] = {"metric": "min"} - experiment = storage.experiment( - experiment_id="experiment_id", - trial_id=0, - root_env_config="dummy_env.json", - description="Pickle test experiment", - tunables=tunable_groups, - opt_targets=opt_targets, - ) - with experiment: - trial = experiment.new_trial(tunable_groups) - trial_id_created = trial.trial_id - trial.set_trial_runner(1) - trial.update(Status.RUNNING, datetime.now(UTC)) + # Restore the Experiment from storage by id and check that it matches the original + restored_experiment = restored_storage.get_experiment_by_id( + experiment_id=experiment.experiment_id, + tunables=tunable_groups, + opt_targets=opt_targets, + ) + assert restored_experiment is not None + assert restored_experiment is not experiment + assert restored_experiment.experiment_id == experiment.experiment_id + assert restored_experiment.description == experiment.description + assert restored_experiment.root_env_config == experiment.root_env_config + assert restored_experiment.tunables == experiment.tunables + assert restored_experiment.opt_targets == experiment.opt_targets + with restored_experiment: + # trial_id should have been restored during __enter__ + assert restored_experiment.trial_id == experiment.trial_id - # Pickle and unpickle the Storage object - pickled = pickle.dumps(storage) - restored_storage = pickle.loads(pickled) - assert isinstance(restored_storage, SqlStorage) - - # Restore the Experiment from storage by id and check that it matches the original - restored_experiment = restored_storage.get_experiment_by_id( - experiment_id=experiment.experiment_id, - tunables=tunable_groups, - opt_targets=opt_targets, - ) - assert restored_experiment is not None - assert restored_experiment is not experiment - assert restored_experiment.experiment_id == experiment.experiment_id - assert restored_experiment.description == experiment.description - assert restored_experiment.root_env_config == experiment.root_env_config - assert restored_experiment.tunables == experiment.tunables - assert restored_experiment.opt_targets == experiment.opt_targets - with restored_experiment: - # trial_id should have been restored during __enter__ - assert restored_experiment.trial_id == experiment.trial_id - - # Restore the Trial from storage by id and check that it matches the original - restored_trial = 
restored_experiment.get_trial_by_id(trial_id_created) - assert restored_trial is not None - assert restored_trial is not trial - assert restored_trial.trial_id == trial.trial_id - assert restored_trial.experiment_id == trial.experiment_id - assert restored_trial.tunables == trial.tunables - assert restored_trial.status == trial.status - assert restored_trial.config() == trial.config() - assert restored_trial.trial_runner_id == trial.trial_runner_id + # Restore the Trial from storage by id and check that it matches the original + restored_trial = restored_experiment.get_trial_by_id(trial_id_created) + assert restored_trial is not None + assert restored_trial is not trial + assert restored_trial.trial_id == trial.trial_id + assert restored_trial.experiment_id == trial.experiment_id + assert restored_trial.tunables == trial.tunables + assert restored_trial.status == trial.status + assert restored_trial.config() == trial.config() + assert restored_trial.trial_runner_id == trial.trial_runner_id From bdfd9b086cfe35cea88ff8a5d2e9b9b539a7de5e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 17:09:40 -0500 Subject: [PATCH 054/109] adding basic scheduler config example loader test --- .../tests/config/schedulers/conftest.py | 54 +++++++++++++++++++ .../test_load_scheduler_config_examples.py | 28 ++++++++-- 2 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/config/schedulers/conftest.py diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py new file mode 100644 index 0000000000..a1fd105839 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py @@ -0,0 +1,54 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Pytest fixtures for Scheduler config tests. + +Provides fixtures for creating multiple TrialRunner instances using the mock environment config. +""" + +from importlib.resources import files + +import pytest + +from mlos_bench.services.config_persistence import ConfigPersistenceService +from mlos_bench.schedulers.trial_runner import TrialRunner +from mlos_bench.util import path_join + +# pylint: disable=redefined-outer-name + +TRIAL_RUNNERS_COUNT = 4 + +@pytest.fixture +def mock_env_config_path() -> str: + """ + Returns the absolute path to the mock environment configuration file. + This file is used to create TrialRunner instances for testing. + """ + + # Use the files() routine to locate the file relative to this directory + return path_join( + str(files("mlos_bench.config").joinpath("environments", "mock", "mock_env.jsonc")), + abs_path=True, + ) + + +@pytest.fixture +def trial_runners( + config_loader_service: ConfigPersistenceService, + mock_env_config_path: str, +) -> list[TrialRunner]: + """ + Fixture that returns a list of TrialRunner instances using the mock environment config. + + Returns + ------- + list[TrialRunner] + List of TrialRunner instances created from the mock environment config. 
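+
+    Examples
+    --------
+    A minimal usage sketch (hypothetical test; the expected count comes from
+    ``TRIAL_RUNNERS_COUNT`` above)::
+
+        def test_trial_runner_count(trial_runners: list[TrialRunner]) -> None:
+            assert len(trial_runners) == TRIAL_RUNNERS_COUNT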
+ """ + return TrialRunner.create_from_json( + config_loader=config_loader_service, + env_json=mock_env_config_path, + num_trial_runners=TRIAL_RUNNERS_COUNT, + ) diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index 9127bbbfca..c8d6bcb172 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -8,14 +8,25 @@ import pytest from mlos_bench.config.schemas.config_schemas import ConfigSchema +from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.services.config_persistence import ConfigPersistenceService +from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.tests.config import locate_config_examples from mlos_bench.util import get_class_from_name +import mlos_bench.tests.storage.sql.fixtures +import mlos_bench.tests.optimizers.fixtures + +mock_opt = mlos_bench.tests.optimizers.fixtures.mock_opt +sqlite_storage = mlos_bench.tests.storage.sql.fixtures.sqlite_storage + + _LOG = logging.getLogger(__name__) _LOG.setLevel(logging.DEBUG) +# pylint: disable=redefined-outer-name # Get the set of configs to test. CONFIG_TYPE = "schedulers" @@ -38,18 +49,29 @@ def filter_configs(configs_to_filter: list[str]) -> list[str]: def test_load_scheduler_config_examples( config_loader_service: ConfigPersistenceService, config_path: str, + mock_env_config_path: str, + trial_runners: list[TrialRunner], + sqlite_storage: SqlStorage, + mock_opt: MockOptimizer, ) -> None: """Tests loading a config example.""" config = config_loader_service.load_config(config_path, ConfigSchema.SCHEDULER) assert isinstance(config, dict) - # Skip schema loading that would require a database connection for this test. - config["config"]["lazy_schema_create"] = True cls = get_class_from_name(config["class"]) assert issubclass(cls, Scheduler) + global_config = { + # Required configs generally provided by the Launcher. + "experiment_id": f"test_experiment_{__name__}", + "trial_id": 1, + } # Make an instance of the class based on the config. scheduler_inst = config_loader_service.build_scheduler( config=config, - service=config_loader_service, + global_config=global_config, + trial_runners=trial_runners, + optimizer=mock_opt, + storage=sqlite_storage, + root_env_config=mock_env_config_path, ) assert scheduler_inst is not None assert isinstance(scheduler_inst, cls) From b66e17a946e8764aacbb689f99fea0cd3d3ad847 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 17:12:33 -0500 Subject: [PATCH 055/109] Refactor Scheduler schema definitions to make it easier to add new ones. 
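
Each concrete Scheduler now declares its own subschema file next to
scheduler-schema.json, with the shared properties factored out into
base-scheduler-subschema.json. As a rough sanity check, a config along these
lines (illustrative values taken from the schema "examples" fields) should
validate against the SyncScheduler subschema:

    {
        "class": "mlos_bench.schedulers.SyncScheduler",
        "config": {
            "max_trials": 50,
            "trial_config_repeat_count": 3
        }
    }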
--- .../schedulers/base-scheduler-subschema.json | 69 ++++++++++++ .../schemas/schedulers/scheduler-schema.json | 104 ++---------------- .../schedulers/sync-scheduler-subschema.json | 27 +++++ 3 files changed, 107 insertions(+), 93 deletions(-) create mode 100644 mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json create mode 100644 mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json new file mode 100644 index 0000000000..702da1eec3 --- /dev/null +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json @@ -0,0 +1,69 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json", + "title": "mlos_bench base Scheduler config schema definitions", + "description": "mlos_bench base Scheduler config schema definitions for all Scheduler types.", + + "$defs": { + "base_scheduler_config": { + "$comment": "config properties common to all Scheduler types.", + "description": "The scheduler-specific config.", + "type": "object", + "minProperties": 1, + "properties": { + "experiment_id": { + "$ref": "../cli/common-defs-subschemas.json#/$defs/experiment_id" + }, + "trial_id": { + "$ref": "../cli/common-defs-subschemas.json#/$defs/trial_id" + }, + "config_id": { + "$ref": "../cli/common-defs-subschemas.json#/$defs/config_id" + }, + "teardown": { + "description": "Whether to teardown the experiment after running it.", + "type": "boolean" + }, + "max_trials": { + "description": "Max. number of trials to run. 
Use -1 or 0 for unlimited.", + "type": "integer", + "minimum": -1, + "examples": [50, -1] + }, + "trial_config_repeat_count": { + "description": "Number of times to repeat a config.", + "type": "integer", + "minimum": 1, + "examples": [3, 5] + } + } + } + }, + + "type": "object", + "properties": { + "$schema": { + "description": "The schema to use for validating the scheduler config (accepts both URLs and local paths).", + "type": "string", + "$comment": "This is optional, but if provided, should match the name of the root schema file.", + "pattern": "/schemas/schedulers/scheduler-schema.json$" + }, + + "description": { + "description": "Optional description of the config.", + "type": "string" + }, + + "class": { + "description": "The name of the scheduler class to use.", + "type": "string", + "$comment": "Exact matches are handled elsewhere.", + "pattern": "^mlos_bench[.]schedulers[.]" + }, + + "config": { + "$ref": "#/$defs/base_scheduler_config" + } + }, + "required": ["class"] +} diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json index 81b2e79754..8dfce6be36 100644 --- a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json @@ -2,105 +2,23 @@ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json", "title": "mlos_bench Scheduler config", - - "$defs": { - "comment": { - "$comment": "This section contains reusable partial schema bits (or just split out for readability)" - }, - - "config_base_scheduler": { - "$comment": "config properties common to all Scheduler types.", - "type": "object", - "properties": { - "experiment_id": { - "$ref": "../cli/common-defs-subschemas.json#/$defs/experiment_id" - }, - "trial_id": { - "$ref": "../cli/common-defs-subschemas.json#/$defs/trial_id" - }, - "config_id": { - "$ref": "../cli/common-defs-subschemas.json#/$defs/config_id" - }, - "teardown": { - "description": "Whether to teardown the experiment after running it.", - "type": "boolean" - }, - "max_trials": { - "description": "Max. number of trials to run. 
Use -1 or 0 for unlimited.", - "type": "integer", - "minimum": -1, - "examples": [50, -1] - }, - "trial_config_repeat_count": { - "description": "Number of times to repeat a config.", - "type": "integer", - "minimum": 1, - "examples": [3, 5] - } - } - } - }, - "description": "config for the mlos_bench scheduler", "$comment": "top level schema document rules", - "type": "object", - "properties": { - "$schema": { - "description": "The schema to use for validating the scheduler config (accepts both URLs and local paths).", - "type": "string", - "$comment": "This is optional, but if provided, should match the name of this file.", - "pattern": "/schemas/schedulers/scheduler-schema.json$" - }, - "description": { - "description": "Optional description of the config.", - "type": "string" - }, - - "class": { - "description": "The name of the scheduler class to use.", - "$comment": "required", - "enum": [ - "mlos_bench.schedulers.SyncScheduler", - "mlos_bench.schedulers.sync_scheduler.SyncScheduler" - ] + "type": "object", + "allOf": [ + { + "$comment": "All scheduler subschemas support these base properties.", + "$ref": "./base-scheduler-subschema.json" }, - - "config": { - "description": "The scheduler-specific config.", - "$comment": "Stub for scheduler-specific config appended with condition statements below", - "type": "object", - "minProperties": 1 - } - }, - "required": ["class"], - - "oneOf": [ { - "$comment": "extensions to the 'config' object properties when synchronous scheduler is being used", - "if": { - "properties": { - "class": { - "enum": [ - "mlos_bench.schedulers.SyncScheduler", - "mlos_bench.schedulers.sync_scheduler.SyncScheduler" - ] - } - }, - "required": ["class"] - }, - "then": { - "properties": { - "config": { - "type": "object", - "allOf": [{ "$ref": "#/$defs/config_base_scheduler" }], - "$comment": "disallow other properties", - "unevaluatedProperties": false - } + "$comment": "The set of known Scheduler subschemas. 
Add others as needed.", + "oneOf": [ + { + "$ref": "./sync-scheduler-subschema.json" } - }, - "else": false + ] } ], - "unevaluatedProperties": false + "required": ["class"] } diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json new file mode 100644 index 0000000000..e7f0e40eb2 --- /dev/null +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/sync-scheduler-subschema.json", + "title": "mlos_bench SyncScheduler config", + "description": "config for an mlos_bench SyncScheduler", + "type": "object", + "properties": { + "class": { + "enum": [ + "mlos_bench.schedulers.SyncScheduler", + "mlos_bench.schedulers.sync_scheduler.SyncScheduler" + ] + }, + "config": { + "type": "object", + "$comment": "No extra properties supported by SyncScheduler.", + "allOf": [ + { + "$ref": "base-scheduler-subschema.json#/$defs/base_scheduler_config" + } + ], + "minProperties": 1, + "unevaluatedProperties": false + } + }, + "required": ["class"] +} From 30191c094112b370c8d288b4fc89fac650e22428 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 22:38:43 +0000 Subject: [PATCH 056/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../mlos_bench/tests/config/schedulers/conftest.py | 10 +++++++--- .../schedulers/test_load_scheduler_config_examples.py | 5 ++--- mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 5 ++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py index a1fd105839..fdf27162b3 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py @@ -5,25 +5,28 @@ """ Pytest fixtures for Scheduler config tests. -Provides fixtures for creating multiple TrialRunner instances using the mock environment config. +Provides fixtures for creating multiple TrialRunner instances using the mock environment +config. """ from importlib.resources import files import pytest -from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.schedulers.trial_runner import TrialRunner +from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.util import path_join # pylint: disable=redefined-outer-name TRIAL_RUNNERS_COUNT = 4 + @pytest.fixture def mock_env_config_path() -> str: """ Returns the absolute path to the mock environment configuration file. + This file is used to create TrialRunner instances for testing. """ @@ -40,7 +43,8 @@ def trial_runners( mock_env_config_path: str, ) -> list[TrialRunner]: """ - Fixture that returns a list of TrialRunner instances using the mock environment config. + Fixture that returns a list of TrialRunner instances using the mock environment + config. 
Returns ------- diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index c8d6bcb172..bf10a99c46 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -7,6 +7,8 @@ import pytest +import mlos_bench.tests.optimizers.fixtures +import mlos_bench.tests.storage.sql.fixtures from mlos_bench.config.schemas.config_schemas import ConfigSchema from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.base_scheduler import Scheduler @@ -16,9 +18,6 @@ from mlos_bench.tests.config import locate_config_examples from mlos_bench.util import get_class_from_name -import mlos_bench.tests.storage.sql.fixtures -import mlos_bench.tests.optimizers.fixtures - mock_opt = mlos_bench.tests.optimizers.fixtures.mock_opt sqlite_storage = mlos_bench.tests.storage.sql.fixtures.sqlite_storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 8cd4954bdd..0bebeeff82 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -4,12 +4,11 @@ # """Test fixtures for mlos_bench storage.""" -from collections.abc import Generator -from random import seed as rand_seed - import json import os import tempfile +from collections.abc import Generator +from random import seed as rand_seed import pytest From bc8dc8f3b4878f0aa52fbcdbd2d68430d56f4f0c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 17:41:22 -0500 Subject: [PATCH 057/109] Refactor some test fixtures for better reuse so we can test loading Scheduler config examples. --- .../tests/config/schedulers/__init__.py | 3 + .../tests/config/schedulers/conftest.py | 54 ++++++ .../test_load_scheduler_config_examples.py | 77 ++++++++ .../mlos_bench/tests/optimizers/conftest.py | 180 ++---------------- .../mlos_bench/tests/optimizers/fixtures.py | 171 +++++++++++++++++ .../mlos_bench/tests/storage/conftest.py | 1 + .../mlos_bench/tests/storage/sql/fixtures.py | 37 ++++ .../tests/storage/test_storage_pickling.py | 117 +++++------- 8 files changed, 406 insertions(+), 234 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/config/schedulers/__init__.py create mode 100644 mlos_bench/mlos_bench/tests/config/schedulers/conftest.py create mode 100644 mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py create mode 100644 mlos_bench/mlos_bench/tests/optimizers/fixtures.py diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/__init__.py b/mlos_bench/mlos_bench/tests/config/schedulers/__init__.py new file mode 100644 index 0000000000..7838135124 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schedulers/__init__.py @@ -0,0 +1,3 @@ +""" +Unit tests for the mlos_bench Scheduler configs. +""" diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py new file mode 100644 index 0000000000..a1fd105839 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py @@ -0,0 +1,54 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Pytest fixtures for Scheduler config tests. + +Provides fixtures for creating multiple TrialRunner instances using the mock environment config. 
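+
+The number of runners created is pinned by ``TRIAL_RUNNERS_COUNT`` below so
+that tests can make deterministic assertions about the runners they receive.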
+""" + +from importlib.resources import files + +import pytest + +from mlos_bench.services.config_persistence import ConfigPersistenceService +from mlos_bench.schedulers.trial_runner import TrialRunner +from mlos_bench.util import path_join + +# pylint: disable=redefined-outer-name + +TRIAL_RUNNERS_COUNT = 4 + +@pytest.fixture +def mock_env_config_path() -> str: + """ + Returns the absolute path to the mock environment configuration file. + This file is used to create TrialRunner instances for testing. + """ + + # Use the files() routine to locate the file relative to this directory + return path_join( + str(files("mlos_bench.config").joinpath("environments", "mock", "mock_env.jsonc")), + abs_path=True, + ) + + +@pytest.fixture +def trial_runners( + config_loader_service: ConfigPersistenceService, + mock_env_config_path: str, +) -> list[TrialRunner]: + """ + Fixture that returns a list of TrialRunner instances using the mock environment config. + + Returns + ------- + list[TrialRunner] + List of TrialRunner instances created from the mock environment config. + """ + return TrialRunner.create_from_json( + config_loader=config_loader_service, + env_json=mock_env_config_path, + num_trial_runners=TRIAL_RUNNERS_COUNT, + ) diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py new file mode 100644 index 0000000000..c8d6bcb172 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -0,0 +1,77 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""Tests for loading scheduler config examples.""" +import logging + +import pytest + +from mlos_bench.config.schemas.config_schemas import ConfigSchema +from mlos_bench.optimizers.mock_optimizer import MockOptimizer +from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.schedulers.trial_runner import TrialRunner +from mlos_bench.services.config_persistence import ConfigPersistenceService +from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.tests.config import locate_config_examples +from mlos_bench.util import get_class_from_name + +import mlos_bench.tests.storage.sql.fixtures +import mlos_bench.tests.optimizers.fixtures + +mock_opt = mlos_bench.tests.optimizers.fixtures.mock_opt +sqlite_storage = mlos_bench.tests.storage.sql.fixtures.sqlite_storage + + +_LOG = logging.getLogger(__name__) +_LOG.setLevel(logging.DEBUG) + +# pylint: disable=redefined-outer-name + +# Get the set of configs to test. 
+CONFIG_TYPE = "schedulers" + + +def filter_configs(configs_to_filter: list[str]) -> list[str]: + """If necessary, filter out json files that aren't for the module we're testing.""" + return configs_to_filter + + +configs = locate_config_examples( + ConfigPersistenceService.BUILTIN_CONFIG_PATH, + CONFIG_TYPE, + filter_configs, +) +assert configs + + +@pytest.mark.parametrize("config_path", configs) +def test_load_scheduler_config_examples( + config_loader_service: ConfigPersistenceService, + config_path: str, + mock_env_config_path: str, + trial_runners: list[TrialRunner], + sqlite_storage: SqlStorage, + mock_opt: MockOptimizer, +) -> None: + """Tests loading a config example.""" + config = config_loader_service.load_config(config_path, ConfigSchema.SCHEDULER) + assert isinstance(config, dict) + cls = get_class_from_name(config["class"]) + assert issubclass(cls, Scheduler) + global_config = { + # Required configs generally provided by the Launcher. + "experiment_id": f"test_experiment_{__name__}", + "trial_id": 1, + } + # Make an instance of the class based on the config. + scheduler_inst = config_loader_service.build_scheduler( + config=config, + global_config=global_config, + trial_runners=trial_runners, + optimizer=mock_opt, + storage=sqlite_storage, + root_env_config=mock_env_config_path, + ) + assert scheduler_inst is not None + assert isinstance(scheduler_inst, cls) diff --git a/mlos_bench/mlos_bench/tests/optimizers/conftest.py b/mlos_bench/mlos_bench/tests/optimizers/conftest.py index aaa6b14753..f1c758bca6 100644 --- a/mlos_bench/mlos_bench/tests/optimizers/conftest.py +++ b/mlos_bench/mlos_bench/tests/optimizers/conftest.py @@ -2,170 +2,16 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Test fixtures for mlos_bench optimizers.""" - - -import pytest - -from mlos_bench.optimizers.manual_optimizer import ManualOptimizer -from mlos_bench.optimizers.mlos_core_optimizer import MlosCoreOptimizer -from mlos_bench.optimizers.mock_optimizer import MockOptimizer -from mlos_bench.tests import SEED -from mlos_bench.tunables.tunable_groups import TunableGroups - -# pylint: disable=redefined-outer-name - - -@pytest.fixture -def mock_configs() -> list[dict]: - """Mock configurations of earlier experiments.""" - return [ - { - "vmSize": "Standard_B4ms", - "idle": "halt", - "kernel_sched_migration_cost_ns": 50000, - "kernel_sched_latency_ns": 1000000, - }, - { - "vmSize": "Standard_B4ms", - "idle": "halt", - "kernel_sched_migration_cost_ns": 40000, - "kernel_sched_latency_ns": 2000000, - }, - { - "vmSize": "Standard_B4ms", - "idle": "mwait", - "kernel_sched_migration_cost_ns": -1, # Special value - "kernel_sched_latency_ns": 3000000, - }, - { - "vmSize": "Standard_B2s", - "idle": "mwait", - "kernel_sched_migration_cost_ns": 200000, - "kernel_sched_latency_ns": 4000000, - }, - ] - - -@pytest.fixture -def mock_opt_no_defaults(tunable_groups: TunableGroups) -> MockOptimizer: - """Test fixture for MockOptimizer that ignores the initial configuration.""" - return MockOptimizer( - tunables=tunable_groups, - service=None, - config={ - "optimization_targets": {"score": "min"}, - "max_suggestions": 5, - "start_with_defaults": False, - "seed": SEED, - }, - ) - - -@pytest.fixture -def mock_opt(tunable_groups: TunableGroups) -> MockOptimizer: - """Test fixture for MockOptimizer.""" - return MockOptimizer( - tunables=tunable_groups, - service=None, - config={"optimization_targets": {"score": "min"}, "max_suggestions": 5, "seed": SEED}, - ) - - -@pytest.fixture -def 
mock_opt_max(tunable_groups: TunableGroups) -> MockOptimizer: - """Test fixture for MockOptimizer.""" - return MockOptimizer( - tunables=tunable_groups, - service=None, - config={"optimization_targets": {"score": "max"}, "max_suggestions": 10, "seed": SEED}, - ) - - -@pytest.fixture -def flaml_opt(tunable_groups: TunableGroups) -> MlosCoreOptimizer: - """Test fixture for mlos_core FLAML optimizer.""" - return MlosCoreOptimizer( - tunables=tunable_groups, - service=None, - config={ - "optimization_targets": {"score": "min"}, - "max_suggestions": 15, - "optimizer_type": "FLAML", - "seed": SEED, - }, - ) - - -@pytest.fixture -def flaml_opt_max(tunable_groups: TunableGroups) -> MlosCoreOptimizer: - """Test fixture for mlos_core FLAML optimizer.""" - return MlosCoreOptimizer( - tunables=tunable_groups, - service=None, - config={ - "optimization_targets": {"score": "max"}, - "max_suggestions": 15, - "optimizer_type": "FLAML", - "seed": SEED, - }, - ) - - -# FIXME: SMAC's RF model can be non-deterministic at low iterations, which are -# normally calculated as a percentage of the max_suggestions and number of -# tunable dimensions, so for now we set the initial random samples equal to the -# number of iterations and control them with a seed. - -SMAC_ITERATIONS = 10 - - -@pytest.fixture -def smac_opt(tunable_groups: TunableGroups) -> MlosCoreOptimizer: - """Test fixture for mlos_core SMAC optimizer.""" - return MlosCoreOptimizer( - tunables=tunable_groups, - service=None, - config={ - "optimization_targets": {"score": "min"}, - "max_suggestions": SMAC_ITERATIONS, - "optimizer_type": "SMAC", - "seed": SEED, - "output_directory": None, - # See Above - "n_random_init": SMAC_ITERATIONS, - "max_ratio": 1.0, - }, - ) - - -@pytest.fixture -def smac_opt_max(tunable_groups: TunableGroups) -> MlosCoreOptimizer: - """Test fixture for mlos_core SMAC optimizer.""" - return MlosCoreOptimizer( - tunables=tunable_groups, - service=None, - config={ - "optimization_targets": {"score": "max"}, - "max_suggestions": SMAC_ITERATIONS, - "optimizer_type": "SMAC", - "seed": SEED, - "output_directory": None, - # See Above - "n_random_init": SMAC_ITERATIONS, - "max_ratio": 1.0, - }, - ) - - -@pytest.fixture -def manual_opt(tunable_groups: TunableGroups, mock_configs: list[dict]) -> ManualOptimizer: - """Test fixture for ManualOptimizer.""" - return ManualOptimizer( - tunables=tunable_groups, - service=None, - config={ - "max_cycles": 2, - "tunable_values_cycle": mock_configs, - }, - ) +"""Export test fixtures for mlos_bench optimizers.""" + +import mlos_bench.tests.optimizers.fixtures + +mock_configs = mlos_bench.tests.optimizers.fixtures.mock_configs +mock_opt_no_defaults = mlos_bench.tests.optimizers.fixtures.mock_opt_no_defaults +mock_opt = mlos_bench.tests.optimizers.fixtures.mock_opt +mock_opt_max = mlos_bench.tests.optimizers.fixtures.mock_opt_max +flaml_opt = mlos_bench.tests.optimizers.fixtures.flaml_opt +flaml_opt_max = mlos_bench.tests.optimizers.fixtures.flaml_opt_max +smac_opt = mlos_bench.tests.optimizers.fixtures.smac_opt +smac_opt_max = mlos_bench.tests.optimizers.fixtures.smac_opt_max +manual_opt = mlos_bench.tests.optimizers.fixtures.manual_opt diff --git a/mlos_bench/mlos_bench/tests/optimizers/fixtures.py b/mlos_bench/mlos_bench/tests/optimizers/fixtures.py new file mode 100644 index 0000000000..aaa6b14753 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/optimizers/fixtures.py @@ -0,0 +1,171 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+# +"""Test fixtures for mlos_bench optimizers.""" + + +import pytest + +from mlos_bench.optimizers.manual_optimizer import ManualOptimizer +from mlos_bench.optimizers.mlos_core_optimizer import MlosCoreOptimizer +from mlos_bench.optimizers.mock_optimizer import MockOptimizer +from mlos_bench.tests import SEED +from mlos_bench.tunables.tunable_groups import TunableGroups + +# pylint: disable=redefined-outer-name + + +@pytest.fixture +def mock_configs() -> list[dict]: + """Mock configurations of earlier experiments.""" + return [ + { + "vmSize": "Standard_B4ms", + "idle": "halt", + "kernel_sched_migration_cost_ns": 50000, + "kernel_sched_latency_ns": 1000000, + }, + { + "vmSize": "Standard_B4ms", + "idle": "halt", + "kernel_sched_migration_cost_ns": 40000, + "kernel_sched_latency_ns": 2000000, + }, + { + "vmSize": "Standard_B4ms", + "idle": "mwait", + "kernel_sched_migration_cost_ns": -1, # Special value + "kernel_sched_latency_ns": 3000000, + }, + { + "vmSize": "Standard_B2s", + "idle": "mwait", + "kernel_sched_migration_cost_ns": 200000, + "kernel_sched_latency_ns": 4000000, + }, + ] + + +@pytest.fixture +def mock_opt_no_defaults(tunable_groups: TunableGroups) -> MockOptimizer: + """Test fixture for MockOptimizer that ignores the initial configuration.""" + return MockOptimizer( + tunables=tunable_groups, + service=None, + config={ + "optimization_targets": {"score": "min"}, + "max_suggestions": 5, + "start_with_defaults": False, + "seed": SEED, + }, + ) + + +@pytest.fixture +def mock_opt(tunable_groups: TunableGroups) -> MockOptimizer: + """Test fixture for MockOptimizer.""" + return MockOptimizer( + tunables=tunable_groups, + service=None, + config={"optimization_targets": {"score": "min"}, "max_suggestions": 5, "seed": SEED}, + ) + + +@pytest.fixture +def mock_opt_max(tunable_groups: TunableGroups) -> MockOptimizer: + """Test fixture for MockOptimizer.""" + return MockOptimizer( + tunables=tunable_groups, + service=None, + config={"optimization_targets": {"score": "max"}, "max_suggestions": 10, "seed": SEED}, + ) + + +@pytest.fixture +def flaml_opt(tunable_groups: TunableGroups) -> MlosCoreOptimizer: + """Test fixture for mlos_core FLAML optimizer.""" + return MlosCoreOptimizer( + tunables=tunable_groups, + service=None, + config={ + "optimization_targets": {"score": "min"}, + "max_suggestions": 15, + "optimizer_type": "FLAML", + "seed": SEED, + }, + ) + + +@pytest.fixture +def flaml_opt_max(tunable_groups: TunableGroups) -> MlosCoreOptimizer: + """Test fixture for mlos_core FLAML optimizer.""" + return MlosCoreOptimizer( + tunables=tunable_groups, + service=None, + config={ + "optimization_targets": {"score": "max"}, + "max_suggestions": 15, + "optimizer_type": "FLAML", + "seed": SEED, + }, + ) + + +# FIXME: SMAC's RF model can be non-deterministic at low iterations, which are +# normally calculated as a percentage of the max_suggestions and number of +# tunable dimensions, so for now we set the initial random samples equal to the +# number of iterations and control them with a seed. 
+ +SMAC_ITERATIONS = 10 + + +@pytest.fixture +def smac_opt(tunable_groups: TunableGroups) -> MlosCoreOptimizer: + """Test fixture for mlos_core SMAC optimizer.""" + return MlosCoreOptimizer( + tunables=tunable_groups, + service=None, + config={ + "optimization_targets": {"score": "min"}, + "max_suggestions": SMAC_ITERATIONS, + "optimizer_type": "SMAC", + "seed": SEED, + "output_directory": None, + # See Above + "n_random_init": SMAC_ITERATIONS, + "max_ratio": 1.0, + }, + ) + + +@pytest.fixture +def smac_opt_max(tunable_groups: TunableGroups) -> MlosCoreOptimizer: + """Test fixture for mlos_core SMAC optimizer.""" + return MlosCoreOptimizer( + tunables=tunable_groups, + service=None, + config={ + "optimization_targets": {"score": "max"}, + "max_suggestions": SMAC_ITERATIONS, + "optimizer_type": "SMAC", + "seed": SEED, + "output_directory": None, + # See Above + "n_random_init": SMAC_ITERATIONS, + "max_ratio": 1.0, + }, + ) + + +@pytest.fixture +def manual_opt(tunable_groups: TunableGroups, mock_configs: list[dict]) -> ManualOptimizer: + """Test fixture for ManualOptimizer.""" + return ManualOptimizer( + tunables=tunable_groups, + service=None, + config={ + "max_cycles": 2, + "tunable_values_cycle": mock_configs, + }, + ) diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py index a143705282..c510793fac 100644 --- a/mlos_bench/mlos_bench/tests/storage/conftest.py +++ b/mlos_bench/mlos_bench/tests/storage/conftest.py @@ -12,6 +12,7 @@ # Expose some of those as local names so they can be picked up as fixtures by pytest. storage = sql_storage_fixtures.storage +sqlite_storage = sql_storage_fixtures.sqlite_storage exp_storage = sql_storage_fixtures.exp_storage exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage mixed_numerics_exp_storage = sql_storage_fixtures.mixed_numerics_exp_storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index cb83bffd4f..8cd4954bdd 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -7,6 +7,10 @@ from collections.abc import Generator from random import seed as rand_seed +import json +import os +import tempfile + import pytest from mlos_bench.optimizers.mock_optimizer import MockOptimizer @@ -15,6 +19,7 @@ from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.storage.storage_factory import from_config from mlos_bench.tests import SEED from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, @@ -26,6 +31,38 @@ # pylint: disable=redefined-outer-name +@pytest.fixture +def sqlite_storage() -> Generator[SqlStorage]: + """ + Fixture for file based SQLite storage in a temporary directory. + + Yields + ------ + Generator[SqlStorage] + + Notes + ----- + Can't be used in parallel tests on Windows. 
+ """ + with tempfile.TemporaryDirectory() as tmpdir: + db_path = os.path.join(tmpdir, "mlos_bench.sqlite") + config_str = json.dumps( + { + "class": "mlos_bench.storage.sql.storage.SqlStorage", + "config": { + "drivername": "sqlite", + "database": db_path, + "lazy_schema_create": False, + }, + } + ) + + storage = from_config(config_str) + assert isinstance(storage, SqlStorage) + storage.update_schema() + yield storage + + @pytest.fixture def storage() -> SqlStorage: """Test fixture for in-memory SQLite3 storage.""" diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index 3d5053837b..7871e7f68c 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -3,11 +3,8 @@ # Licensed under the MIT License. # """Test pickling and unpickling of Storage, and restoring Experiment and Trial by id.""" -import json -import os import pickle import sys -import tempfile from datetime import datetime from typing import Literal @@ -16,7 +13,6 @@ from mlos_bench.environments.status import Status from mlos_bench.storage.sql.storage import SqlStorage -from mlos_bench.storage.storage_factory import from_config from mlos_bench.tunables.tunable_groups import TunableGroups @@ -26,72 +22,59 @@ sys.platform == "win32", reason="Windows doesn't support multiple processes accessing the same file.", ) -def test_storage_pickle_restore_experiment_and_trial(tunable_groups: TunableGroups) -> None: +def test_storage_pickle_restore_experiment_and_trial( + sqlite_storage: SqlStorage, + tunable_groups: TunableGroups, +) -> None: """Check that we can pickle and unpickle the Storage object, and restore Experiment and Trial by id. 
""" - # pylint: disable=too-many-locals - with tempfile.TemporaryDirectory() as tmpdir: - db_path = os.path.join(tmpdir, "mlos_bench.sqlite") - config_str = json.dumps( - { - "class": "mlos_bench.storage.sql.storage.SqlStorage", - "config": { - "drivername": "sqlite", - "database": db_path, - "lazy_schema_create": False, - }, - } - ) + storage = sqlite_storage + # Create an Experiment and a Trial + opt_targets: dict[str, Literal["min", "max"]] = {"metric": "min"} + experiment = storage.experiment( + experiment_id="experiment_id", + trial_id=0, + root_env_config="dummy_env.json", + description="Pickle test experiment", + tunables=tunable_groups, + opt_targets=opt_targets, + ) + with experiment: + trial = experiment.new_trial(tunable_groups) + trial_id_created = trial.trial_id + trial.set_trial_runner(1) + trial.update(Status.RUNNING, datetime.now(UTC)) - storage = from_config(config_str) - storage.update_schema() + # Pickle and unpickle the Storage object + pickled = pickle.dumps(storage) + restored_storage = pickle.loads(pickled) + assert isinstance(restored_storage, SqlStorage) - # Create an Experiment and a Trial - opt_targets: dict[str, Literal["min", "max"]] = {"metric": "min"} - experiment = storage.experiment( - experiment_id="experiment_id", - trial_id=0, - root_env_config="dummy_env.json", - description="Pickle test experiment", - tunables=tunable_groups, - opt_targets=opt_targets, - ) - with experiment: - trial = experiment.new_trial(tunable_groups) - trial_id_created = trial.trial_id - trial.set_trial_runner(1) - trial.update(Status.RUNNING, datetime.now(UTC)) + # Restore the Experiment from storage by id and check that it matches the original + restored_experiment = restored_storage.get_experiment_by_id( + experiment_id=experiment.experiment_id, + tunables=tunable_groups, + opt_targets=opt_targets, + ) + assert restored_experiment is not None + assert restored_experiment is not experiment + assert restored_experiment.experiment_id == experiment.experiment_id + assert restored_experiment.description == experiment.description + assert restored_experiment.root_env_config == experiment.root_env_config + assert restored_experiment.tunables == experiment.tunables + assert restored_experiment.opt_targets == experiment.opt_targets + with restored_experiment: + # trial_id should have been restored during __enter__ + assert restored_experiment.trial_id == experiment.trial_id - # Pickle and unpickle the Storage object - pickled = pickle.dumps(storage) - restored_storage = pickle.loads(pickled) - assert isinstance(restored_storage, SqlStorage) - - # Restore the Experiment from storage by id and check that it matches the original - restored_experiment = restored_storage.get_experiment_by_id( - experiment_id=experiment.experiment_id, - tunables=tunable_groups, - opt_targets=opt_targets, - ) - assert restored_experiment is not None - assert restored_experiment is not experiment - assert restored_experiment.experiment_id == experiment.experiment_id - assert restored_experiment.description == experiment.description - assert restored_experiment.root_env_config == experiment.root_env_config - assert restored_experiment.tunables == experiment.tunables - assert restored_experiment.opt_targets == experiment.opt_targets - with restored_experiment: - # trial_id should have been restored during __enter__ - assert restored_experiment.trial_id == experiment.trial_id - - # Restore the Trial from storage by id and check that it matches the original - restored_trial = 
restored_experiment.get_trial_by_id(trial_id_created) - assert restored_trial is not None - assert restored_trial is not trial - assert restored_trial.trial_id == trial.trial_id - assert restored_trial.experiment_id == trial.experiment_id - assert restored_trial.tunables == trial.tunables - assert restored_trial.status == trial.status - assert restored_trial.config() == trial.config() - assert restored_trial.trial_runner_id == trial.trial_runner_id + # Restore the Trial from storage by id and check that it matches the original + restored_trial = restored_experiment.get_trial_by_id(trial_id_created) + assert restored_trial is not None + assert restored_trial is not trial + assert restored_trial.trial_id == trial.trial_id + assert restored_trial.experiment_id == trial.experiment_id + assert restored_trial.tunables == trial.tunables + assert restored_trial.status == trial.status + assert restored_trial.config() == trial.config() + assert restored_trial.trial_runner_id == trial.trial_runner_id From ea7b3ff26d0093fced5c56cad236ad8695cd0278 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 22:46:51 +0000 Subject: [PATCH 058/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../mlos_bench/tests/config/schedulers/__init__.py | 8 +++++--- .../mlos_bench/tests/config/schedulers/conftest.py | 10 +++++++--- .../schedulers/test_load_scheduler_config_examples.py | 5 ++--- mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 5 ++--- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/__init__.py b/mlos_bench/mlos_bench/tests/config/schedulers/__init__.py index 7838135124..111238e6ac 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/__init__.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/__init__.py @@ -1,3 +1,5 @@ -""" -Unit tests for the mlos_bench Scheduler configs. -""" +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""Unit tests for the mlos_bench Scheduler configs.""" diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py index a1fd105839..fdf27162b3 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py @@ -5,25 +5,28 @@ """ Pytest fixtures for Scheduler config tests. -Provides fixtures for creating multiple TrialRunner instances using the mock environment config. +Provides fixtures for creating multiple TrialRunner instances using the mock environment +config. """ from importlib.resources import files import pytest -from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.schedulers.trial_runner import TrialRunner +from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.util import path_join # pylint: disable=redefined-outer-name TRIAL_RUNNERS_COUNT = 4 + @pytest.fixture def mock_env_config_path() -> str: """ Returns the absolute path to the mock environment configuration file. + This file is used to create TrialRunner instances for testing. """ @@ -40,7 +43,8 @@ def trial_runners( mock_env_config_path: str, ) -> list[TrialRunner]: """ - Fixture that returns a list of TrialRunner instances using the mock environment config. + Fixture that returns a list of TrialRunner instances using the mock environment + config. 
Returns ------- diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index c8d6bcb172..bf10a99c46 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -7,6 +7,8 @@ import pytest +import mlos_bench.tests.optimizers.fixtures +import mlos_bench.tests.storage.sql.fixtures from mlos_bench.config.schemas.config_schemas import ConfigSchema from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.base_scheduler import Scheduler @@ -16,9 +18,6 @@ from mlos_bench.tests.config import locate_config_examples from mlos_bench.util import get_class_from_name -import mlos_bench.tests.storage.sql.fixtures -import mlos_bench.tests.optimizers.fixtures - mock_opt = mlos_bench.tests.optimizers.fixtures.mock_opt sqlite_storage = mlos_bench.tests.storage.sql.fixtures.sqlite_storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 8cd4954bdd..0bebeeff82 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -4,12 +4,11 @@ # """Test fixtures for mlos_bench storage.""" -from collections.abc import Generator -from random import seed as rand_seed - import json import os import tempfile +from collections.abc import Generator +from random import seed as rand_seed import pytest From 7563b8aeef9c92c166f72315975fe1caf10acd61 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 17:57:26 -0500 Subject: [PATCH 059/109] fixup --- .../mlos_bench/config/schemas/schedulers/scheduler-schema.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json index 8dfce6be36..3086abacd7 100644 --- a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json @@ -20,5 +20,6 @@ ] } ], - "required": ["class"] + "required": ["class"], + "unevaluatedProperties": false } From b03e31c6aa5736f7fc2a0ef3fb8cbb4a67e142c0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 18:01:24 -0500 Subject: [PATCH 060/109] lint --- mlos_bench/mlos_bench/tests/config/schedulers/conftest.py | 1 - .../config/schedulers/test_load_scheduler_config_examples.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py index fdf27162b3..7136840056 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/conftest.py @@ -29,7 +29,6 @@ def mock_env_config_path() -> str: This file is used to create TrialRunner instances for testing. 
""" - # Use the files() routine to locate the file relative to this directory return path_join( str(files("mlos_bench.config").joinpath("environments", "mock", "mock_env.jsonc")), diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index bf10a99c46..a2d90a3d01 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -54,6 +54,7 @@ def test_load_scheduler_config_examples( mock_opt: MockOptimizer, ) -> None: """Tests loading a config example.""" + # pylint: disable=too-many-arguments,too-many-positional-arguments config = config_loader_service.load_config(config_path, ConfigSchema.SCHEDULER) assert isinstance(config, dict) cls = get_class_from_name(config["class"]) From 8eb14c0c815fa803a69ec3c1fb456ab3e8956d2b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 18:02:39 -0500 Subject: [PATCH 061/109] apply suggestions --- mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 0bebeeff82..1b5892e235 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -6,6 +6,7 @@ import json import os +import sys import tempfile from collections.abc import Generator from random import seed as rand_seed @@ -30,6 +31,10 @@ # pylint: disable=redefined-outer-name +@pytest.mark.skipif( + sys.platform == "win32", + reason="File-based SQLite storage can fail on Windows due to file-locking in parallel tests.", +) @pytest.fixture def sqlite_storage() -> Generator[SqlStorage]: """ From 8ad4c3db78f491b1ebd65b61a4c630219006c381 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 18:03:15 -0500 Subject: [PATCH 062/109] revert --- mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 1b5892e235..0bebeeff82 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -6,7 +6,6 @@ import json import os -import sys import tempfile from collections.abc import Generator from random import seed as rand_seed @@ -31,10 +30,6 @@ # pylint: disable=redefined-outer-name -@pytest.mark.skipif( - sys.platform == "win32", - reason="File-based SQLite storage can fail on Windows due to file-locking in parallel tests.", -) @pytest.fixture def sqlite_storage() -> Generator[SqlStorage]: """ From d4d5153715dfad0f620a57e4b1859bfbbd885321 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 18:17:36 -0500 Subject: [PATCH 063/109] load test configs too --- .../test_load_environment_config_examples.py | 10 +++++++++- .../config/experiments/experiment_test_config.jsonc | 4 ++++ .../optimizers/test_load_optimizer_config_examples.py | 10 +++++++++- .../schedulers/test_load_scheduler_config_examples.py | 10 +++++++++- .../services/test_load_service_config_examples.py | 10 +++++++++- .../storage/test_load_storage_config_examples.py | 9 ++++++++- 6 files changed, 48 insertions(+), 5 deletions(-) diff --git 
a/mlos_bench/mlos_bench/tests/config/environments/test_load_environment_config_examples.py b/mlos_bench/mlos_bench/tests/config/environments/test_load_environment_config_examples.py index 889462f024..064530919c 100644 --- a/mlos_bench/mlos_bench/tests/config/environments/test_load_environment_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/environments/test_load_environment_config_examples.py @@ -11,7 +11,7 @@ from mlos_bench.environments.base_environment import Environment from mlos_bench.environments.composite_env import CompositeEnv from mlos_bench.services.config_persistence import ConfigPersistenceService -from mlos_bench.tests.config import locate_config_examples +from mlos_bench.tests.config import locate_config_examples, BUILTIN_TEST_CONFIG_PATH from mlos_bench.tunables.tunable_groups import TunableGroups _LOG = logging.getLogger(__name__) @@ -39,6 +39,14 @@ def filter_configs(configs_to_filter: list[str]) -> list[str]: ) assert configs +test_configs = locate_config_examples( + BUILTIN_TEST_CONFIG_PATH, + CONFIG_TYPE, + filter_configs, +) +assert test_configs +configs.extend(test_configs) + @pytest.mark.parametrize("config_path", configs) def test_load_environment_config_examples( diff --git a/mlos_bench/mlos_bench/tests/config/experiments/experiment_test_config.jsonc b/mlos_bench/mlos_bench/tests/config/experiments/experiment_test_config.jsonc index 2ca87c6f21..c6f98c4963 100644 --- a/mlos_bench/mlos_bench/tests/config/experiments/experiment_test_config.jsonc +++ b/mlos_bench/mlos_bench/tests/config/experiments/experiment_test_config.jsonc @@ -15,6 +15,10 @@ "resourceGroup": "mlos-autotuning-test-rg", "location": "eastus", "vmName": "vmTestName", + "ssh_username": "testuser", + "ssh_priv_key_path": "/home/testuser/.ssh/id_rsa", + "ssh_hostname": "${vmName}", + "ssh_port": 22, "tunable_params_map": { "linux-runtime": ["linux-scheduler", "linux-swap"], "linux-boot": ["linux-kernel-boot"], diff --git a/mlos_bench/mlos_bench/tests/config/optimizers/test_load_optimizer_config_examples.py b/mlos_bench/mlos_bench/tests/config/optimizers/test_load_optimizer_config_examples.py index fceecd89f0..a507a3f01d 100644 --- a/mlos_bench/mlos_bench/tests/config/optimizers/test_load_optimizer_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/optimizers/test_load_optimizer_config_examples.py @@ -10,7 +10,7 @@ from mlos_bench.config.schemas import ConfigSchema from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.services.config_persistence import ConfigPersistenceService -from mlos_bench.tests.config import locate_config_examples +from mlos_bench.tests.config import locate_config_examples, BUILTIN_TEST_CONFIG_PATH from mlos_bench.tunables.tunable_groups import TunableGroups from mlos_bench.util import get_class_from_name @@ -34,6 +34,14 @@ def filter_configs(configs_to_filter: list[str]) -> list[str]: ) assert configs +test_configs = locate_config_examples( + BUILTIN_TEST_CONFIG_PATH, + CONFIG_TYPE, + filter_configs, +) +# assert test_configs +configs.extend(test_configs) + @pytest.mark.parametrize("config_path", configs) def test_load_optimizer_config_examples( diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index a2d90a3d01..6e676e920f 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py 
@@ -15,7 +15,7 @@ from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.storage.sql.storage import SqlStorage -from mlos_bench.tests.config import locate_config_examples +from mlos_bench.tests.config import locate_config_examples, BUILTIN_TEST_CONFIG_PATH from mlos_bench.util import get_class_from_name mock_opt = mlos_bench.tests.optimizers.fixtures.mock_opt @@ -43,6 +43,14 @@ def filter_configs(configs_to_filter: list[str]) -> list[str]: ) assert configs +test_configs = locate_config_examples( + BUILTIN_TEST_CONFIG_PATH, + CONFIG_TYPE, + filter_configs, +) +# assert test_configs +configs.extend(test_configs) + @pytest.mark.parametrize("config_path", configs) def test_load_scheduler_config_examples( diff --git a/mlos_bench/mlos_bench/tests/config/services/test_load_service_config_examples.py b/mlos_bench/mlos_bench/tests/config/services/test_load_service_config_examples.py index beb0b1d018..84e7a1128c 100644 --- a/mlos_bench/mlos_bench/tests/config/services/test_load_service_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/services/test_load_service_config_examples.py @@ -10,7 +10,7 @@ from mlos_bench.config.schemas.config_schemas import ConfigSchema from mlos_bench.services.base_service import Service from mlos_bench.services.config_persistence import ConfigPersistenceService -from mlos_bench.tests.config import locate_config_examples +from mlos_bench.tests.config import locate_config_examples, BUILTIN_TEST_CONFIG_PATH _LOG = logging.getLogger(__name__) _LOG.setLevel(logging.DEBUG) @@ -40,6 +40,14 @@ def predicate(config_path: str) -> bool: ) assert configs +test_configs = locate_config_examples( + BUILTIN_TEST_CONFIG_PATH, + CONFIG_TYPE, + filter_configs, +) +assert test_configs +configs.extend(test_configs) + @pytest.mark.parametrize("config_path", configs) def test_load_service_config_examples( diff --git a/mlos_bench/mlos_bench/tests/config/storage/test_load_storage_config_examples.py b/mlos_bench/mlos_bench/tests/config/storage/test_load_storage_config_examples.py index e3696a85fa..38582707cf 100644 --- a/mlos_bench/mlos_bench/tests/config/storage/test_load_storage_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/storage/test_load_storage_config_examples.py @@ -10,7 +10,7 @@ from mlos_bench.config.schemas.config_schemas import ConfigSchema from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.storage.base_storage import Storage -from mlos_bench.tests.config import locate_config_examples +from mlos_bench.tests.config import locate_config_examples, BUILTIN_TEST_CONFIG_PATH from mlos_bench.util import get_class_from_name _LOG = logging.getLogger(__name__) @@ -33,6 +33,13 @@ def filter_configs(configs_to_filter: list[str]) -> list[str]: ) assert configs +test_configs = locate_config_examples( + BUILTIN_TEST_CONFIG_PATH, + CONFIG_TYPE, + filter_configs, +) +# assert test_configs +configs.extend(test_configs) @pytest.mark.parametrize("config_path", configs) def test_load_storage_config_examples( From 1eb1acb7af53309948178cbe0ac1747793881660 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 18:18:03 -0500 Subject: [PATCH 064/109] format --- .../environments/test_load_environment_config_examples.py | 2 +- .../config/optimizers/test_load_optimizer_config_examples.py | 2 +- .../config/schedulers/test_load_scheduler_config_examples.py | 2 +- .../tests/config/services/test_load_service_config_examples.py | 2 +- 
.../tests/config/storage/test_load_storage_config_examples.py | 3 ++- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/config/environments/test_load_environment_config_examples.py b/mlos_bench/mlos_bench/tests/config/environments/test_load_environment_config_examples.py index 064530919c..4b35282e53 100644 --- a/mlos_bench/mlos_bench/tests/config/environments/test_load_environment_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/environments/test_load_environment_config_examples.py @@ -11,7 +11,7 @@ from mlos_bench.environments.base_environment import Environment from mlos_bench.environments.composite_env import CompositeEnv from mlos_bench.services.config_persistence import ConfigPersistenceService -from mlos_bench.tests.config import locate_config_examples, BUILTIN_TEST_CONFIG_PATH +from mlos_bench.tests.config import BUILTIN_TEST_CONFIG_PATH, locate_config_examples from mlos_bench.tunables.tunable_groups import TunableGroups _LOG = logging.getLogger(__name__) diff --git a/mlos_bench/mlos_bench/tests/config/optimizers/test_load_optimizer_config_examples.py b/mlos_bench/mlos_bench/tests/config/optimizers/test_load_optimizer_config_examples.py index a507a3f01d..a407275438 100644 --- a/mlos_bench/mlos_bench/tests/config/optimizers/test_load_optimizer_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/optimizers/test_load_optimizer_config_examples.py @@ -10,7 +10,7 @@ from mlos_bench.config.schemas import ConfigSchema from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.services.config_persistence import ConfigPersistenceService -from mlos_bench.tests.config import locate_config_examples, BUILTIN_TEST_CONFIG_PATH +from mlos_bench.tests.config import BUILTIN_TEST_CONFIG_PATH, locate_config_examples from mlos_bench.tunables.tunable_groups import TunableGroups from mlos_bench.util import get_class_from_name diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index 6e676e920f..2bc4612257 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -15,7 +15,7 @@ from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.storage.sql.storage import SqlStorage -from mlos_bench.tests.config import locate_config_examples, BUILTIN_TEST_CONFIG_PATH +from mlos_bench.tests.config import BUILTIN_TEST_CONFIG_PATH, locate_config_examples from mlos_bench.util import get_class_from_name mock_opt = mlos_bench.tests.optimizers.fixtures.mock_opt diff --git a/mlos_bench/mlos_bench/tests/config/services/test_load_service_config_examples.py b/mlos_bench/mlos_bench/tests/config/services/test_load_service_config_examples.py index 84e7a1128c..96df98b29d 100644 --- a/mlos_bench/mlos_bench/tests/config/services/test_load_service_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/services/test_load_service_config_examples.py @@ -10,7 +10,7 @@ from mlos_bench.config.schemas.config_schemas import ConfigSchema from mlos_bench.services.base_service import Service from mlos_bench.services.config_persistence import ConfigPersistenceService -from mlos_bench.tests.config import locate_config_examples, BUILTIN_TEST_CONFIG_PATH +from mlos_bench.tests.config import BUILTIN_TEST_CONFIG_PATH, 
locate_config_examples _LOG = logging.getLogger(__name__) _LOG.setLevel(logging.DEBUG) diff --git a/mlos_bench/mlos_bench/tests/config/storage/test_load_storage_config_examples.py b/mlos_bench/mlos_bench/tests/config/storage/test_load_storage_config_examples.py index 38582707cf..680b3bacf1 100644 --- a/mlos_bench/mlos_bench/tests/config/storage/test_load_storage_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/storage/test_load_storage_config_examples.py @@ -10,7 +10,7 @@ from mlos_bench.config.schemas.config_schemas import ConfigSchema from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.storage.base_storage import Storage -from mlos_bench.tests.config import locate_config_examples, BUILTIN_TEST_CONFIG_PATH +from mlos_bench.tests.config import BUILTIN_TEST_CONFIG_PATH, locate_config_examples from mlos_bench.util import get_class_from_name _LOG = logging.getLogger(__name__) @@ -41,6 +41,7 @@ def filter_configs(configs_to_filter: list[str]) -> list[str]: # assert test_configs configs.extend(test_configs) + @pytest.mark.parametrize("config_path", configs) def test_load_storage_config_examples( config_loader_service: ConfigPersistenceService, From 1112af4251c7ecaac4b9879e4a095b9c425cddfd Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 18:35:17 -0500 Subject: [PATCH 065/109] list in __all__ so we load it as a part of tests schemas checking --- mlos_bench/mlos_bench/tests/schedulers/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mlos_bench/mlos_bench/tests/schedulers/__init__.py b/mlos_bench/mlos_bench/tests/schedulers/__init__.py index 4bc0076079..b166858875 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/__init__.py +++ b/mlos_bench/mlos_bench/tests/schedulers/__init__.py @@ -3,3 +3,8 @@ # Licensed under the MIT License. 
# """mlos_bench.tests.schedulers.""" +from mlos_bench.tests.schedulers.mock_scheduler import MockScheduler + +__all__ = [ + "MockScheduler", +] From 7a0d087a74dfec30f4b659634de10f74e7136eea Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 18:52:05 -0500 Subject: [PATCH 066/109] new mock scheduler schema and test configs --- .../schedulers/base-scheduler-subschema.json | 2 +- .../schedulers/mock-scheduler-subschema.json | 31 +++++++++++++++++-- ...-scheduler-invalid-mock-trial-data-2.jsonc | 11 +++++++ ...ck-scheduler-invalid-mock-trial-data.jsonc | 14 +++++++++ .../bad/unhandled/mock_sched-extra.jsonc | 7 +++++ .../good/full/mock_sched-full.jsonc | 31 +++++++++++++++++++ .../good/partial/mock_sched-partial.jsonc | 20 ++++++++++++ 7 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data-2.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/mock_sched-extra.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/mock_sched-full.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/mock_sched-partial.jsonc diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json index 702da1eec3..9417b7d00a 100644 --- a/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json @@ -58,7 +58,7 @@ "description": "The name of the scheduler class to use.", "type": "string", "$comment": "Exact matches are handled elsewhere.", - "pattern": "^mlos_bench[.]schedulers[.]" + "pattern": "^mlos_bench([.]tests)?[.]schedulers[.]" }, "config": { diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json index a784145c69..56d7590094 100644 --- a/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json @@ -25,8 +25,15 @@ "items": { "type": "object", "properties": { + "comments": { + "type": "string", + "description": "Optional comments about the trial status being reported." + }, "trial_id": { - "type": "string" + "type": "integer", + "description": "Unique identifier for the trial.", + "examples": [1, 2, 3], + "minimum": 1 }, "status": { "enum": [ @@ -41,9 +48,27 @@ "TIMED_OUT" ] }, - "score": "number" + "metrics": { + "type": "object", + "description": "A dictionary of metrics for the trial.", + "additionalProperties": { + "type": ["number", "string", "boolean"], + "description": "The value of the metric." 
+ }, + "examples": [ + { + "score": 0.95, + "color": "green" + }, + { + "accuracy": 0.85, + "loss": 0.15 + } + ] + } }, - "required": ["trial_id", "status"] + "required": ["trial_id", "status"], + "additionalProperties": false } } }, diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data-2.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data-2.jsonc new file mode 100644 index 0000000000..09b74f4377 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data-2.jsonc @@ -0,0 +1,11 @@ +{ + "class": "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler", + "config": { + "mock_trial_data": [ + { + // MISSING: "trial_id": 1, + "status": "SUCCEEDED" + } + ] + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data.jsonc new file mode 100644 index 0000000000..144e7f21e6 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data.jsonc @@ -0,0 +1,14 @@ +{ + "class": "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler", + "config": { + "mock_trial_data": [ + { + "trial_id": 1, + "status": "INVALID" + }, + { + "status": "SUCCEEDED" + } + ] + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/mock_sched-extra.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/mock_sched-extra.jsonc new file mode 100644 index 0000000000..c7bf9a46f7 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/mock_sched-extra.jsonc @@ -0,0 +1,7 @@ +{ + "class": "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler", + "config": { + "extra": "unsupported" + }, + "extra": "unsupported" +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/mock_sched-full.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/mock_sched-full.jsonc new file mode 100644 index 0000000000..ca6ad2f905 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/mock_sched-full.jsonc @@ -0,0 +1,31 @@ +{ + "$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json", + "class": "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler", + "config": { + "trial_config_repeat_count": 3, + "teardown": false, + "experiment_id": "MyExperimentName", + "config_id": 1, + "trial_id": 1, + "max_trials": 2, + + "mock_trial_data": [ + { + "status": "SUCCEEDED", + "trial_id": 1, + "metrics": { + "score": 0.9, + "color": "green" + } + }, + { + "status": "FAILED", + "trial_id": 2, + "metrics": { + "score": 0.1, + "color": "red" + } + } + ] + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/mock_sched-partial.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/mock_sched-partial.jsonc new file mode 100644 index 0000000000..2557c25cb4 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/mock_sched-partial.jsonc @@ -0,0 +1,20 @@ +{ + "class": 
"mlos_bench.tests.schedulers.mock_scheduler.MockScheduler", + "config": { + "mock_trial_data": [ + { + "status": "SUCCEEDED", + "trial_id": 1, + "metrics": { + "score": 0.9, + "color": "green" + } + }, + { + "status": "FAILED", + "trial_id": 2 + // missing metrics - OK + } + ] + } +} From dded2435f85a4695654b3cd35f6de9a3dd892293 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 19:03:24 -0500 Subject: [PATCH 067/109] refactor to split steps for easier hooking --- .../mlos_bench/schedulers/base_scheduler.py | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 1cd88fd585..3e47841591 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -242,8 +242,8 @@ def __exit__( self._in_context = False return False # Do not suppress exceptions - def start(self) -> None: - """Start the scheduling loop.""" + def _prepare_start(self) -> bool: + """Prepare the scheduler for starting.""" assert self.experiment is not None _LOG.info( "START: Experiment: %s Env: %s Optimizer: %s", @@ -262,21 +262,39 @@ def start(self) -> None: is_warm_up: bool = self.optimizer.supports_preload if not is_warm_up: _LOG.warning("Skip pending trials and warm-up: %s", self.optimizer) + return is_warm_up + def start(self) -> None: + """Start the scheduling loop.""" + assert self.experiment is not None + is_warm_up = self._prepare_start() not_done: bool = True while not_done: - _LOG.info("Optimization loop: Last trial ID: %d", self._last_trial_id) - self.run_schedule(is_warm_up) - not_done = self.add_new_optimizer_suggestions() - self.assign_trial_runners( - self.experiment.pending_trials( - datetime.now(UTC), - running=False, - trial_runner_assigned=False, - ) - ) + not_done = self._execute_scheduling_step(is_warm_up) is_warm_up = False + def _execute_scheduling_step(self, is_warm_up: bool) -> bool: + """ + Perform a single scheduling step. + + Notes + ----- + This method is called by the :py:meth:`Scheduler.start` method. + It is split out mostly to allow for easier testing with MockSchedulers. + """ + assert self.experiment is not None + _LOG.info("Optimization loop: Last trial ID: %d", self._last_trial_id) + self.run_schedule(is_warm_up) + not_done = self.add_new_optimizer_suggestions() + self.assign_trial_runners( + self.experiment.pending_trials( + datetime.now(UTC), + running=False, + trial_runner_assigned=False, + ) + ) + return not_done + def teardown(self) -> None: """ Tear down the TrialRunners/Environment(s). 
From f1fe022741a005db8618b8947d6000b52984902d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 May 2025 01:00:04 +0000 Subject: [PATCH 068/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../config/schedulers/test_load_scheduler_config_examples.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index dbcbf4a9fa..e901c456f5 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -23,7 +23,6 @@ storage = mlos_bench.tests.storage.sql.fixtures.storage - _LOG = logging.getLogger(__name__) _LOG.setLevel(logging.DEBUG) @@ -60,9 +59,7 @@ def test_load_scheduler_config_examples( config_path: str, mock_env_config_path: str, trial_runners: list[TrialRunner], - storage: SqlStorage, - mock_opt: MockOptimizer, ) -> None: """Tests loading a config example.""" @@ -82,9 +79,7 @@ def test_load_scheduler_config_examples( global_config=global_config, trial_runners=trial_runners, optimizer=mock_opt, - storage=storage, - root_env_config=mock_env_config_path, ) assert scheduler_inst is not None From 83ff70f8fa38bbcea0cfd7c3f6a6352e87e0bb9b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 20:02:09 -0500 Subject: [PATCH 069/109] Update mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py --- .../config/schedulers/test_load_scheduler_config_examples.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index e901c456f5..ad8f9248ac 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -19,7 +19,6 @@ from mlos_bench.util import get_class_from_name mock_opt = mlos_bench.tests.optimizers.fixtures.mock_opt - storage = mlos_bench.tests.storage.sql.fixtures.storage From 71420e65115b5bd162aad9e26ca8142fa10c73ed Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 May 2025 20:30:31 -0500 Subject: [PATCH 070/109] fixup --- mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py index 780b621046..b3e6f6cff9 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py @@ -11,7 +11,8 @@ from pytz import UTC -from mlos_bench.schedulers.base_scheduler import Optimizer, Scheduler +from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.storage.base_storage import Storage From 70130babeed5ae4fcbf0a845cfebdb7696d47d00 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 May 2025 01:30:52 +0000 Subject: [PATCH 071/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more 
information, see https://pre-commit.ci
---
 mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py
index b3e6f6cff9..43ffff0c34 100644
--- a/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py
+++ b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py
@@ -11,8 +11,8 @@
 
 from pytz import UTC
 
-from mlos_bench.schedulers.base_scheduler import Scheduler
 from mlos_bench.optimizers.base_optimizer import Optimizer
+from mlos_bench.schedulers.base_scheduler import Scheduler
 from mlos_bench.schedulers.trial_runner import TrialRunner
 from mlos_bench.storage.base_storage import Storage
 

From 0a440a1575c48238ee19376d2a4a783af5461933 Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Tue, 20 May 2025 14:45:06 -0500
Subject: [PATCH 072/109] ignore the build tree in vscode

---
 .vscode/settings.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index a01d1d6418..f29dec766e 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -8,6 +8,7 @@
         "**/__pycache__/": true,
         "**/node_modules/": true,
         "**/*.egg-info": true,
+        "mlos_*/build/": true,
         "doc/source/autoapi/": true,
         "doc/build/doctrees/": true,
         "doc/build/html/": true,

From efafc6f959f4bdab134b13da552195a90d388412 Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Wed, 21 May 2025 22:33:08 -0500
Subject: [PATCH 073/109] adding more accessors

---
 .../environments/base_environment.py          | 86 ++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/mlos_bench/mlos_bench/environments/base_environment.py b/mlos_bench/mlos_bench/environments/base_environment.py
index 094085c78b..98db5cfe14 100644
--- a/mlos_bench/mlos_bench/environments/base_environment.py
+++ b/mlos_bench/mlos_bench/environments/base_environment.py
@@ -363,6 +363,92 @@ def parameters(self) -> dict[str, TunableValue]:
         """
         return self._params.copy()
 
+    @property
+    def current_trial_id(self) -> int:
+        """
+        Get the current trial ID.
+
+        This value can be used in scripts or environment variables to help
+        identify the Trial this Environment is currently running.
+
+        Returns
+        -------
+        trial_id : int
+            The current trial ID.
+
+        Notes
+        -----
+        This method is used to identify the current trial ID for the environment.
+        It is expected to be called *after* the base
+        :py:meth:`Environment.setup` method has been called and parameters have
+        been assigned.
+        """
+        val = self._params["trial_id"]
+        assert isinstance(val, int), (
+            "Expected trial_id to be an int, but got %s (type %s)",
+            val,
+            type(val),
+        )
+        return val
+
+    @property
+    def trial_runner_id(self) -> int:
+        """
+        Get the ID of the
+        :py:class:`~mlos_bench.schedulers.trial_runner.TrialRunner` for this
+        Environment.
+
+        This value can be used in scripts or environment variables to help
+        identify the TrialRunner for this Environment.
+
+        Returns
+        -------
+        trial_runner_id : int
+            The trial runner ID.
+
+        Notes
+        -----
+        This shouldn't change during the lifetime of the Environment since each
+        Environment is assigned to a single TrialRunner.
+        """
+        val = self._params["trial_runner_id"]
+        assert isinstance(val, int), (
+            "Expected trial_runner_id to be an int, but got %s (type %s)",
+            val,
+            type(val),
+        )
+        return val
+
+    @property
+    def experiment_id(self) -> str:
+        """
+        Get the ID of the experiment.
+
+        This value can be used in scripts or environment variables to help
+        identify the Experiment this Environment belongs to.
+
+        Returns
+        -------
+        experiment_id : str
+            The ID of the experiment.
+
+        Notes
+        -----
+        This value comes from the globals config or ``mlos_bench`` CLI arguments
+        in the experiment setup.
+
+        See Also
+        --------
+        mlos_bench.config : documentation on the configuration system
+        """
+        val = self._params["experiment_id"]
+        assert isinstance(val, str), (
+            "Expected experiment_id to be a str, but got %s (type %s)",
+            val,
+            type(val),
+        )
+        return val
+
     def setup(self, tunables: TunableGroups, global_config: dict | None = None) -> bool:
         """
         Set up a new benchmark environment, if necessary. This method must be
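
Aside: a short sketch of the new accessors in use. Assuming an Environment
subclass (the ``TaggedMockEnv`` name and extra metric keys are made up for
illustration), the IDs can be attached to results once ``setup()`` has
populated the parameters:

    from datetime import datetime

    from mlos_bench.environments.mock_env import MockEnv
    from mlos_bench.environments.status import Status
    from mlos_bench.tunables.tunable_types import TunableValue

    class TaggedMockEnv(MockEnv):
        """Sketch: tag run() results with the new ID accessors."""

        def run(self) -> tuple[Status, datetime, dict[str, TunableValue] | None]:
            (status, timestamp, metrics) = super().run()
            if metrics is not None:
                # Only valid after setup() has assigned self._params.
                metrics["experiment_id"] = self.experiment_id
                metrics["trial_id"] = self.current_trial_id
                metrics["trial_runner_id"] = self.trial_runner_id
            return (status, timestamp, metrics)
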
From 842d393ae2bc5041060f6a78cc7ed07a4758465d Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Wed, 21 May 2025 22:34:02 -0500
Subject: [PATCH 074/109] wip: enable mock env to report arbitrary data

---
 .../mlos_bench/environments/mock_env.py       | 211 +++++++++++++++++-
 1 file changed, 206 insertions(+), 5 deletions(-)

diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py
index ac6d9b7f00..e15dcffed4 100644
--- a/mlos_bench/mlos_bench/environments/mock_env.py
+++ b/mlos_bench/mlos_bench/environments/mock_env.py
@@ -6,8 +6,10 @@
 
 import logging
 import random
+import time
+from dataclasses import dataclass
 from datetime import datetime
 from typing import Any
 
 import numpy
 
@@ -21,6 +23,131 @@
 _LOG = logging.getLogger(__name__)
 
 
+@dataclass
+class MockTrialPhaseData:
+    """Mock trial data for a specific phase of a trial."""
+
+    phase: str
+    """Phase of the trial data (e.g., setup, run, status, teardown)."""
+
+    status: Status
+    """Status response for the phase."""
+
+    metrics: dict[str, TunableValue] | None = None
+    """Metrics response for the phase."""
+
+    sleep: float | None = 0.0
+    """Optional sleep time in seconds to simulate phase execution time."""
+
+    exception: str | None = None
+    """Message of an exception to raise for the phase."""
+
+    @staticmethod
+    def from_dict(phase: str, data: dict | None) -> "MockTrialPhaseData":
+        """
+        Create a MockTrialPhaseData instance from a dictionary.
+
+        Parameters
+        ----------
+        phase : str
+            Phase of the trial data.
+        data : dict | None
+            Dictionary containing the phase data.
+
+        Returns
+        -------
+        MockTrialPhaseData
+            Instance of MockTrialPhaseData.
+        """
+        data = data or {}
+        assert phase in {"setup", "run", "status", "teardown"}, f"Invalid phase: {phase}"
+        if phase in {"teardown", "status"}:
+            # The teardown and status phases are not expected to specify metrics or status data.
+ assert "metrics" not in data, f"Unexpected metrics data in {phase} phase: {data}" + assert "status" not in data, f"Unexpected status data in {phase} phase: {data}" + if "sleep" in data: + assert isinstance( + data["sleep"], (int, float) + ), f"Invalid sleep in {phase} phase: {data}" + assert 60 >= data["sleep"] >= 0, f"Invalid sleep time in {phase} phase: {data}" + if "metrics" in data: + assert isinstance(data["metrics"], dict), f"Invalid metrics in {phase} phase: {data}" + default_phases = { + "run": Status.SUCCEEDED, + # FIXME: this causes issues if we report RUNNING instead of READY + "status": Status.READY, + } + status = Status.parse(data.get("status", default_phases.get(phase, Status.UNKNOWN))) + return MockTrialPhaseData( + phase=phase, + status=status, + metrics=data.get("metrics"), + sleep=data.get("sleep"), + exception=data.get("exception"), + ) + + +@dataclass +class MockTrialData: + """Mock trial data for a specific trial ID.""" + + trial_id: int + """Trial ID for the mock trial data.""" + setup: MockTrialPhaseData + """Setup phase data for the trial.""" + run: MockTrialPhaseData + """Run phase data for the trial.""" + status: MockTrialPhaseData + """Status phase data for the trial.""" + teardown: MockTrialPhaseData + """Teardown phase data for the trial.""" + + @staticmethod + def from_dict(trial_id: int, data: dict) -> "MockTrialData": + """ + Create a MockTrialData instance from a dictionary. + + Parameters + ---------- + trial_id : int + Trial ID for the mock trial data. + data : dict + Dictionary containing the trial data. + + Returns + ------- + MockTrialData + Instance of MockTrialData. + """ + return MockTrialData( + trial_id=trial_id, + setup=MockTrialPhaseData.from_dict("setup", data.get("setup")), + run=MockTrialPhaseData.from_dict("run", data.get("run")), + status=MockTrialPhaseData.from_dict("status", data.get("status")), + teardown=MockTrialPhaseData.from_dict("teardown", data.get("teardown")), + ) + + @staticmethod + def load_mock_trial_data(mock_trial_data: dict) -> dict[int, "MockTrialData"]: + """ + Load mock trial data from a dictionary. + + Parameters + ---------- + mock_trial_data : dict + Dictionary containing mock trial data. + + Returns + ------- + dict[int, MockTrialData] + Dictionary of mock trial data keyed by trial ID. + """ + return { + int(trial_id): MockTrialData.from_dict(trial_id=int(trial_id), data=trial_data) + for trial_id, trial_data in mock_trial_data.items() + } + + class MockEnv(Environment): """Scheduler-side environment to mock the benchmark results.""" @@ -55,6 +182,19 @@ def __init__( # pylint: disable=too-many-arguments service: Service An optional service object. Not used by this class. """ + # First allow merging mock_trial_data from the global_config into the + # config so we can check it against the JSON schema for expected data + # types. + if global_config and "mock_trial_data" in global_config: + mock_trial_data = global_config["mock_trial_data"] + if not isinstance(mock_trial_data, dict): + raise ValueError(f"Invalid mock_trial_data in global_config: {mock_trial_data}") + # Merge the mock trial data into the config. 
+ config["mock_trial_data"] = { + **config.get("mock_trial_data", {}), + **mock_trial_data, + } + super().__init__( name=name, config=config, @@ -62,6 +202,9 @@ def __init__( # pylint: disable=too-many-arguments tunables=tunables, service=service, ) + self._mock_trial_data = MockTrialData.load_mock_trial_data( + self.config.get("mock_trial_data", {}) + ) seed = int(self.config.get("mock_env_seed", -1)) self._run_random = random.Random(seed or None) if seed >= 0 else None self._status_random = random.Random(seed or None) if seed >= 0 else None @@ -83,6 +226,43 @@ def _produce_metrics(self, rand: random.Random | None) -> dict[str, TunableValue return {metric: float(score) for metric in self._metrics or []} + def get_current_mock_trial_data(self) -> MockTrialData: + """Gets mock trial data for the current trial ID. + + If no (or missing) mock trial data is found, a new instance of + MockTrialData is created from random data. + + Note + ---- + This method must be called after the base :py:meth:`Environment.setup` + method is called to ensure the current ``trial_id`` is set. + """ + trial_id = self.current_trial_id + mock_trial_data = self._mock_trial_data.get(trial_id) + if not mock_trial_data: + mock_trial_data = MockTrialData( + trial_id=trial_id, + setup=MockTrialPhaseData.from_dict(phase="setup", data=None), + run=MockTrialPhaseData.from_dict(phase="run", data=None), + status=MockTrialPhaseData.from_dict(phase="status", data=None), + teardown=MockTrialPhaseData.from_dict(phase="teardown", data=None), + ) + # Save the generated data for later. + self._mock_trial_data[trial_id] = mock_trial_data + return mock_trial_data + + def setup(self, tunables: TunableGroups, global_config: dict | None = None) -> bool: + is_success = super().setup(tunables, global_config) + mock_trial_data = self.get_current_mock_trial_data() + if mock_trial_data.setup.sleep: + _LOG.debug("Sleeping for %s seconds", mock_trial_data.setup.sleep) + time.sleep(mock_trial_data.setup.sleep) + if mock_trial_data.setup.exception: + raise RuntimeError( + f"Mock trial data setup exception: {mock_trial_data.setup.exception}" + ) + return is_success and mock_trial_data.setup.status.is_ready() + def run(self) -> tuple[Status, datetime, dict[str, TunableValue] | None]: """ Produce mock benchmark data for one experiment. @@ -99,8 +279,17 @@ def run(self) -> tuple[Status, datetime, dict[str, TunableValue] | None]: (status, timestamp, _) = result = super().run() if not status.is_ready(): return result - metrics = self._produce_metrics(self._run_random) - return (Status.SUCCEEDED, timestamp, metrics) + mock_trial_data = self.get_current_mock_trial_data() + if mock_trial_data.run.sleep: + _LOG.debug("Sleeping for %s seconds", mock_trial_data.run.sleep) + time.sleep(mock_trial_data.run.sleep) + if mock_trial_data.run.exception: + raise RuntimeError(f"Mock trial data run exception: {mock_trial_data.run.exception}") + + if mock_trial_data.run.metrics is None: + # If no metrics are provided, generate them. 
+            mock_trial_data.run.metrics = self._produce_metrics(self._run_random)
+        return (mock_trial_data.run.status, timestamp, mock_trial_data.run.metrics)
 
     def status(self) -> tuple[Status, datetime, list[tuple[datetime, str, Any]]]:
         """
@@ -116,10 +305,22 @@
         (status, timestamp, _) = result = super().status()
         if not status.is_ready():
             return result
-        metrics = self._produce_metrics(self._status_random)
+        mock_trial_data = self.get_current_mock_trial_data()
+        if mock_trial_data.status.sleep:
+            _LOG.debug("Sleeping for %s seconds", mock_trial_data.status.sleep)
+            time.sleep(mock_trial_data.status.sleep)
+        if mock_trial_data.status.exception:
+            raise RuntimeError(
+                f"Mock trial data status exception: {mock_trial_data.status.exception}"
+            )
+        if mock_trial_data.status.metrics is None:
+            # If no metrics are provided, generate them.
+            metrics = self._produce_metrics(self._status_random)
+        else:
+            # Otherwise, use the explicitly provided metrics.
+            metrics = mock_trial_data.status.metrics
         return (
-            # FIXME: this causes issues if we report RUNNING instead of READY
-            Status.READY,
+            mock_trial_data.status.status,
             timestamp,
             [(timestamp, metric, score) for (metric, score) in metrics.items()],
         )

From 19cec783a952ecbd83a3a63fcf43ec31db24173e Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Wed, 21 May 2025 22:34:10 -0500
Subject: [PATCH 075/109] spelling

---
 mlos_bench/mlos_bench/environments/script_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlos_bench/mlos_bench/environments/script_env.py b/mlos_bench/mlos_bench/environments/script_env.py
index 6ac4674cfe..d71eb66183 100644
--- a/mlos_bench/mlos_bench/environments/script_env.py
+++ b/mlos_bench/mlos_bench/environments/script_env.py
@@ -5,7 +5,7 @@
 """
 Base scriptable benchmark environment.
 
-TODO: Document how variable propogation works in the script environments using
+TODO: Document how variable propagation works in the script environments using
 shell_env_params, required_args, const_args, etc.
 """
 
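
Aside: to make the mock trial data plumbing from the MockEnv patch above
concrete, a hedged sketch of loading per-trial overrides (the dict shape
follows ``MockTrialPhaseData.from_dict()``; the exact JSON schema for this
data is still being settled in a later patch, and the trial values here are
invented for illustration):

    from mlos_bench.environments.mock_env import MockTrialData
    from mlos_bench.environments.status import Status

    # Per-trial overrides, keyed by trial_id, as they might appear in a
    # global config's "mock_trial_data" section.
    trial_data = MockTrialData.load_mock_trial_data(
        {
            "1": {"run": {"status": "SUCCEEDED", "metrics": {"score": 0.9}}},
            "2": {
                "setup": {"sleep": 0.5},
                "run": {"status": "FAILED", "exception": "injected failure"},
            },
        }
    )
    assert trial_data[1].run.status == Status.SUCCEEDED  # keys are parsed to ints
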
+ """ + if isinstance(status, Status): + return status + if not isinstance(status, str): + _LOG.warning("Expected type %s for status: %s", type(status), status) + status = str(status) + if status.isdigit(): try: - return Status(int(status_str)) + return Status(int(status)) except ValueError: - _LOG.warning("Unknown status: %d", int(status_str)) + _LOG.warning("Unknown status: %d", int(status)) try: - status_str = status_str.upper().strip() - return Status[status_str] + status = status.upper().strip() + return Status[status] except KeyError: - _LOG.warning("Unknown status: %s", status_str) + _LOG.warning("Unknown status: %s", status) return Status.UNKNOWN def is_good(self) -> bool: diff --git a/mlos_bench/mlos_bench/storage/sql/common.py b/mlos_bench/mlos_bench/storage/sql/common.py index 97eb270c9d..032cf9259d 100644 --- a/mlos_bench/mlos_bench/storage/sql/common.py +++ b/mlos_bench/mlos_bench/storage/sql/common.py @@ -95,7 +95,7 @@ def get_trials( config_id=trial.config_id, ts_start=utcify_timestamp(trial.ts_start, origin="utc"), ts_end=utcify_nullable_timestamp(trial.ts_end, origin="utc"), - status=Status.from_str(trial.status), + status=Status.parse(trial.status), trial_runner_id=trial.trial_runner_id, ) for trial in trials.fetchall() diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index acc2a497b4..0e380e3e13 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -188,7 +188,7 @@ def load( status: list[Status] = [] for trial in cur_trials.fetchall(): - stat = Status.from_str(trial.status) + stat = Status.parse(trial.status) status.append(stat) trial_ids.append(trial.trial_id) configs.append( @@ -272,7 +272,7 @@ def get_trial_by_id( config_id=trial.config_id, trial_runner_id=trial.trial_runner_id, opt_targets=self._opt_targets, - status=Status.from_str(trial.status), + status=Status.parse(trial.status), restoring=True, config=config, ) @@ -330,7 +330,7 @@ def pending_trials( config_id=trial.config_id, trial_runner_id=trial.trial_runner_id, opt_targets=self._opt_targets, - status=Status.from_str(trial.status), + status=Status.parse(trial.status), restoring=True, config=config, ) diff --git a/mlos_bench/mlos_bench/tests/environments/test_status.py b/mlos_bench/mlos_bench/tests/environments/test_status.py index 3c0a9bccf3..8123f2b852 100644 --- a/mlos_bench/mlos_bench/tests/environments/test_status.py +++ b/mlos_bench/mlos_bench/tests/environments/test_status.py @@ -51,16 +51,16 @@ def test_status_from_str_valid(input_str: str, expected_status: Status) -> None: Expected Status enum value. """ assert ( - Status.from_str(input_str) == expected_status + Status.parse(input_str) == expected_status ), f"Expected {expected_status} for input: {input_str}" # Check lowercase representation assert ( - Status.from_str(input_str.lower()) == expected_status + Status.parse(input_str.lower()) == expected_status ), f"Expected {expected_status} for input: {input_str.lower()}" if input_str.isdigit(): # Also test the numeric representation assert ( - Status.from_str(int(input_str)) == expected_status + Status.parse(int(input_str)) == expected_status ), f"Expected {expected_status} for input: {int(input_str)}" @@ -83,7 +83,7 @@ def test_status_from_str_invalid(invalid_input: Any) -> None: input. 
""" assert ( - Status.from_str(invalid_input) == Status.UNKNOWN + Status.parse(invalid_input) == Status.UNKNOWN ), f"Expected Status.UNKNOWN for invalid input: {invalid_input}" From 7904716320c7931196c2c2de506c90a4c2bce9ce Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 22:36:27 -0500 Subject: [PATCH 077/109] extra test too --- mlos_bench/mlos_bench/tests/environments/test_status.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mlos_bench/mlos_bench/tests/environments/test_status.py b/mlos_bench/mlos_bench/tests/environments/test_status.py index 8123f2b852..785275825c 100644 --- a/mlos_bench/mlos_bench/tests/environments/test_status.py +++ b/mlos_bench/mlos_bench/tests/environments/test_status.py @@ -57,6 +57,9 @@ def test_status_from_str_valid(input_str: str, expected_status: Status) -> None: assert ( Status.parse(input_str.lower()) == expected_status ), f"Expected {expected_status} for input: {input_str.lower()}" + assert ( + Status.parse(expected_status) == expected_status + ), f"Expected {expected_status} for input: {expected_status}" if input_str.isdigit(): # Also test the numeric representation assert ( From 54a96e88a6bfbda80269cec2b7c6b2be67950e86 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 22:36:34 -0500 Subject: [PATCH 078/109] more links --- mlos_bench/mlos_bench/launcher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index c728ed7fb2..353ace23f0 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -55,8 +55,9 @@ def __init__(self, description: str, long_text: str = "", argv: list[str] | None Other required_args values can also be pulled from shell environment variables. - For additional details, please see the website or the README.md files in - the source tree: + For additional details, please see the documentation website or the + README.md files in the source tree: + """ parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog) From 2e1c4db8fb694796e25a8a0a963ffb286b8e7ce5 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 22:36:48 -0500 Subject: [PATCH 079/109] start adding a mock trial runner --- .../tests/schedulers/mock_trial_runner.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 mlos_bench/mlos_bench/tests/schedulers/mock_trial_runner.py diff --git a/mlos_bench/mlos_bench/tests/schedulers/mock_trial_runner.py b/mlos_bench/mlos_bench/tests/schedulers/mock_trial_runner.py new file mode 100644 index 0000000000..d6cc21d54f --- /dev/null +++ b/mlos_bench/mlos_bench/tests/schedulers/mock_trial_runner.py @@ -0,0 +1,89 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +MockTrialRunner for testing :py:class:`mlos_bench.schedulers.Scheduler` logic +with mock trial data. + +This class is intended for use in unit tests and allows for deterministic trial +execution by returning pre-specified results from the ``global_config``. + +Example +------- +Setup mock trial data in the global_config. + +>>> mock_trial_data = { +... 1: { +... "trial_id": 1, +... "status": "SUCCEEDED", +... "metrics": { +... "score": 42.0, +... "color": "red" +... }, +... # Optional sleep time in seconds +... "sleep": 0.1 +... }, +... # Add more trials as needed. +... } + +Normally, this would be part of the global_config passed to the scheduler. +>>> global_config = { +... "mock_trial_data": mock_trial_data, +... 
# Other global config parameters... +... } + +>>> runner = MockTrialRunner(0, env) +>>> status, timestamp, metrics = runner.run_trial(trial, global_config) +>>> print(status, metrics) +Status.SUCCEEDED {'score': 42.0, 'color': 'red'} +""" +import time +from datetime import datetime +from typing import Any + +from mlos_bench.schedulers.trial_runner import TrialRunner +from mlos_bench.storage.base_storage import Storage +from mlos_bench.environments.status import Status +from mlos_bench.tunables.tunable_types import TunableValue + + +class MockTrialRunner(TrialRunner): + """ + Mock implementation of TrialRunner for testing. + + This class overrides the run_trial method to return mock results + from the global_config["mock_trial_data"] dict, keyed by trial_id. + + """ + + def run_trial( + self, + trial: Storage.Trial, + global_config: dict[str, Any] | None = None, + ) -> tuple[Status, datetime, dict[str, TunableValue] | None]: + """ + Run a mock trial using data from global_config["mock_trial_data"]. + + Parameters + ---------- + trial : Storage.Trial + The trial object, must have a trial_id attribute. + global_config : dict + Global configuration, must contain "mock_trial_data". + + Returns + ------- + (status, timestamp, metrics) : (Status, datetime, dict[str, TunableValue] | None) + Status, timestamp, and metrics for the mock trial. + """ + assert global_config is not None, "global_config must be provided." + mock_data = global_config.get("mock_trial_data", {}) + trial_id = getattr(trial, "trial_id", None) + assert trial_id in mock_data, f"No mock data for trial_id {trial_id}" + data = mock_data[trial_id] + sleep_time = data.get("sleep", 0.01) + time.sleep(sleep_time) + status = Status[data.get("status", "SUCCEEDED")] + metrics = data.get("metrics", {}) + timestamp = datetime.now() + return status, timestamp, metrics From 626554d65a6a5c687fd5a4addb8913e0b13079e9 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 22:36:56 -0500 Subject: [PATCH 080/109] Revert "start adding a mock trial runner" can do that all in mock_env This reverts commit 2e1c4db8fb694796e25a8a0a963ffb286b8e7ce5. --- .../tests/schedulers/mock_trial_runner.py | 89 ------------------- 1 file changed, 89 deletions(-) delete mode 100644 mlos_bench/mlos_bench/tests/schedulers/mock_trial_runner.py diff --git a/mlos_bench/mlos_bench/tests/schedulers/mock_trial_runner.py b/mlos_bench/mlos_bench/tests/schedulers/mock_trial_runner.py deleted file mode 100644 index d6cc21d54f..0000000000 --- a/mlos_bench/mlos_bench/tests/schedulers/mock_trial_runner.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# -""" -MockTrialRunner for testing :py:class:`mlos_bench.schedulers.Scheduler` logic -with mock trial data. - -This class is intended for use in unit tests and allows for deterministic trial -execution by returning pre-specified results from the ``global_config``. - -Example -------- -Setup mock trial data in the global_config. - ->>> mock_trial_data = { -... 1: { -... "trial_id": 1, -... "status": "SUCCEEDED", -... "metrics": { -... "score": 42.0, -... "color": "red" -... }, -... # Optional sleep time in seconds -... "sleep": 0.1 -... }, -... # Add more trials as needed. -... } - -Normally, this would be part of the global_config passed to the scheduler. ->>> global_config = { -... "mock_trial_data": mock_trial_data, -... # Other global config parameters... -... 
} - ->>> runner = MockTrialRunner(0, env) ->>> status, timestamp, metrics = runner.run_trial(trial, global_config) ->>> print(status, metrics) -Status.SUCCEEDED {'score': 42.0, 'color': 'red'} -""" -import time -from datetime import datetime -from typing import Any - -from mlos_bench.schedulers.trial_runner import TrialRunner -from mlos_bench.storage.base_storage import Storage -from mlos_bench.environments.status import Status -from mlos_bench.tunables.tunable_types import TunableValue - - -class MockTrialRunner(TrialRunner): - """ - Mock implementation of TrialRunner for testing. - - This class overrides the run_trial method to return mock results - from the global_config["mock_trial_data"] dict, keyed by trial_id. - - """ - - def run_trial( - self, - trial: Storage.Trial, - global_config: dict[str, Any] | None = None, - ) -> tuple[Status, datetime, dict[str, TunableValue] | None]: - """ - Run a mock trial using data from global_config["mock_trial_data"]. - - Parameters - ---------- - trial : Storage.Trial - The trial object, must have a trial_id attribute. - global_config : dict - Global configuration, must contain "mock_trial_data". - - Returns - ------- - (status, timestamp, metrics) : (Status, datetime, dict[str, TunableValue] | None) - Status, timestamp, and metrics for the mock trial. - """ - assert global_config is not None, "global_config must be provided." - mock_data = global_config.get("mock_trial_data", {}) - trial_id = getattr(trial, "trial_id", None) - assert trial_id in mock_data, f"No mock data for trial_id {trial_id}" - data = mock_data[trial_id] - sleep_time = data.get("sleep", 0.01) - time.sleep(sleep_time) - status = Status[data.get("status", "SUCCEEDED")] - metrics = data.get("metrics", {}) - timestamp = datetime.now() - return status, timestamp, metrics From b87c02a0e174dbb298c5b28e28a6c9c6f2ee0ebe Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 23:11:41 -0500 Subject: [PATCH 081/109] schema work on mock trial data --- .../environments/mock-env-subschema.json | 129 ++++++++++++++++++ .../mock_env-bad-trial-data-fields.jsonc | 24 ++++ .../invalid/mock_env-bad-trial-data-ids.jsonc | 13 ++ .../mock_env-trial-data-extras.jsonc | 15 ++ .../test-cases/good/full/mock_env-full.jsonc | 36 ++++- 5 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/invalid/mock_env-bad-trial-data-fields.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/invalid/mock_env-bad-trial-data-ids.jsonc create mode 100644 mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/unhandled/mock_env-trial-data-extras.jsonc diff --git a/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json b/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json index cb2de6c719..fd2c84876d 100644 --- a/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json +++ b/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json @@ -3,6 +3,124 @@ "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json", "title": "mlos_bench MockEnv config", "description": "Config instance for a mlos_bench MockEnv", + + "$defs": { + "mock_trial_common_phase_data": { + "type": "object", + "properties": { + "sleep": { + "type": "number", + "description": "Optional time to sleep (in seconds) before returning from this phase of the trial.", 
+ "examples": [0, 0.1, 0.5, 1, 2], + "minimum": 0, + "maximum": 60 + }, + "exception": { + "type": "string", + "description": "Optional exception message to raise during phase." + } + } + }, + "mock_trial_status_run_phase_data": { + "type": "object", + "properties": { + "status": { + "description": "The status to report for this phase of the trial. Default is phase dependent.", + "enum": [ + "UNKNOWN", + "PENDING", + "READY", + "RUNNING", + "SUCCEEDED", + "CANCELED", + "FAILED", + "TIMED_OUT" + ] + }, + "metrics": { + "type": "object", + "description": "A dictionary of metrics for this phase of the trial.", + "additionalProperties": { + "type": [ + "number", + "string", + "boolean" + ], + "description": "The value of the metric." + }, + "examples": [ + { + "score": 0.95, + "color": "green" + }, + { + "accuracy": 0.85, + "loss": 0.15 + } + ], + "minProperties": 0 + } + } + }, + "mock_trial_data": { + "type": "object", + "properties": { + "run": { + "description": "A dictionary of trial data for the run phase.", + "type": "object", + "allOf": [ + { + "$ref": "#/$defs/mock_trial_common_phase_data" + }, + { + "$ref": "#/$defs/mock_trial_status_run_phase_data" + } + ], + "minProperties": 1, + "unevaluatedProperties": false + }, + "status": { + "description": "A dictionary of trial data for the status phase.", + "type": "object", + "allOf": [ + { + "$ref": "#/$defs/mock_trial_common_phase_data" + }, + { + "$ref": "#/$defs/mock_trial_status_run_phase_data" + } + ], + "minProperties": 1, + "unevaluatedProperties": false + }, + "setup": { + "description": "A dictionary of trial data for the setup phase.", + "type": "object", + "allOf": [ + { + "$ref": "#/$defs/mock_trial_common_phase_data" + } + ], + "minProperties": 1, + "unevaluatedProperties": false + }, + "teardown": { + "description": "A dictionary of trial data for the teardown phase.", + "type": "object", + "allOf": [ + { + "$ref": "#/$defs/mock_trial_common_phase_data" + } + ], + "minProperties": 1, + "unevaluatedProperties": false + } + }, + "unevaluatedProperties": false, + "minProperties": 1 + } + }, + "type": "object", "properties": { "class": { @@ -42,6 +160,17 @@ }, "minItems": 1, "uniqueItems": true + }, + "mock_trial_data": { + "description": "A list of trial data to use for testing.", + "type": "object", + "patternProperties": { + "^[0-9]+$": { + "$ref": "#/$defs/mock_trial_data" + } + }, + "unevaluatedProperties": false, + "minProperties": 1 } } } diff --git a/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/invalid/mock_env-bad-trial-data-fields.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/invalid/mock_env-bad-trial-data-fields.jsonc new file mode 100644 index 0000000000..d36559cf33 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/invalid/mock_env-bad-trial-data-fields.jsonc @@ -0,0 +1,24 @@ +{ + "class": "mlos_bench.environments.mock_env.MockEnv", + "config": { + "mock_trial_data": { + "1": { + "run": { + // bad types + "status": null, + "metrics": [], + "exception": null, + "sleep": "1", + }, + // missing fields + "setup": {}, + "teardown": { + "status": "UNKNOWN", + "metrics": { + "unexpected": "field" + } + } + } + } + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/invalid/mock_env-bad-trial-data-ids.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/invalid/mock_env-bad-trial-data-ids.jsonc new file mode 100644 index 0000000000..400e557d0f --- /dev/null +++ 
b/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/invalid/mock_env-bad-trial-data-ids.jsonc @@ -0,0 +1,13 @@ +{ + "class": "mlos_bench.environments.mock_env.MockEnv", + "config": { + "mock_trial_data": { + // invalid trial id + "trial_id_1": { + "run": { + "status": "FAILED" + } + } + } + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/unhandled/mock_env-trial-data-extras.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/unhandled/mock_env-trial-data-extras.jsonc new file mode 100644 index 0000000000..ecdf4cd0f5 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/bad/unhandled/mock_env-trial-data-extras.jsonc @@ -0,0 +1,15 @@ +{ + "class": "mlos_bench.environments.mock_env.MockEnv", + "config": { + "mock_trial_data": { + "1": { + "new_phase": { + "status": "FAILED" + }, + "run": { + "expected": "property" + } + } + } + } +} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/good/full/mock_env-full.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/good/full/mock_env-full.jsonc index a00f8ca60c..b618627ecf 100644 --- a/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/good/full/mock_env-full.jsonc +++ b/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/good/full/mock_env-full.jsonc @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/environments/environment-schema.json", + //"$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/environments/environment-schema.json", "name": "mock_env-full", "description": "More descriptive text.", @@ -25,6 +25,38 @@ "mock_env_metrics": [ "latency", "cost" - ] + ], + "mock_trial_data": { + "1": { + "setup": { + "sleep": 0.1 + }, + "status": { + "metrics": { + "latency": 0.2, + "cost": 0.3 + } + }, + "run": { + "sleep": 0.2, + "status": "SUCCEEDED", + "metrics": { + "latency": 0.1, + "cost": 0.2 + } + }, + "teardown": { + "sleep": 0.1 + } + }, + "2": { + "setup": { + "exception": "Some exception" + }, + "teardown": { + "exception": "Some other exception" + } + } + } } } From b4e5640725480712b38a44af9221f0356eeece1b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 23:12:59 -0500 Subject: [PATCH 082/109] remove mock scheduler --- .../schedulers/mock-scheduler-subschema.json | 83 ------------------- .../schemas/schedulers/scheduler-schema.json | 3 - .../tests/schedulers/mock_scheduler.py | 66 --------------- 3 files changed, 152 deletions(-) delete mode 100644 mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json delete mode 100644 mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json deleted file mode 100644 index 56d7590094..0000000000 --- a/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/mock-scheduler-subschema.json", - "title": "mlos_bench Mock Scheduler config", - "description": "config for an mlos_bench Mock Scheduler", - "type": "object", - "properties": { - "class": { - "enum": [ - 
"mlos_bench.tests.schedulers.mock_scheduler.MockScheduler" - ] - }, - "config": { - "type": "object", - "allOf": [ - { - "$ref": "base-scheduler-subschema.json#/$defs/base_scheduler_config" - }, - { - "type": "object", - "properties": { - "mock_trial_data": { - "description": "A list of trial data to use for testing.", - "type": "array", - "items": { - "type": "object", - "properties": { - "comments": { - "type": "string", - "description": "Optional comments about the trial status being reported." - }, - "trial_id": { - "type": "integer", - "description": "Unique identifier for the trial.", - "examples": [1, 2, 3], - "minimum": 1 - }, - "status": { - "enum": [ - null, - "UNKNOWN", - "PENDING", - "READY", - "RUNNING", - "SUCCEEDED", - "CANCELED", - "FAILED", - "TIMED_OUT" - ] - }, - "metrics": { - "type": "object", - "description": "A dictionary of metrics for the trial.", - "additionalProperties": { - "type": ["number", "string", "boolean"], - "description": "The value of the metric." - }, - "examples": [ - { - "score": 0.95, - "color": "green" - }, - { - "accuracy": 0.85, - "loss": 0.15 - } - ] - } - }, - "required": ["trial_id", "status"], - "additionalProperties": false - } - } - }, - "minProperties": 1 - } - ], - "minProperties": 1, - "unevaluatedProperties": false - } - }, - "required": ["class"] -} diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json index 6ee7ffb1f1..3086abacd7 100644 --- a/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json +++ b/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json @@ -14,9 +14,6 @@ { "$comment": "The set of known Scheduler subschemas. Add others as needed.", "oneOf": [ - { - "$ref": "./mock-scheduler-subschema.json" - }, { "$ref": "./sync-scheduler-subschema.json" } diff --git a/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py deleted file mode 100644 index 43ffff0c34..0000000000 --- a/mlos_bench/mlos_bench/tests/schedulers/mock_scheduler.py +++ /dev/null @@ -1,66 +0,0 @@ -# -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# -"""A mock scheduler that returns predefined status and score for specific trial IDs.""" - -import logging -from collections.abc import Iterable -from datetime import datetime -from typing import Any - -from pytz import UTC - -from mlos_bench.optimizers.base_optimizer import Optimizer -from mlos_bench.schedulers.base_scheduler import Scheduler -from mlos_bench.schedulers.trial_runner import TrialRunner -from mlos_bench.storage.base_storage import Storage - -_LOG = logging.getLogger(__name__) - - -class MockScheduler(Scheduler): - """A mock scheduler that returns predefined status and score for specific trial - IDs. - """ - - def __init__( # pylint: disable=too-many-arguments - self, - *, - config: dict[str, Any], - global_config: dict[str, Any], - trial_runners: Iterable[TrialRunner], - optimizer: Optimizer, - storage: Storage, - root_env_config: str, - ) -> None: - super().__init__( - config=config, - global_config=global_config, - trial_runners=trial_runners, - optimizer=optimizer, - storage=storage, - root_env_config=root_env_config, - ) - self._mock_trial_data = config.get("mock_trial_data", []) - self._mock_trial_data = { - trial_info["trial_id"]: trial_info for trial_info in self._mock_trial_data - } - - def run_trial(self, trial: Storage.Trial) -> None: - """ - Mock the execution of a trial. 
- - Parameters - ---------- - trial : Storage.Trial - The trial to be executed. - """ - trial_id = trial.trial_id - if trial_id not in self._mock_trial_data: - raise ValueError(f"Trial ID {trial_id} not found in mock trial data.") - - trial_info = self._mock_trial_data[trial_id] - _LOG.info("Running trial %d: %s", trial_id, trial_info) - # Don't run it - just update the status and optionally score. - trial.update(trial_info["status"], datetime.now(UTC), trial_info.get("score")) From 80cf2fa9776d9b40ce3a4951b1bdb1f45365548a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 23:21:02 -0500 Subject: [PATCH 083/109] allow mock trial data to be in the global config as well --- .../config/schemas/cli/globals-schema.json | 3 +++ .../environments/mock-env-subschema.json | 24 +++++++++++-------- .../good/full/globals-with-schema.jsonc | 17 ++++++++++++- .../tests/schedulers/test_scheduler.py | 23 ++++++++++++++++++ 4 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py diff --git a/mlos_bench/mlos_bench/config/schemas/cli/globals-schema.json b/mlos_bench/mlos_bench/config/schemas/cli/globals-schema.json index 015b4a6e62..39e60e3249 100644 --- a/mlos_bench/mlos_bench/config/schemas/cli/globals-schema.json +++ b/mlos_bench/mlos_bench/config/schemas/cli/globals-schema.json @@ -24,6 +24,9 @@ }, "optimization_targets": { "$ref": "./common-defs-subschemas.json#/$defs/optimization_targets" + }, + "mock_trial_data": { + "$ref": "../environments/mock-env-subschema.json#/$defs/mock_trial_data" } }, "additionalProperties": { diff --git a/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json b/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json index fd2c84876d..0ef370705d 100644 --- a/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json +++ b/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json @@ -62,7 +62,8 @@ } } }, - "mock_trial_data": { + "mock_trial_data_item": { + "description": "Mock data for a single trial, split by phase", "type": "object", "properties": { "run": { @@ -118,6 +119,17 @@ }, "unevaluatedProperties": false, "minProperties": 1 + }, + "mock_trial_data": { + "description": "A set of mock trial data to use for testing, keyed by trial id. 
Used by MockEnv.", + "type": "object", + "patternProperties": { + "^[0-9]+$": { + "$ref": "#/$defs/mock_trial_data_item" + } + }, + "unevaluatedProperties": false, + "minProperties": 1 } }, @@ -162,15 +174,7 @@ "uniqueItems": true }, "mock_trial_data": { - "description": "A list of trial data to use for testing.", - "type": "object", - "patternProperties": { - "^[0-9]+$": { - "$ref": "#/$defs/mock_trial_data" - } - }, - "unevaluatedProperties": false, - "minProperties": 1 + "$ref": "#/$defs/mock_trial_data" } } } diff --git a/mlos_bench/mlos_bench/tests/config/schemas/globals/test-cases/good/full/globals-with-schema.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/globals/test-cases/good/full/globals-with-schema.jsonc index 58a0a31bb3..4ed580e09a 100644 --- a/mlos_bench/mlos_bench/tests/config/schemas/globals/test-cases/good/full/globals-with-schema.jsonc +++ b/mlos_bench/mlos_bench/tests/config/schemas/globals/test-cases/good/full/globals-with-schema.jsonc @@ -10,5 +10,20 @@ "mysql": ["mysql-innodb", "mysql-myisam", "mysql-binlog", "mysql-hugepages"] }, "experiment_id": "ExperimentName", - "trial_id": 1 + "trial_id": 1, + + "mock_trial_data": { + "1": { + "setup": { + "sleep": 1 + }, + "run": { + "status": "SUCCEEDED", + "metrics": { + "score": 0.9, + "color": "green" + } + } + } + } } diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py new file mode 100644 index 0000000000..70322519d3 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -0,0 +1,23 @@ +""" +Unit tests for base scheduler internals. +""" + +import pytest + +from mlos_bench.schedulers import Scheduler, SyncScheduler +from mlos_bench.tests.schedulers import MockScheduler + + +# TODO: +# Develop unit tests for schedulers. +# e.g., using MockScheduler it should validate that +# - the base scheduler can be used to run a trial +# - the base scheduler registers the values it receives from the mock_trial_data correctly +# - the base scheduler can be used to run multiple trials +# - the base scheduler does book keeping correctly + +# Actually, maybe what I really want is a MockTrialRunner that can be used to +# return dummy trial results after some predictable period for use in both +# parallel and synchronous schedulers. + +# No, in fact we can do that all with MockEnv and a small extension there. From 84266210d328a0771831e066076e487e697906df Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 23:34:23 -0500 Subject: [PATCH 084/109] comments as prompts in preparation to run a trial --- .../tests/schedulers/test_scheduler.py | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index 70322519d3..01c0ede08a 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -1,23 +1,27 @@ """ Unit tests for base scheduler internals. -""" - -import pytest -from mlos_bench.schedulers import Scheduler, SyncScheduler -from mlos_bench.tests.schedulers import MockScheduler +Notes +----- +Some prompts to help write the tests: +Develop unit tests for `Scheduler` class. +- use `@pytest.mark.parametrize` to run the same test with different Scheduler classes (e.g. `SyncScheduler`, `ParallelScheduler`, etc.) 
+- use `MockEnv` with `mock_trial_data` as a `pytest.fixture` to run the tests + - needs a jsonc file or string that the `TrialRunner.create_from_json` method can use to create the Env multiple times -# TODO: -# Develop unit tests for schedulers. -# e.g., using MockScheduler it should validate that -# - the base scheduler can be used to run a trial -# - the base scheduler registers the values it receives from the mock_trial_data correctly -# - the base scheduler can be used to run multiple trials -# - the base scheduler does book keeping correctly +Check that: +1. the targeted scheduler can be used to run a trial + - check that results are stored in the storage backend correctly + - use the `sqlite_storage` fixture from `mlos_bench.tests.storage.sql.fixtures` for that + - check that the `_ran_trials` attribute is updated correctly after a run_scheduler call +2. the base scheduler `bulk_registers` the values it receives from the mock_trial_data correctly + - use `mock` to patch the `bulk_register` method in the `Scheduler` class's `optimizer` attribute and check the call arguments +3. the base scheduler does book keeping correctly + - use `mock` to patch the `add_new_optimizer_suggestions` method in the `Scheduler` class and check the `_last_trial_id` +""" -# Actually, maybe what I really want is a MockTrialRunner that can be used to -# return dummy trial results after some predictable period for use in both -# parallel and synchronous schedulers. +import pytest +import unittest.mock -# No, in fact we can do that all with MockEnv and a small extension there. +from mlos_bench.schedulers import Scheduler, SyncScheduler From cdc614fc7a222a5592eefbccac301cc930d52a3e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 May 2025 04:34:53 +0000 Subject: [PATCH 085/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/environments/base_environment.py | 5 ++--- mlos_bench/mlos_bench/environments/mock_env.py | 5 +++-- mlos_bench/mlos_bench/environments/status.py | 3 ++- mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py | 7 ++++++- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/base_environment.py b/mlos_bench/mlos_bench/environments/base_environment.py index 98db5cfe14..335ff453b9 100644 --- a/mlos_bench/mlos_bench/environments/base_environment.py +++ b/mlos_bench/mlos_bench/environments/base_environment.py @@ -394,9 +394,8 @@ def current_trial_id(self) -> int: @property def trial_runner_id(self) -> int: """ - Get the ID of the - :py:class:`~.mlos_bench.schedulers.trial_runner.TrialRunner` for this - Environment. + Get the ID of the :py:class:`~.mlos_bench.schedulers.trial_runner.TrialRunner` + for this Environment. This value can be used in scripts or environment variables to help identify the TrialRunner for this Environment. 
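For illustration, the ``trial_runner_id`` property above can be combined with
``current_trial_id`` to keep per-runner state separate when trials execute in
parallel. A minimal sketch, with a hypothetical directory layout:

    # Inside an Environment method (illustrative only):
    workdir = f"/tmp/mlos_bench/runner-{self.trial_runner_id}/trial-{self.current_trial_id}"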
diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py index e15dcffed4..88199e48cb 100644 --- a/mlos_bench/mlos_bench/environments/mock_env.py +++ b/mlos_bench/mlos_bench/environments/mock_env.py @@ -6,10 +6,10 @@ import logging import random +import time from dataclasses import dataclass from datetime import datetime from typing import Any -import time import numpy @@ -227,7 +227,8 @@ def _produce_metrics(self, rand: random.Random | None) -> dict[str, TunableValue return {metric: float(score) for metric in self._metrics or []} def get_current_mock_trial_data(self) -> MockTrialData: - """Gets mock trial data for the current trial ID. + """ + Gets mock trial data for the current trial ID. If no (or missing) mock trial data is found, a new instance of MockTrialData is created from random data. diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index d49c4a9e0f..8bec0a22c4 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -25,7 +25,8 @@ class Status(enum.Enum): @staticmethod def parse(status: Any) -> "Status": - """Convert the input to a Status enum. + """ + Convert the input to a Status enum. Parameters ---------- diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index 01c0ede08a..a13f24a7ba 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -1,3 +1,7 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# """ Unit tests for base scheduler internals. @@ -21,7 +25,8 @@ - use `mock` to patch the `add_new_optimizer_suggestions` method in the `Scheduler` class and check the `_last_trial_id` """ -import pytest import unittest.mock +import pytest + from mlos_bench.schedulers import Scheduler, SyncScheduler From b3f49e9804d6f13182cbab83737d050d4eed7d3a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 23:43:45 -0500 Subject: [PATCH 086/109] fixups --- .../mlos_bench/environments/mock_env.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py index 88199e48cb..9d3b16cb0d 100644 --- a/mlos_bench/mlos_bench/environments/mock_env.py +++ b/mlos_bench/mlos_bench/environments/mock_env.py @@ -231,7 +231,7 @@ def get_current_mock_trial_data(self) -> MockTrialData: Gets mock trial data for the current trial ID. If no (or missing) mock trial data is found, a new instance of - MockTrialData is created from random data. + MockTrialData is created and later filled with random data. 
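+
+        For example, a rough sketch of the intended lazy-fill behavior
+        (``env`` here is an already-constructed ``MockEnv``)::
+
+            data = env.get_current_mock_trial_data()
+            # data.run.metrics may still be None at this point; run() fills
+            # it in with randomly generated metrics when the config did not
+            # specify any.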
Note ---- @@ -262,7 +262,18 @@ def setup(self, tunables: TunableGroups, global_config: dict | None = None) -> b raise RuntimeError( f"Mock trial data setup exception: {mock_trial_data.setup.exception}" ) - return is_success and mock_trial_data.setup.status.is_ready() + return is_success + + def teardown(self) -> None: + mock_trial_data = self.get_current_mock_trial_data() + if mock_trial_data.teardown.sleep: + _LOG.debug("Sleeping for %s seconds", mock_trial_data.teardown.sleep) + time.sleep(mock_trial_data.teardown.sleep) + if mock_trial_data.teardown.exception: + raise RuntimeError( + f"Mock trial data teardown exception: {mock_trial_data.teardown.exception}" + ) + super().teardown() def run(self) -> tuple[Status, datetime, dict[str, TunableValue] | None]: """ @@ -286,7 +297,6 @@ def run(self) -> tuple[Status, datetime, dict[str, TunableValue] | None]: time.sleep(mock_trial_data.run.sleep) if mock_trial_data.run.exception: raise RuntimeError(f"Mock trial data run exception: {mock_trial_data.run.exception}") - if mock_trial_data.run.metrics is None: # If no metrics are provided, generate them. mock_trial_data.run.metrics = self._produce_metrics(self._run_random) @@ -316,9 +326,13 @@ def status(self) -> tuple[Status, datetime, list[tuple[datetime, str, Any]]]: ) if mock_trial_data.status.metrics is None: # If no metrics are provided, generate them. + # Note: we don't save these in the mock trial data as they may need + # to change to preserve backwards compatibility with previous tests. metrics = self._produce_metrics(self._status_random) else: # If metrics are provided, use them. + # Note: current implementation uses the same metrics for all status + # calls of this trial. metrics = mock_trial_data.status.metrics return ( mock_trial_data.status.status, From a41e85f479436cd1e7f12a70c634c651a1c92358 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 23:45:09 -0500 Subject: [PATCH 087/109] revert --- .../environments/test-cases/good/full/mock_env-full.jsonc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/good/full/mock_env-full.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/good/full/mock_env-full.jsonc index b618627ecf..a23971f036 100644 --- a/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/good/full/mock_env-full.jsonc +++ b/mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/good/full/mock_env-full.jsonc @@ -1,5 +1,5 @@ { - //"$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/environments/environment-schema.json", + "$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/environments/environment-schema.json", "name": "mock_env-full", "description": "More descriptive text.", From 63e0f88e7c9378df80ee36a351799bf086095000 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 21 May 2025 23:45:56 -0500 Subject: [PATCH 088/109] remove --- mlos_bench/mlos_bench/tests/schedulers/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/__init__.py b/mlos_bench/mlos_bench/tests/schedulers/__init__.py index b166858875..4bc0076079 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/__init__.py +++ b/mlos_bench/mlos_bench/tests/schedulers/__init__.py @@ -3,8 +3,3 @@ # Licensed under the MIT License. 
#
 """mlos_bench.tests.schedulers."""
-from mlos_bench.tests.schedulers.mock_scheduler import MockScheduler
-
-__all__ = [
-    "MockScheduler",
-]

From 8f472bbf9b31cbf3adb87c3702c9c34ac08ab127 Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Wed, 21 May 2025 23:52:58 -0500
Subject: [PATCH 092/109] revert

---
 .../config/schemas/schedulers/base-scheduler-subschema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json b/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json
index 9417b7d00a..702da1eec3 100644
--- a/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json
+++ b/mlos_bench/mlos_bench/config/schemas/schedulers/base-scheduler-subschema.json
@@ -58,7 +58,7 @@
             "description": "The name of the scheduler class to use.",
             "type": "string",
             "$comment": "Exact matches are handled elsewhere.",
-            "pattern": "^mlos_bench([.]tests)?[.]schedulers[.]"
+            "pattern": "^mlos_bench[.]schedulers[.]"
         },

         "config": {

From 8b3d839985f7e3e57e5801f37e1c5e502b88b1e0 Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Thu, 22 May 2025 13:13:39 -0500
Subject: [PATCH 093/109] remove mock scheduler configs

---
 ...-scheduler-invalid-mock-trial-data-2.jsonc | 11 -------
 ...ck-scheduler-invalid-mock-trial-data.jsonc | 14 ---------
 .../bad/unhandled/mock_sched-extra.jsonc      |  7 -----
 .../good/full/mock_sched-full.jsonc           | 31 -------------------
 .../good/partial/mock_sched-partial.jsonc     | 20 ------------
 5 files changed, 83 deletions(-)
 delete mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data-2.jsonc
 delete mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data.jsonc
 delete mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/mock_sched-extra.jsonc
 delete mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/mock_sched-full.jsonc
 delete mode 100644 mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/mock_sched-partial.jsonc

diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data-2.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data-2.jsonc
deleted file mode 100644
index 09b74f4377..0000000000
--- a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data-2.jsonc
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "class": "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler",
-    "config": {
-        "mock_trial_data": [
-            {
-                // MISSING: "trial_id": 1,
-                "status": "SUCCEEDED"
-            }
-        ]
-    }
-}
diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data.jsonc
deleted file mode 100644
index 144e7f21e6..0000000000
--- a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/invalid/mock-scheduler-invalid-mock-trial-data.jsonc
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-    "class": "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler",
-    "config": {
-        "mock_trial_data": [
-            {
-                "trial_id": 1,
-                "status": "INVALID"
-            },
-            {
-                "status": "SUCCEEDED"
-            }
-        ]
-    }
-}
diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/mock_sched-extra.jsonc 
b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/mock_sched-extra.jsonc deleted file mode 100644 index c7bf9a46f7..0000000000 --- a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/bad/unhandled/mock_sched-extra.jsonc +++ /dev/null @@ -1,7 +0,0 @@ -{ - "class": "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler", - "config": { - "extra": "unsupported" - }, - "extra": "unsupported" -} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/mock_sched-full.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/mock_sched-full.jsonc deleted file mode 100644 index ca6ad2f905..0000000000 --- a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/mock_sched-full.jsonc +++ /dev/null @@ -1,31 +0,0 @@ -{ - "$schema": "https://raw.githubusercontent.com/microsoft/MLOS/main/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json", - "class": "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler", - "config": { - "trial_config_repeat_count": 3, - "teardown": false, - "experiment_id": "MyExperimentName", - "config_id": 1, - "trial_id": 1, - "max_trials": 2, - - "mock_trial_data": [ - { - "status": "SUCCEEDED", - "trial_id": 1, - "metrics": { - "score": 0.9, - "color": "green" - } - }, - { - "status": "FAILED", - "trial_id": 2, - "metrics": { - "score": 0.1, - "color": "red" - } - } - ] - } -} diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/mock_sched-partial.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/mock_sched-partial.jsonc deleted file mode 100644 index 2557c25cb4..0000000000 --- a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/partial/mock_sched-partial.jsonc +++ /dev/null @@ -1,20 +0,0 @@ -{ - "class": "mlos_bench.tests.schedulers.mock_scheduler.MockScheduler", - "config": { - "mock_trial_data": [ - { - "status": "SUCCEEDED", - "trial_id": 1, - "metrics": { - "score": 0.9, - "color": "green" - } - }, - { - "status": "FAILED", - "trial_id": 2 - // missing metrics - OK - } - ] - } -} From 8a4aac29423e7996771ddb926fe47f14fbbaa89f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 13:19:45 -0500 Subject: [PATCH 094/109] comments --- mlos_bench/mlos_bench/environments/status.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index 8bec0a22c4..6343d3e854 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -129,4 +129,15 @@ def is_timed_out(self) -> bool: Status.TIMED_OUT, } ) -"""The set of completed statuses.""" +""" +The set of completed statuses. + +Includes all statuses that indicate the trial or experiment has finished, either +successfully or not. +This set is used to determine if a trial or experiment has reached a final state. +This includes: +- :py:data:`.Status.SUCCEEDED`: The trial or experiment completed successfully. +- :py:data:`.Status.CANCELED`: The trial or experiment was canceled. +- :py:data:`.Status.FAILED`: The trial or experiment failed. +- :py:data:`.Status.TIMED_OUT`: The trial or experiment timed out. 
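+
+For example (an illustrative check)::
+
+    assert Status.SUCCEEDED in COMPLETED_STATUSES
+    assert Status.RUNNING not in COMPLETED_STATUSES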
+""" From 50ffde4bbcd3387b988168fb3bbf87799c98ad12 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 14:07:17 -0500 Subject: [PATCH 095/109] add accessor for the mock_trial_data property --- mlos_bench/mlos_bench/environments/mock_env.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py index 9d3b16cb0d..6af016b7e8 100644 --- a/mlos_bench/mlos_bench/environments/mock_env.py +++ b/mlos_bench/mlos_bench/environments/mock_env.py @@ -7,6 +7,7 @@ import logging import random import time +from copy import deepcopy from dataclasses import dataclass from datetime import datetime from typing import Any @@ -226,6 +227,17 @@ def _produce_metrics(self, rand: random.Random | None) -> dict[str, TunableValue return {metric: float(score) for metric in self._metrics or []} + def mock_trial_data(self) -> dict[int, MockTrialData]: + """ + Get the mock trial data for all trials. + + Returns + ------- + dict[int, MockTrialData] + Dictionary of mock trial data keyed by trial ID. + """ + return deepcopy(self._mock_trial_data) + def get_current_mock_trial_data(self) -> MockTrialData: """ Gets mock trial data for the current trial ID. From b1e2a52f19dd14e49d2aa88b1fbfa7289f5d496b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 14:07:43 -0500 Subject: [PATCH 096/109] make it a property --- mlos_bench/mlos_bench/environments/mock_env.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py index 6af016b7e8..c565b1adc6 100644 --- a/mlos_bench/mlos_bench/environments/mock_env.py +++ b/mlos_bench/mlos_bench/environments/mock_env.py @@ -227,6 +227,7 @@ def _produce_metrics(self, rand: random.Random | None) -> dict[str, TunableValue return {metric: float(score) for metric in self._metrics or []} + @property def mock_trial_data(self) -> dict[int, MockTrialData]: """ Get the mock trial data for all trials. From f0f7c4cf6bdf12e484e9b2e710b1d2ecbb568e85 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 14:16:29 -0500 Subject: [PATCH 097/109] add some basic fixtures to get started with --- .../mlos_bench/tests/schedulers/conftest.py | 95 ++++++++++++++++++- 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/schedulers/conftest.py index 83a18783cc..75b42fa9a1 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/schedulers/conftest.py @@ -2,4 +2,97 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Pytest fixtures for mlos_bench.schedulers tests.""" +""" +Pytest fixtures for mlos_bench.schedulers tests. 
+""" +# pylint: disable=redefined-outer-name + +import json + +import pytest + +from mlos_bench.environments.mock_env import MockEnv +from mlos_bench.services.config_persistence import ConfigPersistenceService +from mlos_bench.schedulers.trial_runner import TrialRunner +from mlos_bench.tunables.tunable_groups import TunableGroups +import mlos_bench.tests.optimizers.fixtures as optimizers_fixtures + + +NUM_TRIAL_RUNNERS = 4 + + +@pytest.fixture +def mock_env_config() -> dict: + """A config for a MockEnv with mock_trial_data.""" + return { + "name": "Test MockEnv With Explicit Mock Trial Data", + "class": "mlos_bench.environments.mock_env.MockEnv", + "config": { + "mock_env_seed": -1, + "mock_env_range": [0, 10], + "mock_env_metrics": ["score"], + # TODO: Add more mock trial data here: + "mock_trial_data": { + "0": { + "setup": { + "status": "SUCCEEDED", + }, + "run": { + "status": "SUCCEEDED", + "metrics": { + "score": 1.0, + }, + }, + }, + "1": { + "setup": { + "status": "SUCCEEDED", + }, + "run": { + "status": "SUCCEEDED", + "metrics": { + "score": 2.0, + }, + }, + }, + }, + }, + } + + +@pytest.fixture +def mock_env_json_config(mock_env_config: dict) -> str: + """A JSON string of the mock_env_config.""" + return json.dumps(mock_env_config) + + +@pytest.fixture +def mock_env( + mock_env_json_config: str, + tunable_groups: TunableGroups, +) -> MockEnv: + """A fixture to create a MockEnv instance using the mock_env_json_config.""" + config_loader_service = ConfigPersistenceService() + mock_env = config_loader_service.load_environment( + mock_env_json_config, + tunable_groups, + service=config_loader_service, + ) + assert isinstance(mock_env, MockEnv) + return mock_env + + +@pytest.fixture +def trial_runners( + mock_env_json_config: str, + tunable_groups: TunableGroups, +) -> list[TrialRunner]: + """A fixture to create a list of TrialRunner instances using the + mock_env_json_config.""" + config_loader_service = ConfigPersistenceService() + return TrialRunner.create_from_json( + config_loader=config_loader_service, + env_json=mock_env_json_config, + tunable_groups=tunable_groups, + num_trial_runners=NUM_TRIAL_RUNNERS, + ) From c27b3c269dd800cb268aa2024ac243bb7c4c612c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 14:16:50 -0500 Subject: [PATCH 098/109] add a method for creating schedulers, intended to be used with pytest parameterize --- .../tests/schedulers/test_scheduler.py | 48 +++++++++++-------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index a13f24a7ba..40c99d019c 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -3,30 +3,36 @@ # Licensed under the MIT License. # """ -Unit tests for base scheduler internals. +Unit tests for :py:class:`mlos_bench.schedulers` and their internals. +""" -Notes ------ -Some prompts to help write the tests: +import pytest +import unittest.mock -Develop unit tests for `Scheduler` class. -- use `@pytest.mark.parametrize` to run the same test with different Scheduler classes (e.g. `SyncScheduler`, `ParallelScheduler`, etc.) 
-- use `MockEnv` with `mock_trial_data` as a `pytest.fixture` to run the tests - - needs a jsonc file or string that the `TrialRunner.create_from_json` method can use to create the Env multiple times +from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.schedulers.base_scheduler import Scheduler +from mlos_bench.schedulers.sync_scheduler import SyncScheduler +from mlos_bench.optimizers.mock_optimizer import MockOptimizer +from mlos_bench.schedulers.trial_runner import TrialRunner +import mlos_bench.tests.optimizers.fixtures as optimizers_fixtures -Check that: -1. the targeted scheduler can be used to run a trial - - check that results are stored in the storage backend correctly - - use the `sqlite_storage` fixture from `mlos_bench.tests.storage.sql.fixtures` for that - - check that the `_ran_trials` attribute is updated correctly after a run_scheduler call -2. the base scheduler `bulk_registers` the values it receives from the mock_trial_data correctly - - use `mock` to patch the `bulk_register` method in the `Scheduler` class's `optimizer` attribute and check the call arguments -3. the base scheduler does book keeping correctly - - use `mock` to patch the `add_new_optimizer_suggestions` method in the `Scheduler` class and check the `_last_trial_id` -""" +mock_opt = optimizers_fixtures.mock_opt -import unittest.mock +# pylint: disable=redefined-outer-name -import pytest -from mlos_bench.schedulers import Scheduler, SyncScheduler +def create_scheduler( + scheduler_type: type[Scheduler], + trial_runners: list[TrialRunner], + mock_opt: MockOptimizer, + sqlite_storage: SqlStorage, +) -> Scheduler: + """Create a Scheduler instance using trial_runners, mock_opt, and sqlite_storage.""" + return scheduler_type( + config={}, + global_config={}, + trial_runners=trial_runners, + optimizer=mock_opt, + storage=sqlite_storage, + root_env_config="", + ) From 981326890418e4ba170fdf647e7b5c69decd91af Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 14:33:48 -0500 Subject: [PATCH 099/109] stubbing out a very basic test to get started --- .../tests/schedulers/test_scheduler.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index 40c99d019c..559ab9fddb 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -8,7 +8,9 @@ import pytest import unittest.mock +import sys +from mlos_bench.environments.mock_env import MockEnv from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.schedulers.sync_scheduler import SyncScheduler @@ -36,3 +38,37 @@ def create_scheduler( storage=sqlite_storage, root_env_config="", ) + + +@pytest.mark.parametrize( + "scheduler_class", + [ + SyncScheduler, + ], +) +@pytest.mark.skipif( + sys.platform == "win32", + reason="Skipping test on Windows - SQLite storage is not accessible in parallel tests there.", +) +def test_scheduler( + scheduler_class: type[Scheduler], + # fixtures: + trial_runners: list[TrialRunner], + mock_opt: MockOptimizer, + sqlite_storage: SqlStorage, +) -> None: + """ + Test the creation of a SyncScheduler instance. 
+ """ + scheduler = create_scheduler( + scheduler_class, + trial_runners, + mock_opt, + sqlite_storage, + ) + assert isinstance(scheduler, scheduler_class) + assert isinstance(scheduler.trial_runners, list) + assert len(scheduler.trial_runners) == len(trial_runners) + assert isinstance(scheduler.optimizer, MockOptimizer) + assert isinstance(scheduler.storage, SqlStorage) + assert isinstance(scheduler.root_environment, MockEnv) From 3b9201767adfd8d6eb8dec0218a09fc3d887b066 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 14:55:16 -0500 Subject: [PATCH 100/109] wip: testing --- .../mlos_bench/schedulers/base_scheduler.py | 11 +- .../tests/schedulers/test_scheduler.py | 114 +++++++++++++++--- 2 files changed, 106 insertions(+), 19 deletions(-) diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 3e47841591..3c22d427c7 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -243,7 +243,14 @@ def __exit__( return False # Do not suppress exceptions def _prepare_start(self) -> bool: - """Prepare the scheduler for starting.""" + """ + Prepare the scheduler for starting. + + Notes + ----- + This method is called by the :py:meth:`Scheduler.start` method. + It is split out mostly to allow for easier unit testing/mocking. + """ assert self.experiment is not None _LOG.info( "START: Experiment: %s Env: %s Optimizer: %s", @@ -280,7 +287,7 @@ def _execute_scheduling_step(self, is_warm_up: bool) -> bool: Notes ----- This method is called by the :py:meth:`Scheduler.start` method. - It is split out mostly to allow for easier testing with MockSchedulers. + It is split out mostly to allow for easier unit testing/mocking. """ assert self.experiment is not None _LOG.info("Optimization loop: Last trial ID: %d", self._last_trial_id) diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index 559ab9fddb..e7d07c1292 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -6,14 +6,15 @@ Unit tests for :py:class:`mlos_bench.schedulers` and their internals. """ -import pytest -import unittest.mock +from unittest.mock import patch import sys +import pytest + +from mlos_core.tests import get_all_concrete_subclasses from mlos_bench.environments.mock_env import MockEnv from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.schedulers.base_scheduler import Scheduler -from mlos_bench.schedulers.sync_scheduler import SyncScheduler from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.trial_runner import TrialRunner import mlos_bench.tests.optimizers.fixtures as optimizers_fixtures @@ -29,10 +30,24 @@ def create_scheduler( mock_opt: MockOptimizer, sqlite_storage: SqlStorage, ) -> Scheduler: - """Create a Scheduler instance using trial_runners, mock_opt, and sqlite_storage.""" + """Create a Scheduler instance using trial_runners, mock_opt, and + sqlite_storage fixtures.""" + + env = trial_runners[0].environment + assert isinstance(env, MockEnv), "Environment is not a MockEnv instance." 
+ max_trials = max(trial_id for trial_id in env.mock_trial_data.keys()) + max_trials = min(max_trials, mock_opt.max_suggestions) + return scheduler_type( - config={}, - global_config={}, + config={ + "max_trials": max_trials, + }, + global_config={ + "experiment_id": "Test{scheduler_type.__name__}Experiment", + "trial_id": 1, + # TODO: Adjust this in the future? + "trial_repeat_count": 1, + }, trial_runners=trial_runners, optimizer=mock_opt, storage=sqlite_storage, @@ -40,11 +55,16 @@ def create_scheduler( ) +scheduler_classes = get_all_concrete_subclasses( + Scheduler, # type: ignore[type-abstract] + pkg_name="mlos_bench", +) +assert scheduler_classes, "No Scheduler classes found in mlos_bench." + + @pytest.mark.parametrize( "scheduler_class", - [ - SyncScheduler, - ], + scheduler_classes, ) @pytest.mark.skipif( sys.platform == "win32", @@ -52,23 +72,83 @@ def create_scheduler( ) def test_scheduler( scheduler_class: type[Scheduler], - # fixtures: trial_runners: list[TrialRunner], mock_opt: MockOptimizer, sqlite_storage: SqlStorage, ) -> None: """ - Test the creation of a SyncScheduler instance. + Full integration test for Scheduler: runs trials, checks storage, optimizer + registration, and internal bookkeeping. """ + # pylint: disable=too-many-locals + + # Create the scheduler. scheduler = create_scheduler( scheduler_class, trial_runners, mock_opt, sqlite_storage, ) - assert isinstance(scheduler, scheduler_class) - assert isinstance(scheduler.trial_runners, list) - assert len(scheduler.trial_runners) == len(trial_runners) - assert isinstance(scheduler.optimizer, MockOptimizer) - assert isinstance(scheduler.storage, SqlStorage) - assert isinstance(scheduler.root_environment, MockEnv) + + root_env = scheduler.root_environment + assert isinstance(root_env, MockEnv), "Root environment is not a MockEnv instance." + mock_trial_data = root_env.mock_trial_data + + # Patch bulk_register and add_new_optimizer_suggestions + with ( + patch.object( + scheduler.optimizer, + "bulk_register", + wraps=scheduler.optimizer.bulk_register, + ) as mock_bulk_register, + patch.object( + scheduler, + "add_new_optimizer_suggestions", + wraps=scheduler.add_new_optimizer_suggestions, + ) as mock_add_suggestions, + ): + # Run the scheduler + with scheduler: + scheduler.start() + scheduler.teardown() + + # Now check the results. + # TODO + + # 1. Check results in storage + experiments = scheduler.storage.experiments + assert experiments, "No experiments found in storage." + # Get the first experiment + experiment = next(iter(experiments.values())) + trials = experiment.trials + # Compare with mock_trial_data from root_environment + for trial_id, trial_data in trials.items(): + # Check that the trial result matches the mock data + expected = mock_trial_data[trial_id].run.metrics + actual = trial_data.results_dict + assert actual == expected, f"Trial {trial_id} results {actual} != expected {expected}" + + # 1b. Check ran_trials bookkeeping + ran_trials = scheduler.ran_trials + assert len(ran_trials) == len(trials) + for trial in ran_trials: + assert ( + trial.status.is_ready() + ), f"Trial {trial.trial_id} did not complete successfully." + + # 2. Check optimizer registration + assert mock_bulk_register.called, "bulk_register was not called on optimizer." 
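+    # NOTE: The per-call checks below assume results are registered in trial
+    # ID order; that holds for a synchronous scheduler, but may not for an
+    # asynchronous one.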
+ # Check that the configs and scores match the mock_trial_data + for call in mock_bulk_register.call_args_list: + configs, scores, *_ = call.args + for i, config in enumerate(configs): + trial_id = i # assumes order matches + expected_score = mock_trial_data[trial_id].run.metrics + assert ( + scores[i] == expected_score + ), f"bulk_register score {scores[i]} != expected {expected_score} for trial {trial_id}" + + # 3. Check bookkeeping: add_new_optimizer_suggestions and _last_trial_id + assert mock_add_suggestions.called, "add_new_optimizer_suggestions was not called." + # _last_trial_id should be the last trial id + assert getattr(scheduler, "_last_trial_id", None) == max(trials.keys()) From 341564ebd82d896f1ffcb5a2c97ce4d5e21f0783 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 16:03:23 -0500 Subject: [PATCH 101/109] wip --- .../environments/base_environment.py | 3 +- .../mlos_bench/tests/schedulers/conftest.py | 36 ++++++-- .../tests/schedulers/test_scheduler.py | 86 +++++-------------- 3 files changed, 53 insertions(+), 72 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/base_environment.py b/mlos_bench/mlos_bench/environments/base_environment.py index 335ff453b9..e98d410f7a 100644 --- a/mlos_bench/mlos_bench/environments/base_environment.py +++ b/mlos_bench/mlos_bench/environments/base_environment.py @@ -385,9 +385,10 @@ def current_trial_id(self) -> int: """ val = self._params["trial_id"] assert isinstance(val, int), ( - "Expected trial_id to be an int, but got %s (type %s)", + "Expected trial_id to be an int, but got %s (type %s): %s", val, type(val), + self._params, ) return val diff --git a/mlos_bench/mlos_bench/tests/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/schedulers/conftest.py index 75b42fa9a1..272edbc9ee 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/schedulers/conftest.py @@ -15,7 +15,6 @@ from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.tunables.tunable_groups import TunableGroups -import mlos_bench.tests.optimizers.fixtures as optimizers_fixtures NUM_TRIAL_RUNNERS = 4 @@ -28,16 +27,19 @@ def mock_env_config() -> dict: "name": "Test MockEnv With Explicit Mock Trial Data", "class": "mlos_bench.environments.mock_env.MockEnv", "config": { + # Reference the covariant groups from the `tunable_groups` fixture. 
+ # See Also: + # - mlos_bench/tests/conftest.py + # - mlos_bench/tests/tunable_groups_fixtures.py + "tunable_params": ["provision", "boot", "kernel"], "mock_env_seed": -1, "mock_env_range": [0, 10], "mock_env_metrics": ["score"], # TODO: Add more mock trial data here: "mock_trial_data": { "0": { - "setup": { - "status": "SUCCEEDED", - }, "run": { + "sleep": 0.15, "status": "SUCCEEDED", "metrics": { "score": 1.0, @@ -45,13 +47,20 @@ def mock_env_config() -> dict: }, }, "1": { - "setup": { + "run": { + "sleep": 0.2, "status": "SUCCEEDED", + "metrics": { + "score": 2.0, + }, }, + }, + "2": { "run": { + "sleep": 0.1, "status": "SUCCEEDED", "metrics": { - "score": 2.0, + "score": 3.0, }, }, }, @@ -60,6 +69,15 @@ def mock_env_config() -> dict: } +@pytest.fixture +def global_config() -> dict: + """A global config for a MockEnv.""" + return { + "experiment_id": "TestExperiment", + "trial_id": 1, + } + + @pytest.fixture def mock_env_json_config(mock_env_config: dict) -> str: """A JSON string of the mock_env_config.""" @@ -70,6 +88,7 @@ def mock_env_json_config(mock_env_config: dict) -> str: def mock_env( mock_env_json_config: str, tunable_groups: TunableGroups, + global_config: dict, ) -> MockEnv: """A fixture to create a MockEnv instance using the mock_env_json_config.""" config_loader_service = ConfigPersistenceService() @@ -77,6 +96,7 @@ def mock_env( mock_env_json_config, tunable_groups, service=config_loader_service, + global_config=global_config, ) assert isinstance(mock_env, MockEnv) return mock_env @@ -86,13 +106,15 @@ def mock_env( def trial_runners( mock_env_json_config: str, tunable_groups: TunableGroups, + global_config: dict, ) -> list[TrialRunner]: """A fixture to create a list of TrialRunner instances using the mock_env_json_config.""" - config_loader_service = ConfigPersistenceService() + config_loader_service = ConfigPersistenceService(global_config=global_config) return TrialRunner.create_from_json( config_loader=config_loader_service, env_json=mock_env_json_config, tunable_groups=tunable_groups, num_trial_runners=NUM_TRIAL_RUNNERS, + global_config=global_config, ) diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index e7d07c1292..302447715a 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -18,8 +18,10 @@ from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.trial_runner import TrialRunner import mlos_bench.tests.optimizers.fixtures as optimizers_fixtures +import mlos_bench.tests.storage.sql.fixtures as sql_storage_fixtures mock_opt = optimizers_fixtures.mock_opt +sqlite_storage = sql_storage_fixtures.sqlite_storage # pylint: disable=redefined-outer-name @@ -29,6 +31,7 @@ def create_scheduler( trial_runners: list[TrialRunner], mock_opt: MockOptimizer, sqlite_storage: SqlStorage, + global_config: dict, ) -> Scheduler: """Create a Scheduler instance using trial_runners, mock_opt, and sqlite_storage fixtures.""" @@ -38,16 +41,13 @@ def create_scheduler( max_trials = max(trial_id for trial_id in env.mock_trial_data.keys()) max_trials = min(max_trials, mock_opt.max_suggestions) + global_config["experiment_id"] = f"Test{scheduler_type.__name__}Experiment" + return scheduler_type( config={ "max_trials": max_trials, }, - global_config={ - "experiment_id": "Test{scheduler_type.__name__}Experiment", - "trial_id": 1, - # TODO: Adjust this in the future? 
-        "trial_repeat_count": 1,
-    },
+        global_config=global_config,
         trial_runners=trial_runners,
         optimizer=mock_opt,
         storage=sqlite_storage,
@@ -75,6 +75,7 @@ def test_scheduler(
     trial_runners: list[TrialRunner],
     mock_opt: MockOptimizer,
     sqlite_storage: SqlStorage,
+    global_config: dict,
 ) -> None:
     """
     Full integration test for Scheduler: runs trials, checks storage, optimizer
@@ -88,8 +89,24 @@ def test_scheduler(
         trial_runners,
         mock_opt,
         sqlite_storage,
+        global_config,
     )
 
     root_env = scheduler.root_environment
     assert isinstance(root_env, MockEnv), "Root environment is not a MockEnv instance."
     mock_trial_data = root_env.mock_trial_data
 
-    # Patch bulk_register and add_new_optimizer_suggestions
-    with (
-        patch.object(
-            scheduler.optimizer,
-            "bulk_register",
-            wraps=scheduler.optimizer.bulk_register,
-        ) as mock_bulk_register,
-        patch.object(
-            scheduler,
-            "add_new_optimizer_suggestions",
-            wraps=scheduler.add_new_optimizer_suggestions,
-        ) as mock_add_suggestions,
-    ):
-        # Run the scheduler
-        with scheduler:
-            scheduler.start()
-            scheduler.teardown()
-
-    # Now check the results.
-    # TODO
-
-    # 1. Check results in storage
-    experiments = scheduler.storage.experiments
-    assert experiments, "No experiments found in storage."
-    # Get the first experiment
-    experiment = next(iter(experiments.values()))
-    trials = experiment.trials
-    # Compare with mock_trial_data from root_environment
-    for trial_id, trial_data in trials.items():
-        # Check that the trial result matches the mock data
-        expected = mock_trial_data[trial_id].run.metrics
-        actual = trial_data.results_dict
-        assert actual == expected, f"Trial {trial_id} results {actual} != expected {expected}"
-
-    # 1b. Check ran_trials bookkeeping
-    ran_trials = scheduler.ran_trials
-    assert len(ran_trials) == len(trials)
-    for trial in ran_trials:
-        assert (
-            trial.status.is_ready()
-        ), f"Trial {trial.trial_id} did not complete successfully."
-
-    # 2. Check optimizer registration
-    assert mock_bulk_register.called, "bulk_register was not called on optimizer."
-    # Check that the configs and scores match the mock_trial_data
-    for call in mock_bulk_register.call_args_list:
-        configs, scores, *_ = call.args
-        for i, config in enumerate(configs):
-            trial_id = i  # assumes order matches
-            expected_score = mock_trial_data[trial_id].run.metrics
-            assert (
-                scores[i] == expected_score
-            ), f"bulk_register score {scores[i]} != expected {expected_score} for trial {trial_id}"
-
-    # 3. Check bookkeeping: add_new_optimizer_suggestions and _last_trial_id
-    assert mock_add_suggestions.called, "add_new_optimizer_suggestions was not called."
-    # _last_trial_id should be the last trial id
-    assert getattr(scheduler, "_last_trial_id", None) == max(trials.keys())
+    # Run the scheduler
+    with scheduler:
+        scheduler.start()
+        scheduler.teardown()
+
+    # Now check the results.
+    # TODO:
+    # Check the overall results:
+    # 1. Check the results in storage.
+    # 2. Check the optimizer registration.
+    # 3. Check the bookkeeping for ran_trials.
+    # 4. Check the bookkeeping for add_new_optimizer_suggestions and _last_trial_id.
+    # This last part may require patching and intercepting during the start()
+    # loop to validate in-progress bookkeeping instead of just the final state.
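
As an aside on how these conftest.py fixtures compose: a test can request
`mock_env` (or `trial_runners`) directly and introspect the configured mock
trial data. The following is only an illustrative sketch against the fixtures
above (the test name and assertions are hypothetical, not part of the series):

    from mlos_bench.environments.mock_env import MockEnv


    def test_mock_env_fixture_trial_data(mock_env: MockEnv) -> None:
        # The fixture config declares a "run" phase with a "score" metric for
        # each mock trial, so the parsed environment should expose the same.
        assert mock_env.mock_trial_data, "Expected the fixture to define mock trials."
        for trial_data in mock_env.mock_trial_data.values():
            metrics = trial_data.run.metrics or {}
            assert metrics.get("score") is not None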
From 5c068149510765555524c68f9f5f4fcb4b078dda Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 May 2025 21:03:47 +0000 Subject: [PATCH 102/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../mlos_bench/tests/schedulers/conftest.py | 10 ++++----- .../tests/schedulers/test_scheduler.py | 21 +++++++++---------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/schedulers/conftest.py index 272edbc9ee..dffd8e8bad 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/schedulers/conftest.py @@ -2,9 +2,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Pytest fixtures for mlos_bench.schedulers tests. -""" +"""Pytest fixtures for mlos_bench.schedulers tests.""" # pylint: disable=redefined-outer-name import json @@ -12,11 +10,10 @@ import pytest from mlos_bench.environments.mock_env import MockEnv -from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.schedulers.trial_runner import TrialRunner +from mlos_bench.services.config_persistence import ConfigPersistenceService from mlos_bench.tunables.tunable_groups import TunableGroups - NUM_TRIAL_RUNNERS = 4 @@ -109,7 +106,8 @@ def trial_runners( global_config: dict, ) -> list[TrialRunner]: """A fixture to create a list of TrialRunner instances using the - mock_env_json_config.""" + mock_env_json_config. + """ config_loader_service = ConfigPersistenceService(global_config=global_config) return TrialRunner.create_from_json( config_loader=config_loader_service, diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index 302447715a..12828bb17d 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -2,23 +2,21 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -""" -Unit tests for :py:class:`mlos_bench.schedulers` and their internals. 
-""" +"""Unit tests for :py:class:`mlos_bench.schedulers` and their internals.""" -from unittest.mock import patch import sys +from unittest.mock import patch import pytest -from mlos_core.tests import get_all_concrete_subclasses +import mlos_bench.tests.optimizers.fixtures as optimizers_fixtures +import mlos_bench.tests.storage.sql.fixtures as sql_storage_fixtures from mlos_bench.environments.mock_env import MockEnv -from mlos_bench.storage.sql.storage import SqlStorage -from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.optimizers.mock_optimizer import MockOptimizer +from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.schedulers.trial_runner import TrialRunner -import mlos_bench.tests.optimizers.fixtures as optimizers_fixtures -import mlos_bench.tests.storage.sql.fixtures as sql_storage_fixtures +from mlos_bench.storage.sql.storage import SqlStorage +from mlos_core.tests import get_all_concrete_subclasses mock_opt = optimizers_fixtures.mock_opt sqlite_storage = sql_storage_fixtures.sqlite_storage @@ -33,8 +31,9 @@ def create_scheduler( sqlite_storage: SqlStorage, global_config: dict, ) -> Scheduler: - """Create a Scheduler instance using trial_runners, mock_opt, and - sqlite_storage fixtures.""" + """Create a Scheduler instance using trial_runners, mock_opt, and sqlite_storage + fixtures. + """ env = trial_runners[0].environment assert isinstance(env, MockEnv), "Environment is not a MockEnv instance." From 836dcd9e9b6d38f3ecd6e704ec737e0227d63664 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 16:04:57 -0500 Subject: [PATCH 103/109] comments --- mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index 12828bb17d..102f28f066 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -100,7 +100,7 @@ def test_scheduler( scheduler.start() scheduler.teardown() - # Now check the results. + # Now check the overall results. # TODO: # Check the overall results: # 1. Check the results in storage. From 00721415466c31ff772657972092266cef288e2c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 16:20:47 -0500 Subject: [PATCH 104/109] fixups --- .../environments/mock-env-subschema.json | 2 +- .../environments/base_environment.py | 7 +++--- .../mlos_bench/tests/schedulers/conftest.py | 14 +++++++----- .../tests/schedulers/test_scheduler.py | 22 +++++++++++++++++-- .../mlos_bench/tests/storage/sql/fixtures.py | 2 +- 5 files changed, 35 insertions(+), 12 deletions(-) diff --git a/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json b/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json index 0ef370705d..b453c8573a 100644 --- a/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json +++ b/mlos_bench/mlos_bench/config/schemas/environments/mock-env-subschema.json @@ -124,7 +124,7 @@ "description": "A set of mock trial data to use for testing, keyed by trial id. 
Used by MockEnv.", "type": "object", "patternProperties": { - "^[0-9]+$": { + "^[1-9][0-9]*$": { "$ref": "#/$defs/mock_trial_data_item" } }, diff --git a/mlos_bench/mlos_bench/environments/base_environment.py b/mlos_bench/mlos_bench/environments/base_environment.py index e98d410f7a..fe40025f95 100644 --- a/mlos_bench/mlos_bench/environments/base_environment.py +++ b/mlos_bench/mlos_bench/environments/base_environment.py @@ -419,7 +419,8 @@ def trial_runner_id(self) -> int: ) return val - def experiment_id(self) -> int: + @property + def experiment_id(self) -> str: """ Get the ID of the experiment. @@ -428,7 +429,7 @@ def experiment_id(self) -> int: Returns ------- - experiment_id : int + experiment_id : str The ID of the experiment. Notes @@ -441,7 +442,7 @@ def experiment_id(self) -> int: mlos_bench.config : documentation on the configuration system """ val = self._params["experiment_id"] - assert isinstance(val, int), ( + assert isinstance(val, str), ( "Expected experiment_id to be an int, but got %s (type %s)", val, type(val), diff --git a/mlos_bench/mlos_bench/tests/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/schedulers/conftest.py index dffd8e8bad..e5aa302d61 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/schedulers/conftest.py @@ -6,6 +6,7 @@ # pylint: disable=redefined-outer-name import json +import re import pytest @@ -34,7 +35,7 @@ def mock_env_config() -> dict: "mock_env_metrics": ["score"], # TODO: Add more mock trial data here: "mock_trial_data": { - "0": { + "1": { "run": { "sleep": 0.15, "status": "SUCCEEDED", @@ -43,7 +44,7 @@ def mock_env_config() -> dict: }, }, }, - "1": { + "2": { "run": { "sleep": 0.2, "status": "SUCCEEDED", @@ -52,7 +53,7 @@ def mock_env_config() -> dict: }, }, }, - "2": { + "3": { "run": { "sleep": 0.1, "status": "SUCCEEDED", @@ -67,10 +68,13 @@ def mock_env_config() -> dict: @pytest.fixture -def global_config() -> dict: +def global_config(request) -> dict: """A global config for a MockEnv.""" + test_name = request.node.name + test_name = re.sub(r"[^a-zA-Z0-9]", "_", test_name) + experiment_id = f"TestExperiment-{test_name}" return { - "experiment_id": "TestExperiment", + "experiment_id": experiment_id, "trial_id": 1, } diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index 102f28f066..7bfa2c6aac 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -40,8 +40,6 @@ def create_scheduler( max_trials = max(trial_id for trial_id in env.mock_trial_data.keys()) max_trials = min(max_trials, mock_opt.max_suggestions) - global_config["experiment_id"] = f"Test{scheduler_type.__name__}Experiment" - return scheduler_type( config={ "max_trials": max_trials, @@ -92,6 +90,7 @@ def test_scheduler( ) root_env = scheduler.root_environment + experiment_id = root_env.experiment_id assert isinstance(root_env, MockEnv), "Root environment is not a MockEnv instance." mock_trial_data = root_env.mock_trial_data @@ -101,6 +100,25 @@ def test_scheduler( scheduler.teardown() # Now check the overall results. + + # Check the results in storage. + exp_data = sqlite_storage.experiments[experiment_id] + for mock_trial_data in mock_trial_data.values(): + trial_id = mock_trial_data.trial_id + assert trial_id in exp_data.trials, f"Trial {trial_id} not found in storage." + trial_data = exp_data.trials[trial_id] + + # Check the results. 
+        metrics = mock_trial_data.run.metrics
+        if metrics:
+            for result_key, result_value in metrics.items():
+                assert (
+                    result_key in trial_data.results_dict
+                ), f"Result column {result_key} not found in storage."
+                assert (
+                    trial_data.results_dict[result_key] == result_value
+                ), f"Result value for {result_key} does not match expected value."
+
     # TODO:
     # Check the overall results:
     # 1. Check the results in storage.
diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
index 0bebeeff82..db6dc5fa2e 100644
--- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
+++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
@@ -30,7 +30,7 @@
 # pylint: disable=redefined-outer-name
 
 
-@pytest.fixture
+@pytest.fixture(scope="function")
 def sqlite_storage() -> Generator[SqlStorage]:
     """
     Fixture for file based SQLite storage in a temporary directory.

From 5400bd6805ae7a8723b270898ceeb5beb8990f5e Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Thu, 22 May 2025 16:26:08 -0500
Subject: [PATCH 105/109] more checks

---
 .../tests/schedulers/test_scheduler.py        | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py
index 7bfa2c6aac..c520aca777 100644
--- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py
+++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py
@@ -114,16 +114,32 @@ def test_scheduler(
             for result_key, result_value in metrics.items():
                 assert (
                     result_key in trial_data.results_dict
-                ), f"Result column {result_key} not found in storage."
+                ), f"Result {result_key} not found in storage for trial {trial_data}."
                 assert (
                     trial_data.results_dict[result_key] == result_value
                 ), f"Result value for {result_key} does not match expected value."
+        else:
+            # metrics weren't explicit in the mock trial data, so we only check
+            # that a score was registered
+            for opt_target in mock_opt.targets:
+                assert (
+                    opt_target in trial_data.results_dict
+                ), f"Result column {opt_target} not found in storage."
+                assert (
+                    trial_data.results_dict[opt_target] is not None
+                ), f"Result value for {opt_target} is None."
+
+        assert (
+            trial_data.status == mock_trial_data.run.status
+        ), f"Trial {trial_id} status {trial_data.status} was not {mock_trial_data.run.status}."
+
+        # TODO: Check the trial status telemetry.
 
     # TODO:
-    # Check the overall results:
-    # 1. Check the results in storage.
     # 2. Check the optimizer registration.
     # 3. Check the bookkeeping for ran_trials.
+
+    # TODO: And check the intermediary results.
     # 4. Check the bookkeeping for add_new_optimizer_suggestions and _last_trial_id.
     # This last part may require patching and intercepting during the start()
     # loop to validate in-progress bookkeeping instead of just the final state.
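
The per-trial validation inlined above could equally be factored into a small
helper. The sketch below is hypothetical (the helper name and signature are
assumptions); it mirrors the two cases checked here: explicit mock metrics
must round-trip through storage, and each optimizer target needs a recorded
score either way:

    from mlos_bench.storage.base_trial_data import TrialData


    def check_trial_results(
        trial_data: TrialData,
        expected_metrics: dict | None,
        opt_targets: list[str],
    ) -> None:
        """Assert that a trial's stored results match the mock expectations."""
        results = trial_data.results_dict
        # Explicit mock metrics, when given, must match what storage recorded.
        for key, value in (expected_metrics or {}).items():
            assert results.get(key) == value, f"{key}: {results.get(key)} != {value}"
        # Every optimization target should have a non-None recorded score.
        for target in opt_targets:
            assert results.get(target) is not None, f"Missing score for {target}."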
From e62a3787656b524121b7e713f0117d31fd2b70ba Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 17:21:43 -0500 Subject: [PATCH 106/109] Add more checks --- .../mlos_bench/optimizers/base_optimizer.py | 4 +- .../mlos_bench/optimizers/mock_optimizer.py | 42 +++++++ .../tests/schedulers/test_scheduler.py | 108 +++++++++++++++--- 3 files changed, 134 insertions(+), 20 deletions(-) diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py index 44aa9a035e..72b437b320 100644 --- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/base_optimizer.py @@ -356,8 +356,10 @@ def _get_scores( assert scores is not None target_metrics: dict[str, float] = {} for opt_target, opt_dir in self._opt_targets.items(): + if opt_target not in scores: + raise ValueError(f"Score for {opt_target} not found in {scores}.") val = scores[opt_target] - assert val is not None + assert val is not None, f"Score for {opt_target} is None." target_metrics[opt_target] = float(val) * opt_dir return target_metrics diff --git a/mlos_bench/mlos_bench/optimizers/mock_optimizer.py b/mlos_bench/mlos_bench/optimizers/mock_optimizer.py index 947e34a7da..60eed31b46 100644 --- a/mlos_bench/mlos_bench/optimizers/mock_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mock_optimizer.py @@ -15,6 +15,7 @@ import random from collections.abc import Callable, Sequence +from dataclasses import dataclass from mlos_bench.environments.status import Status from mlos_bench.optimizers.track_best_optimizer import TrackBestOptimizer from mlos_bench.services.base_service import Service @@ -25,6 +26,15 @@ _LOG = logging.getLogger(__name__) +@dataclass +class RegisteredScore: + """A registered score for a trial.""" + + config: TunableGroups + score: dict[str, TunableValue] | None + status: Status + + class MockOptimizer(TrackBestOptimizer): """Mock optimizer to test the Environment API.""" @@ -42,6 +52,38 @@ def __init__( "float": lambda tunable: rnd.uniform(*tunable.range), "int": lambda tunable: rnd.randint(*(int(x) for x in tunable.range)), } + self._registered_scores: list[RegisteredScore] = [] + + @property + def registered_scores(self) -> list[RegisteredScore]: + """Return the list of registered scores. + + Notes + ----- + Used for testing and validation. + """ + return self._registered_scores + + def register( + self, + tunables: TunableGroups, + status: Status, + score: dict[str, TunableValue] | None = None, + ) -> dict[str, float] | None: + # Track the registered scores for testing and validation. + score = score or {} + # Almost the same as _get_scores, but we don't adjust the direction here. 
+        scores: dict[str, TunableValue] = {
+            k: float(v) for k, v in score.items() if k in self._opt_targets and v is not None
+        }
+        self._registered_scores.append(
+            RegisteredScore(
+                config=tunables.copy(),
+                score=scores,
+                status=status,
+            )
+        )
+        return super().register(tunables, status, score)
 
     def bulk_register(
         self,
diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py
index c520aca777..3af7e136a4 100644
--- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py
+++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py
@@ -4,6 +4,7 @@
 #
 """Unit tests for :py:class:`mlos_bench.schedulers` and their internals."""
 
+from logging import warning
 import sys
 from unittest.mock import patch
 
 import pytest
@@ -15,12 +16,15 @@
 from mlos_bench.optimizers.mock_optimizer import MockOptimizer
 from mlos_bench.schedulers.base_scheduler import Scheduler
 from mlos_bench.schedulers.trial_runner import TrialRunner
+from mlos_bench.storage.base_trial_data import TrialData
 from mlos_bench.storage.sql.storage import SqlStorage
 from mlos_core.tests import get_all_concrete_subclasses
 
 mock_opt = optimizers_fixtures.mock_opt
 sqlite_storage = sql_storage_fixtures.sqlite_storage
 
+DEBUG_WARNINGS_ENABLED = False
+
 # pylint: disable=redefined-outer-name
@@ -52,6 +56,47 @@ def create_scheduler(
     )
 
 
+def debug_warn(*args: object) -> None:
+    """Optionally issue warnings for debugging."""
+    if DEBUG_WARNINGS_ENABLED:
+        warning(*args)
+
+
+def mock_opt_has_registered_trial_score(
+    mock_opt: MockOptimizer,
+    trial_data: TrialData,
+) -> bool:
+    """Check that the MockOptimizer has registered a score matching the given trial data."""
+    # pylint: disable=consider-using-any-or-all
+    # Split out for easier debugging.
+    for registered_score in mock_opt.registered_scores:
+        match = True
+        if registered_score.status != trial_data.status:
+            match = False
+            debug_warn(
+                "Registered status %s does not match trial status %s.",
+                registered_score.status,
+                trial_data.status,
+            )
+        elif registered_score.score != trial_data.results_dict:
+            debug_warn(
+                "Registered score %s does not match trial results %s.",
+                registered_score.score,
+                trial_data.results_dict,
+            )
+            match = False
+        elif registered_score.config.get_param_values() != trial_data.tunable_config.config_dict:
+            debug_warn(
+                "Registered config %s does not match trial config %s.",
+                registered_score.config.get_param_values(),
+                trial_data.tunable_config.config_dict,
+            )
+            match = False
+        if match:
+            return True
+    return False
+
+
 scheduler_classes = get_all_concrete_subclasses(
     Scheduler,  # type: ignore[type-abstract]
     pkg_name="mlos_bench",
@@ -67,7 +112,7 @@
     sys.platform == "win32",
     reason="Skipping test on Windows - SQLite storage is not accessible in parallel tests there.",
 )
-def test_scheduler(
+def test_scheduler_with_mock_trial_data(
     scheduler_class: type[Scheduler],
     trial_runners: list[TrialRunner],
     mock_opt: MockOptimizer,
@@ -91,8 +136,8 @@ def test_scheduler(
 
     root_env = scheduler.root_environment
     experiment_id = root_env.experiment_id
-    assert isinstance(root_env, MockEnv), "Root environment is not a MockEnv instance."
-    mock_trial_data = root_env.mock_trial_data
+    assert isinstance(root_env, MockEnv), f"Root environment {root_env} is not a MockEnv."
+    assert root_env.mock_trial_data, "No mock trial data found in root environment."
 
     # Run the scheduler
     with scheduler:
@@ -100,11 +145,19 @@ def test_scheduler(
         scheduler.teardown()
 
     # Now check the overall results.
-
-    # Check the results in storage.
+ ran_trials = {trial.trial_id for trial in scheduler.ran_trials} + assert ( + experiment_id in sqlite_storage.experiments + ), f"Experiment {experiment_id} not found in storage." exp_data = sqlite_storage.experiments[experiment_id] - for mock_trial_data in mock_trial_data.values(): + + for mock_trial_data in root_env.mock_trial_data.values(): trial_id = mock_trial_data.trial_id + + # Check the bookkeeping for ran_trials. + assert trial_id in ran_trials, f"Trial {trial_id} not found in Scheduler.ran_trials." + + # Check the results in storage. assert trial_id in exp_data.trials, f"Trial {trial_id} not found in storage." trial_data = exp_data.trials[trial_id] @@ -118,26 +171,43 @@ def test_scheduler( assert ( trial_data.results_dict[result_key] == result_value ), f"Result value for {result_key} does not match expected value." - else: - # metrics weren't explicit in the mock trial data, so we only check - # that a score was registered - for opt_target in mock_opt.targets: - assert ( - opt_target in trial_data.results_dict - ), f"Result column {opt_target} not found in storage." - assert ( - trial_data.results_dict[opt_target] is not None - ), f"Result value for {opt_target} is None." + # TODO: Should we check the reverse - no extra metrics were registered? + # else: metrics weren't explicit in the mock trial data, so we only + # check that a score was stored for the optimization target, but that's + # good to do regardless + for opt_target in mock_opt.targets: + assert ( + opt_target in trial_data.results_dict + ), f"Result column {opt_target} not found in storage." + assert ( + trial_data.results_dict[opt_target] is not None + ), f"Result value for {opt_target} is None." + + # Check that the appropriate sleeps occurred. + trial_time_lb = 0.0 + trial_time_lb += mock_trial_data.setup.sleep or 0 + trial_time_lb += mock_trial_data.run.sleep or 0 + trial_time_lb += mock_trial_data.status.sleep or 0 + trial_time_lb += mock_trial_data.teardown.sleep or 0 + assert trial_data.ts_end is not None, f"Trial {trial_id} has no end time." + trial_duration = trial_data.ts_end - trial_data.ts_start + trial_dur_secs = trial_duration.total_seconds() + assert ( + trial_dur_secs >= trial_time_lb + ), f"Trial {trial_id} took less time ({trial_dur_secs}) than expected ({trial_time_lb}). " + # Check that the trial status matches what we expected. assert ( trial_data.status == mock_trial_data.run.status ), f"Trial {trial_id} status {trial_data.status} was not {mock_trial_data.run.status}." # TODO: Check the trial status telemetry. - # TODO: - # 2. Check the optimizer registration. - # 3. Check the bookkeeping for ran_trials. + # Check the optimizer registration. + assert mock_opt_has_registered_trial_score( + mock_opt, + trial_data, + ), f"Trial {trial_id} was not registered in the optimizer." # TODO: And check the intermediary results. # 4. Check the bookkeeping for add_new_optimizer_suggestions and _last_trial_id. 
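
The `registered_scores` hook added to MockOptimizer above also makes a coarse
end-of-run cross-check cheap to express. A possible usage sketch (the helper
name is an assumption, and it presumes each finished trial was registered
exactly once):

    from mlos_bench.optimizers.mock_optimizer import MockOptimizer


    def assert_all_trials_registered(mock_opt: MockOptimizer, exp_data) -> None:
        """Every trial stored for the experiment should have been registered."""
        # exp_data is the experiment's storage view, e.g. storage.experiments[...].
        assert len(mock_opt.registered_scores) == len(exp_data.trials)
        for trial_data in exp_data.trials.values():
            # Reuses the matcher defined in test_scheduler.py above.
            assert mock_opt_has_registered_trial_score(mock_opt, trial_data)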
From 206bb779b5b693110de4398db0d3adaa4240672b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 May 2025 22:23:33 +0000 Subject: [PATCH 107/109] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/optimizers/mock_optimizer.py | 5 +++-- mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/optimizers/mock_optimizer.py b/mlos_bench/mlos_bench/optimizers/mock_optimizer.py index 60eed31b46..a1311b6f95 100644 --- a/mlos_bench/mlos_bench/optimizers/mock_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mock_optimizer.py @@ -14,8 +14,8 @@ import logging import random from collections.abc import Callable, Sequence - from dataclasses import dataclass + from mlos_bench.environments.status import Status from mlos_bench.optimizers.track_best_optimizer import TrackBestOptimizer from mlos_bench.services.base_service import Service @@ -56,7 +56,8 @@ def __init__( @property def registered_scores(self) -> list[RegisteredScore]: - """Return the list of registered scores. + """ + Return the list of registered scores. Notes ----- diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index 3af7e136a4..04a998ea22 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -4,8 +4,8 @@ # """Unit tests for :py:class:`mlos_bench.schedulers` and their internals.""" -from logging import warning import sys +from logging import warning from unittest.mock import patch import pytest From abe412be2b06287a990b6489eff000b881ce8177 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 17:24:54 -0500 Subject: [PATCH 108/109] linting --- mlos_bench/mlos_bench/tests/schedulers/conftest.py | 3 ++- mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/schedulers/conftest.py b/mlos_bench/mlos_bench/tests/schedulers/conftest.py index e5aa302d61..df6bd2776f 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/conftest.py +++ b/mlos_bench/mlos_bench/tests/schedulers/conftest.py @@ -9,6 +9,7 @@ import re import pytest +from pytest import FixtureRequest from mlos_bench.environments.mock_env import MockEnv from mlos_bench.schedulers.trial_runner import TrialRunner @@ -68,7 +69,7 @@ def mock_env_config() -> dict: @pytest.fixture -def global_config(request) -> dict: +def global_config(request: FixtureRequest) -> dict: """A global config for a MockEnv.""" test_name = request.node.name test_name = re.sub(r"[^a-zA-Z0-9]", "_", test_name) diff --git a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py index 04a998ea22..9a2cc1dbae 100644 --- a/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py +++ b/mlos_bench/mlos_bench/tests/schedulers/test_scheduler.py @@ -6,7 +6,6 @@ import sys from logging import warning -from unittest.mock import patch import pytest @@ -38,7 +37,6 @@ def create_scheduler( """Create a Scheduler instance using trial_runners, mock_opt, and sqlite_storage fixtures. """ - env = trial_runners[0].environment assert isinstance(env, MockEnv), "Environment is not a MockEnv instance." 
max_trials = max(trial_id for trial_id in env.mock_trial_data.keys()) From f5cb4689bde3a435dd8ec05b6111efe528a3735a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 22 May 2025 17:26:54 -0500 Subject: [PATCH 109/109] doc tweaks --- mlos_bench/mlos_bench/environments/status.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/status.py b/mlos_bench/mlos_bench/environments/status.py index 6343d3e854..aa3b3e99c1 100644 --- a/mlos_bench/mlos_bench/environments/status.py +++ b/mlos_bench/mlos_bench/environments/status.py @@ -136,8 +136,8 @@ def is_timed_out(self) -> bool: successfully or not. This set is used to determine if a trial or experiment has reached a final state. This includes: -- :py:data:`.Status.SUCCEEDED`: The trial or experiment completed successfully. -- :py:data:`.Status.CANCELED`: The trial or experiment was canceled. -- :py:data:`.Status.FAILED`: The trial or experiment failed. -- :py:data:`.Status.TIMED_OUT`: The trial or experiment timed out. +- :py:attr:`.Status.SUCCEEDED`: The trial or experiment completed successfully. +- :py:attr:`.Status.CANCELED`: The trial or experiment was canceled. +- :py:attr:`.Status.FAILED`: The trial or experiment failed. +- :py:attr:`.Status.TIMED_OUT`: The trial or experiment timed out. """
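
Taken together, those four terminal states make "is this trial finished?" a
simple membership test. An illustrative sketch follows; the module presumably
already defines such a set (its name is not visible in this hunk), so the
constant and helper names here are assumptions:

    from mlos_bench.environments.status import Status

    # Mirrors the final states enumerated in the docstring above.
    FINAL_STATUSES = frozenset(
        {Status.SUCCEEDED, Status.CANCELED, Status.FAILED, Status.TIMED_OUT}
    )


    def is_final(status: Status) -> bool:
        """True once a trial or experiment can no longer change state."""
        return status in FINAL_STATUSES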