diff --git a/.cspell.json b/.cspell.json
index 2cd9280fc8d..f4bc99063c2 100644
--- a/.cspell.json
+++ b/.cspell.json
@@ -21,6 +21,7 @@
     "discretization",
     "discretize",
     "drivername",
+    "dropna",
     "dstpath",
     "dtype",
     "duckdb",
diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
index aa948b8125a..613d55dffb9 100644
--- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
+++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
@@ -302,7 +302,7 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFr
         self.optimizer_parameter_space.check_configuration(trial.config)
         assert trial.config.config_space == self.optimizer_parameter_space
         self.trial_info_map[trial.config] = trial
-        config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys()))
+        config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys())).dropna(axis=1)
         return config_df, None
 
     def register_pending(self, *, configs: pd.DataFrame,
diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py
index 8fcf592a6c9..60237e68354 100644
--- a/mlos_core/mlos_core/optimizers/optimizer.py
+++ b/mlos_core/mlos_core/optimizers/optimizer.py
@@ -96,14 +96,14 @@ def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
         if context is not None:
             assert len(configs) == len(context), \
                 "Mismatched number of configs and context."
-        assert configs.shape[1] == len(self.parameter_space.values()), \
+        assert configs.shape[1] <= len(self.parameter_space.values()), \
             "Mismatched configuration shape."
         self._observations.append((configs, scores, context))
         self._has_context = context is not None
 
         if self._space_adapter:
             configs = self._space_adapter.inverse_transform(configs)
-            assert configs.shape[1] == len(self.optimizer_parameter_space.values()), \
+            assert configs.shape[1] <= len(self.optimizer_parameter_space.values()), \
                 "Mismatched configuration shape after inverse transform."
 
         return self._register(configs=configs, scores=scores, context=context)
diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py
index 8231e59feb9..5c1a8e9ea7e 100644
--- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py
+++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py
@@ -400,3 +400,75 @@ def objective(point: pd.DataFrame) -> pd.DataFrame:
     assert isinstance(all_configs, pd.DataFrame)
     assert isinstance(all_scores, pd.DataFrame)
     assert all_contexts is None
+
+
+@pytest.mark.parametrize(("optimizer_type", "kwargs"), [
+    # Default optimizer
+    (None, {}),
+    # Enumerate all supported Optimizers
+    *[(member, {}) for member in OptimizerType],
+    # Optimizer with non-empty kwargs argument
+])
+def test_hierarchical_input_space(optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None:
+    """
+    Toy problem to test that the optimizers properly handle a hierarchical (conditional) configuration space and its return types.
+    """
+    max_iterations = 10
+    if kwargs is None:
+        kwargs = {}
+
+    def objective(point: pd.DataFrame) -> pd.DataFrame:
+        # Two different score functions, depending on the switch.
+        if point["switch"].iloc[0] == "a":
+            return pd.DataFrame({"score": point["a"] + point["c"]})
+        else:
+            return pd.DataFrame({"score": 2 * point["b"] + point["c"]})
+
+    # Initialize a hierarchical configuration space.
+    input_space = CS.ConfigurationSpace(seed=SEED)
+    input_space.add_hyperparameter(CS.CategoricalHyperparameter(name="switch", choices=["a", "b"]))
+    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name="a", lower=0.0, upper=5.0))
+    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name="b", lower=0.0, upper=5.0))
+    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name="c", lower=0.0, upper=5.0))
+    input_space.add_condition(CS.EqualsCondition(input_space["a"], input_space["switch"], "a"))
+    input_space.add_condition(CS.EqualsCondition(input_space["b"], input_space["switch"], "b"))
+
+    if optimizer_type is None:
+        optimizer = OptimizerFactory.create(
+            parameter_space=input_space,
+            optimization_targets=['score'],
+            optimizer_kwargs=kwargs,
+        )
+    else:
+        optimizer = OptimizerFactory.create(
+            parameter_space=input_space,
+            optimization_targets=['score'],
+            optimizer_type=optimizer_type,
+            optimizer_kwargs=kwargs,
+        )
+
+    for _ in range(max_iterations):
+        suggestion, metadata = optimizer.suggest()
+
+        # Check that the suggestion contains a valid combination of columns.
+        assert isinstance(suggestion, pd.DataFrame)
+        assert {'switch', 'c'}.issubset(suggestion.columns)
+        assert {'a'}.issubset(suggestion.columns) ^ {'b'}.issubset(suggestion.columns)
+
+        # Check that the suggestion values have the expected dtypes.
+        assert suggestion["switch"].iloc[0] == "a" or suggestion["switch"].iloc[0] == "b"
+        if suggestion["switch"].iloc[0] == "a":
+            assert isinstance(suggestion['a'].iloc[0], np.floating)
+        else:
+            assert isinstance(suggestion['b'].iloc[0], np.floating)
+        assert isinstance(suggestion['c'].iloc[0], np.floating)
+
+        # Check that the suggestion is in the space.
+        test_configuration = CS.Configuration(optimizer.parameter_space, suggestion.astype('O').iloc[0].to_dict())
+        # Raises an error if it is outside the configuration space.
+        test_configuration.is_valid_configuration()
+
+        # Test registering the suggested configuration with a score.
+        observation = objective(suggestion)
+        assert isinstance(observation, pd.DataFrame)
+        optimizer.register(configs=suggestion, scores=observation, metadata=metadata)
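
Note for reviewers: a minimal standalone sketch (not part of the diff) of why _suggest() now appends .dropna(axis=1). In a conditional (hierarchical) space, inactive hyperparameters are absent from a sampled Configuration, so their DataFrame cells come back as NaN; dropping all-NaN columns leaves only the active parameters, which is also why the register() assertions were relaxed from == to <=. This assumes a recent ConfigSpace and pandas; the switch/a/b names are illustrative, mirroring the new test.

import ConfigSpace as CS
import pandas as pd

space = CS.ConfigurationSpace(seed=42)
space.add_hyperparameter(CS.CategoricalHyperparameter(name="switch", choices=["a", "b"]))
space.add_hyperparameter(CS.UniformFloatHyperparameter(name="a", lower=0.0, upper=5.0))
space.add_hyperparameter(CS.UniformFloatHyperparameter(name="b", lower=0.0, upper=5.0))
# "a" is only active when switch == "a"; "b" only when switch == "b".
space.add_condition(CS.EqualsCondition(space["a"], space["switch"], "a"))
space.add_condition(CS.EqualsCondition(space["b"], space["switch"], "b"))

config = space.sample_configuration()
# The inactive hyperparameter is missing from config, so its column is NaN ...
config_df = pd.DataFrame([config], columns=list(space.keys()))
# ... and dropna(axis=1) keeps only the active columns, e.g. ["switch", "a"].
print(config_df.dropna(axis=1))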