diff --git a/src/hyperactive/opt/_adapters/_gfo.py b/src/hyperactive/opt/_adapters/_gfo.py
index 5edd81c9..d2da74c1 100644
--- a/src/hyperactive/opt/_adapters/_gfo.py
+++ b/src/hyperactive/opt/_adapters/_gfo.py
@@ -23,11 +23,14 @@ class _BaseGFOadapter(BaseOptimizer):
     _tags = {
         "authors": "SimonBlanke",
         "python_dependencies": ["gradient-free-optimizers>=1.5.0"],
+        "capability:categorical": "encoded",
     }
 
     def __init__(self):
         super().__init__()
 
+        self._categorical_mappings = {}
+
         if self.initialize is None:
             self._initialize = {"grid": 4, "random": 2, "vertices": 4}
         else:
@@ -87,8 +90,18 @@ def _handle_gfo_defaults(self, search_config):
     def _to_dict_np(self, search_space):
         """Coerce the search space to a format suitable for gfo optimizers.
 
-        gfo expects dicts of numpy arrays, not lists.
-        This method coerces lists or tuples in the search space to numpy arrays.
+        gfo expects dicts of numpy arrays, not lists. This method coerces
+        lists or tuples in the search space to numpy arrays.
+
+        In addition, this handles categorical dimensions by encoding them to
+        consecutive integers, while keeping track of the original levels in
+        ``self._categorical_mappings`` for decoding during evaluation and when
+        returning ``best_params_``. A dimension is treated as categorical if
+        its dtype is non-numeric (string, object, or boolean).
+
+        Note: ``np.unique`` sorts values, so the encoding is not order-preserving.
+        For example, ``["rbf", "linear"]`` becomes ``["linear", "rbf"]`` with
+        indices ``[1, 0]``.
 
         Parameters
         ----------
@@ -108,9 +121,50 @@ def coerce_to_numpy(arr):
                 return np.array(arr)
             return arr
 
-        coerced_search_space = {k: coerce_to_numpy(v) for k, v in search_space.items()}
+        self._categorical_mappings = {}
+        coerced_search_space = {}
+
+        for key, value in search_space.items():
+            arr = coerce_to_numpy(value)
+
+            if arr.dtype.kind in ("O", "U", "S", "b"):
+                unique_vals, inverse = np.unique(arr, return_inverse=True)
+                self._categorical_mappings[key] = list(unique_vals)
+                coerced_search_space[key] = inverse.astype(int)
+            else:
+                coerced_search_space[key] = arr
+
         return coerced_search_space
 
+    def _decode_categoricals(self, params):
+        """Decode integer-encoded categoricals back to original levels.
+
+        Parameters
+        ----------
+        params : dict
+            Parameter dict as used inside the optimizer/backend.
+
+        Returns
+        -------
+        dict
+            Parameter dict with any encoded categoricals mapped back to their
+            original values, if mappings are present.
+        """
+        if not self._categorical_mappings:
+            return params
+
+        decoded = dict(params)
+        for key, categories in self._categorical_mappings.items():
+            if key not in decoded:
+                continue
+            try:
+                idx = int(decoded[key])
+            except (TypeError, ValueError):
+                continue
+            if 0 <= idx < len(categories):
+                decoded[key] = categories[idx]
+        return decoded
+
     def _solve(self, experiment, **search_config):
         """Run the optimization search process.
@@ -133,13 +187,20 @@ def _solve(self, experiment, **search_config):
         gfo_cls = self._get_gfo_class()
         gfopt = gfo_cls(**search_config)
 
+        def _objective(params):
+            decoded_params = self._decode_categoricals(params)
+            score, _ = experiment.score(decoded_params)
+            return score
+
         with StdoutMute(active=not self.verbose):
             gfopt.search(
-                objective_function=experiment.score,
+                objective_function=_objective,
                 n_iter=n_iter,
                 max_time=max_time,
             )
+
         best_params = gfopt.best_para
+        best_params = self._decode_categoricals(best_params)
         return best_params
 
     @classmethod
diff --git a/src/hyperactive/tests/test_all_objects.py b/src/hyperactive/tests/test_all_objects.py
index 559af35d..fce923bf 100644
--- a/src/hyperactive/tests/test_all_objects.py
+++ b/src/hyperactive/tests/test_all_objects.py
@@ -14,7 +14,6 @@
 # default is False, can be set to True by pytest --only_changed_modules True flag
 ONLY_CHANGED_MODULES = False
 
-
 class PackageConfig:
     """Contains package config variables for test classes."""
@@ -50,9 +49,10 @@ class PackageConfig:
         "info:local_vs_global",  # "local", "mixed", "global"
         "info:explore_vs_exploit",  # "explore", "exploit", "mixed"
         "info:compute",  # "low", "middle", "high"
+        # capabilities
+        "capability:categorical",
     ]
 
-
 class BaseFixtureGenerator(PackageConfig, _BaseFixtureGenerator):
     """Fixture generator for base testing functionality in sktime.
@@ -134,7 +134,6 @@ def softdeps_present(obj):
     # which sequence the conditional fixtures are generated in
     fixture_sequence = ["object_class", "object_instance"]
 
-
 class TestAllObjects(BaseFixtureGenerator, _TestAllObjects):
     """Generic tests for all objects in the package."""
@@ -167,7 +166,6 @@ def test_valid_object_tags(self, object_instance):
         super().test_valid_object_class_tags(object_instance)
 
-
 class ExperimentFixtureGenerator(BaseFixtureGenerator):
     """Fixture generator for experiments.
@@ -182,7 +180,6 @@ class ExperimentFixtureGenerator(BaseFixtureGenerator):
     object_type_filter = "experiment"
 
-
 class TestAllExperiments(ExperimentFixtureGenerator, _QuickTester):
     """Module level tests for all experiment classes."""
@@ -238,7 +235,6 @@ def test_score_function(self, object_class):
         elif sign_tag == "lower" and det_tag == "deterministic":
             assert score == -e_score
 
-
 class OptimizerFixtureGenerator(BaseFixtureGenerator):
     """Fixture generator for optimizers.
@@ -253,7 +249,6 @@ class OptimizerFixtureGenerator(BaseFixtureGenerator):
     object_type_filter = "optimizer"
 
-
 class TestAllOptimizers(OptimizerFixtureGenerator, _QuickTester):
     """Module level tests for all optimizer classes."""
@@ -349,6 +344,43 @@ def test_gfo_integration(self, object_instance):
         assert "C" in best_params, "Best parameters should contain 'C'"
         assert "gamma" in best_params, "Best parameters should contain 'gamma'"
 
+    def test_gfo_categorical_encoding(self, object_instance):
+        """GFO optimizers should handle categoricals via internal encoding."""
+        from hyperactive.opt._adapters._gfo import _BaseGFOadapter
+
+        if not isinstance(object_instance, _BaseGFOadapter):
+            return None
+
+        import numpy as np
+        from sklearn.datasets import load_iris
+        from sklearn.svm import SVC
+
+        from hyperactive.experiment.integrations import SklearnCvExperiment
+
+        X, y = load_iris(return_X_y=True)
+        sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y)
+
+        search_space = {
+            "C": np.array([0.1, 1.0]),
+            "kernel": np.array(["linear", "rbf"]),
+        }
+        _config = {
+            "search_space": search_space,
+            "n_iter": 5,
+            "experiment": sklearn_exp,
+        }
+        optimizer = object_instance.clone().set_params(**_config)
+        optimizer.solve()
+        best_params = optimizer.best_params_
+
+        assert isinstance(best_params, dict)
+        assert "kernel" in best_params
+        assert best_params["kernel"] in {"linear", "rbf"}
+
+        # Verify internal categorical mappings were populated correctly
+        assert "kernel" in optimizer._categorical_mappings
+        assert set(optimizer._categorical_mappings["kernel"]) == {"linear", "rbf"}
+
     def test_selection_direction_backend(self, object_instance):
         """Backends return argmax over standardized scores on controlled setup.
@@ -462,3 +494,4 @@ def _assert_good(best_params):
         # For other backends, no-op here; targeted direction tests live elsewhere
         return None
+
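
For context, below is a minimal standalone sketch of the encode/decode round trip that the patched `_to_dict_np` and `_decode_categoricals` perform, shown outside the diff. The helper names `encode_space` and `decode_params` are illustrative only and not part of the hyperactive API; the logic mirrors the adapter code above, including the sorting behavior of `np.unique`.

    import numpy as np

    def encode_space(search_space):
        """Encode non-numeric dimensions to consecutive ints, keeping the levels."""
        mappings, encoded = {}, {}
        for key, values in search_space.items():
            arr = np.array(values)
            if arr.dtype.kind in ("O", "U", "S", "b"):  # object, str, bytes, bool
                unique_vals, inverse = np.unique(arr, return_inverse=True)
                mappings[key] = list(unique_vals)   # sorted, e.g. ["linear", "rbf"]
                encoded[key] = inverse.astype(int)  # e.g. [1, 0] for ["rbf", "linear"]
            else:
                encoded[key] = arr
        return encoded, mappings

    def decode_params(params, mappings):
        """Map integer codes back to the original categorical levels."""
        decoded = dict(params)
        for key, categories in mappings.items():
            if key in decoded:
                decoded[key] = categories[int(decoded[key])]
        return decoded

    space = {"C": [0.1, 1.0], "kernel": ["rbf", "linear"]}
    encoded, mappings = encode_space(space)
    assert list(encoded["kernel"]) == [1, 0]  # np.unique sorts, so order is not preserved
    assert decode_params({"kernel": 0}, mappings)["kernel"] == "linear"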