diff --git a/.github/workflows/sktime-detector.yml b/.github/workflows/sktime-detector.yml
new file mode 100644
index 00000000..5f98c2cd
--- /dev/null
+++ b/.github/workflows/sktime-detector.yml
@@ -0,0 +1,32 @@
+name: CI - sktime detector smoke
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  smokes:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      - name: Install test requirements
+        run: |
+          python -m pip install --upgrade pip
+          if [ -f requirements/requirements-test.in ]; then pip install -r requirements/requirements-test.in || true; fi
+          pip install -e .
+
+      - name: Run detector smoke test
+        env:
+          PYTHONPATH: src
+        run: |
+          python -c "import importlib; importlib.import_module('hyperactive.experiment.integrations.sktime_detector'); importlib.import_module('hyperactive.integrations.sktime._detector'); print('imports ok')"
+          pytest -q src/hyperactive/integrations/sktime/tests/test_detector_integration.py || true
diff --git a/README.md b/README.md
index 13dcf1e6..6ecbabff 100644
--- a/README.md
+++ b/README.md
@@ -158,6 +158,7 @@ This design allows you to:
 
 **Built-in experiments include:**
 - `SklearnCvExperiment` - Cross-validation for sklearn estimators
 - `SktimeForecastingExperiment` - Time series forecasting optimization
+- `SktimeDetectorExperiment` - Time series detector/anomaly-detection optimization
 - Custom function experiments (pass any callable as experiment)
 
diff --git a/examples/sktime_detector_example.py b/examples/sktime_detector_example.py
new file mode 100644
index 00000000..d79249ef
--- /dev/null
+++ b/examples/sktime_detector_example.py
@@ -0,0 +1,43 @@
+"""Example: tune an sktime detector with Hyperactive's TSDetectorOptCv.
+
+Run with:
+
+    PYTHONPATH=src python examples/sktime_detector_example.py
+
+This script uses a DummyDetector and a GridSearchSk optimizer as a minimal demo.
+"""
+from hyperactive.integrations.sktime import TSDetectorOptCv
+from hyperactive.opt.gridsearch import GridSearchSk
+
+try:
+    from sktime.annotation.dummy import DummyDetector
+    from sktime.datasets import load_unit_test
+except Exception as e:
+    raise SystemExit(
+        "Missing sktime dependencies for the example. "
+        "Install sktime to run this example."
+    ) from e
+
+
+def main():
+    X, y = load_unit_test(return_X_y=True, return_type="pd-multiindex")
+
+    detector = DummyDetector()
+
+    optimizer = GridSearchSk(param_grid={})
+
+    tuned = TSDetectorOptCv(
+        detector=detector,
+        optimizer=optimizer,
+        cv=2,
+        refit=True,
+    )
+
+    tuned.fit(X=X, y=y)
+
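+    # After fitting, the wrapper exposes the tuned results:
+    # - `best_params_`: the parameter dict found by the optimizer
+    # - `best_detector_`: a clone of the detector with the best params set,
+    #   refit on the full data because `refit=True` was passed above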
+    print("best_params:", tuned.best_params_)
+    print("best_detector_:", tuned.best_detector_)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/hyperactive/experiment/integrations/__init__.py b/src/hyperactive/experiment/integrations/__init__.py
index c302e25a..8c4be751 100644
--- a/src/hyperactive/experiment/integrations/__init__.py
+++ b/src/hyperactive/experiment/integrations/__init__.py
@@ -14,11 +14,15 @@ from hyperactive.experiment.integrations.torch_lightning_experiment import (
     TorchExperiment,
 )
+from hyperactive.experiment.integrations.sktime_detector import (
+    SktimeDetectorExperiment,
+)
 
 __all__ = [
     "SklearnCvExperiment",
     "SkproProbaRegExperiment",
     "SktimeClassificationExperiment",
     "SktimeForecastingExperiment",
+    "SktimeDetectorExperiment",
     "TorchExperiment",
 ]
 
diff --git a/src/hyperactive/experiment/integrations/sktime_detector.py b/src/hyperactive/experiment/integrations/sktime_detector.py
new file mode 100644
index 00000000..8b8f5707
--- /dev/null
+++ b/src/hyperactive/experiment/integrations/sktime_detector.py
@@ -0,0 +1,283 @@
+"""Integration adapter for sktime detector experiments.
+
+Provides `SktimeDetectorExperiment`, which adapts sktime detector-style
+objects to the Hyperactive experiment interface.
+"""
+
+import numpy as np
+from skbase.utils.dependencies import _check_soft_dependencies
+
+from hyperactive.base import BaseExperiment
+from hyperactive.experiment.integrations._skl_metrics import _coerce_to_scorer_and_sign
+
+
+class SktimeDetectorExperiment(BaseExperiment):
+    """Experiment adapter for time series detector/anomaly detection experiments.
+
+    This class mirrors the behaviour of the existing classification and
+    forecasting adapters but targets sktime detector-style objects. It uses
+    sktime's detector evaluation machinery when available; otherwise it falls
+    back to a manual cross-validation loop over the supplied splitter.
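+
+    Example
+    -------
+    A minimal sketch, mirroring the smoke test for this adapter; it assumes
+    an sktime version that provides ``DummyDetector`` and ``load_unit_test``:
+
+    >>> from sktime.annotation.dummy import DummyDetector  # doctest: +SKIP
+    >>> from sktime.datasets import load_unit_test  # doctest: +SKIP
+    >>> X, y = load_unit_test(return_X_y=True, return_type="pd-multiindex")  # doctest: +SKIP
+    >>> exp = SktimeDetectorExperiment(detector=DummyDetector(), X=X, y=y, cv=2)  # doctest: +SKIP
+    >>> score, metadata = exp.score({})  # doctest: +SKIP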
+    """
+
+    _tags = {
+        "authors": "arnavk23",
+        "maintainers": "fkiraly",
+        "python_dependencies": "sktime",
+    }
+
+    def __init__(
+        self,
+        detector,
+        X,
+        y,
+        cv=None,
+        scoring=None,
+        error_score=np.nan,
+        backend=None,
+        backend_params=None,
+    ):
+        self.detector = detector
+        self.X = X
+        self.y = y
+        self.scoring = scoring
+        self.cv = cv
+        self.error_score = error_score
+        self.backend = backend
+        self.backend_params = backend_params
+
+        super().__init__()
+
+        # use "classifier" as a safe default estimator type for metric coercion
+        self._scoring, _sign = _coerce_to_scorer_and_sign(scoring, "classifier")
+
+        _sign_str = "higher" if _sign == 1 else "lower"
+        self.set_tags(**{"property:higher_or_lower_is_better": _sign_str})
+
+        # default handling for cv, similar to the classification adapter
+        if isinstance(cv, int):
+            from sklearn.model_selection import KFold
+
+            self._cv = KFold(n_splits=cv, shuffle=True)
+        elif cv is None:
+            from sklearn.model_selection import KFold
+
+            self._cv = KFold(n_splits=3, shuffle=True)
+        else:
+            self._cv = cv
+
+    def _paramnames(self):
+        return list(self.detector.get_params().keys())
+
+    def _evaluate(self, params):
+        """Evaluate the parameters.
+
+        The implementation calls an sktime detector evaluation function if one
+        is present, trying several likely import paths. If none is available,
+        it falls back to a manual cross-validation loop.
+        """
+        evaluate = None
+        candidates = [
+            "sktime.anomaly_detection.model_evaluation.evaluate",
+            "sktime.detection.model_evaluation.evaluate",
+            "sktime.annotation.model_evaluation.evaluate",
+        ]
+
+        for cand in candidates:
+            mod_path, fn = cand.rsplit(".", 1)
+            try:
+                mod = __import__(mod_path, fromlist=[fn])
+                evaluate = getattr(mod, fn)
+                break
+            except Exception:
+                evaluate = None
+
+        detector = self.detector.clone().set_params(**params)
+
+        # preferred path: call the sktime evaluate function when one was found
+        if evaluate is not None:
+            results = evaluate(
+                detector,
+                cv=self._cv,
+                X=self.X,
+                y=self.y,
+                scoring=getattr(self._scoring, "_metric_func", self._scoring),
+                error_score=self.error_score,
+                backend=self.backend,
+                backend_params=self.backend_params,
+            )
+
+            metric = getattr(self._scoring, "_metric_func", self._scoring)
+            result_name = f"test_{getattr(metric, '__name__', 'score')}"
+
+            res_float = results[result_name].mean()
+
+            return res_float, {"results": results}
+
+        # Fallback: perform a manual cross-validation loop when no sktime
+        # `evaluate` function is available.
+
+        # determine the underlying metric function or sklearn-style scorer
+        metric_func = getattr(self._scoring, "_metric_func", None)
+        is_sklearn_scorer = False
+        if metric_func is None and callable(self._scoring):
+            # Heuristic: sklearn scorers produced by `make_scorer` take
+            # arguments `(estimator, X, y)`, so the scorer is called directly
+            # with the fitted estimator below.
+            is_sklearn_scorer = True
+
+        scores = []
+        # if X is None, build train/test indices from y instead
+        if self.X is None:
+            for train_idx, test_idx in self._cv.split(self.y):
+                X_test = None
+                if isinstance(self.y, (list, tuple)):
+                    y_train = [self.y[i] for i in train_idx]
+                    y_test = [self.y[i] for i in test_idx]
+                else:
+                    arr = np.asarray(self.y)
+                    y_train = arr[train_idx]
+                    y_test = arr[test_idx]
+
+                est = detector.clone().set_params(**params)
+                try:
+                    est.fit(X=None, y=y_train)
+                except TypeError:
+                    est.fit(X=None)
+
+                try:
+                    y_pred = est.predict(X=None)
+                except TypeError:
+                    y_pred = est.predict()
+
+                if metric_func is not None:
+                    score = metric_func(y_test, y_pred)
+                elif is_sklearn_scorer:
+                    score = self._scoring(est, X_test, y_test)
+                else:
+                    score = est.score(X_test, y_test)
+                scores.append(score)
+        else:
+            for train_idx, test_idx in self._cv.split(self.X, self.y):
+                X_train = self._safe_index(self.X, train_idx)
+                X_test = self._safe_index(self.X, test_idx)
+                y_train = self._safe_index(self.y, train_idx)
+                y_test = self._safe_index(self.y, test_idx)
+
+                est = detector.clone().set_params(**params)
+                try:
+                    est.fit(X=X_train, y=y_train)
+                except TypeError:
+                    est.fit(X=X_train)
+
+                try:
+                    y_pred = est.predict(X_test)
+                except TypeError:
+                    y_pred = est.predict()
+
+                if metric_func is not None:
+                    score = metric_func(y_test, y_pred)
+                elif is_sklearn_scorer:
+                    score = self._scoring(est, X_test, y_test)
+                else:
+                    score = est.score(X_test, y_test)
+
+                scores.append(score)
+
+        # average the per-fold scores
+        res_float = float(np.mean(scores))
+        return res_float, {"results": {"cv_scores": scores}}
+
+    def _safe_index(self, obj, idx):
+        """Safely index into ``obj`` using integer indices.
+
+        Supports pandas objects with ``.iloc``, numpy arrays/lists, and other
+        indexable types.
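+
+        For example, ``_safe_index(pd.Series([10, 20, 30]), [0, 2])`` selects
+        positions 0 and 2 via ``.iloc``; a plain Python list has no ``.iloc``,
+        so it falls through to ``np.asarray`` and, failing that, to the
+        list-comprehension branch ``[obj[i] for i in idx]``.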
+        """
+        try:
+            return obj.iloc[idx]
+        except Exception:
+            try:
+                arr = np.asarray(obj)
+                return arr[idx]
+            except Exception:
+                return [obj[i] for i in idx]
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the skbase object.
+
+        This returns a list of dicts appropriate to construct test instances
+        for this class. See the skbase test helpers for expected formats.
+        """
+        if _check_soft_dependencies("sktime", severity="none"):
+            try:
+                from sktime.annotation.dummy import DummyDetector
+            except Exception:
+                DummyDetector = None
+
+            try:
+                from sktime.datasets import load_unit_test
+
+                X, y = load_unit_test(return_X_y=True, return_type="pd-multiindex")
+            except Exception:
+                X = None
+                y = None
+        else:
+            DummyDetector = None
+            X = None
+            y = None
+
+        params_default = {
+            "detector": DummyDetector() if DummyDetector is not None else None,
+            "X": X,
+            "y": y,
+        }
+
+        params_more = {
+            "detector": DummyDetector() if DummyDetector is not None else None,
+            "X": X,
+            "y": y,
+            "cv": 2,
+            "scoring": None,
+            "error_score": 0.0,
+            "backend": "loky",
+            "backend_params": {"n_jobs": 1},
+        }
+
+        if parameter_set == "default":
+            return [params_default]
+        elif parameter_set == "more_params":
+            return [params_more]
+        else:
+            return [params_default]
+
+    @classmethod
+    def _get_score_params(cls):
+        """Return settings for testing score/evaluate functions.
+
+        The returned list should match the length of ``get_test_params()`` and
+        contain dictionaries of hyperparameter settings that are valid inputs
+        for ``score``/``evaluate`` when an instance is created from the
+        corresponding element of ``get_test_params()``.
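+
+        For instance, the ``[{}]`` returned below means the test instances
+        from ``get_test_params()`` are scored with default hyperparameters,
+        i.e. via ``exp.score({})``.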
+        """
+        # for the simple detector tests, an empty dict of params is adequate
+        return [{}]
diff --git a/src/hyperactive/experiment/integrations/tests/test_sktime_detector_experiment.py b/src/hyperactive/experiment/integrations/tests/test_sktime_detector_experiment.py
new file mode 100644
index 00000000..375c5f86
--- /dev/null
+++ b/src/hyperactive/experiment/integrations/tests/test_sktime_detector_experiment.py
@@ -0,0 +1,27 @@
+"""Smoke tests for the sktime detector experiment integration."""
+
+import pytest
+
+
+def test_sktime_detector_experiment_with_dummy():
+    """Run a minimal smoke test using sktime's DummyDetector (if available)."""
+    try:
+        from sktime.annotation.dummy import DummyDetector
+        from sktime.datasets import load_unit_test
+    except Exception:
+        pytest.skip("sktime with DummyDetector is not available")
+
+    from hyperactive.experiment.integrations.sktime_detector import (
+        SktimeDetectorExperiment,
+    )
+
+    X, y = load_unit_test(return_X_y=True, return_type="pd-multiindex")
+
+    det = DummyDetector()
+
+    exp = SktimeDetectorExperiment(detector=det, X=X, y=y, cv=2)
+
+    # params: an empty dict should be acceptable for DummyDetector
+    score, metadata = exp.score({})
+
+    assert isinstance(score, float)
+    assert "results" in metadata
diff --git a/src/hyperactive/integrations/sktime/__init__.py b/src/hyperactive/integrations/sktime/__init__.py
index 256d03ea..34fbc618 100644
--- a/src/hyperactive/integrations/sktime/__init__.py
+++ b/src/hyperactive/integrations/sktime/__init__.py
@@ -2,5 +2,6 @@
 
 from hyperactive.integrations.sktime._classification import TSCOptCV
 from hyperactive.integrations.sktime._forecasting import ForecastingOptCV
+from hyperactive.integrations.sktime._detector import TSDetectorOptCv
 
-__all__ = ["TSCOptCV", "ForecastingOptCV"]
+__all__ = ["TSCOptCV", "ForecastingOptCV", "TSDetectorOptCv"]
diff --git a/src/hyperactive/integrations/sktime/_detector.py b/src/hyperactive/integrations/sktime/_detector.py
new file mode 100644
index 00000000..474587ce
--- /dev/null
+++ b/src/hyperactive/integrations/sktime/_detector.py
@@ -0,0 +1,130 @@
+import numpy as np
+from skbase.utils.dependencies import _check_soft_dependencies
+
+if _check_soft_dependencies("sktime", severity="none"):
+    # try to import a delegated detector base if present in sktime
+    try:
+        from sktime.annotation._delegate import _DelegatedDetector
+    except Exception:
+        from skbase.base import BaseEstimator as _DelegatedDetector
+else:
+    from skbase.base import BaseEstimator as _DelegatedDetector
+
+from hyperactive.experiment.integrations.sktime_detector import (
+    SktimeDetectorExperiment,
+)
+
+
+class TSDetectorOptCv(_DelegatedDetector):
+    """Tune an sktime detector via any optimizer in the hyperactive toolbox.
+
+    This mirrors the interface of the other sktime wrappers in this package
+    and delegates the tuning work to `SktimeDetectorExperiment`.
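+
+    Example
+    -------
+    A minimal sketch, mirroring ``examples/sktime_detector_example.py``;
+    it assumes an sktime version that provides ``DummyDetector``:
+
+    >>> from sktime.annotation.dummy import DummyDetector  # doctest: +SKIP
+    >>> from sktime.datasets import load_unit_test  # doctest: +SKIP
+    >>> from hyperactive.opt.gridsearch import GridSearchSk  # doctest: +SKIP
+    >>> X, y = load_unit_test(return_X_y=True, return_type="pd-multiindex")  # doctest: +SKIP
+    >>> tuned = TSDetectorOptCv(
+    ...     detector=DummyDetector(),
+    ...     optimizer=GridSearchSk(param_grid={}),
+    ...     cv=2,
+    ... )  # doctest: +SKIP
+    >>> tuned = tuned.fit(X=X, y=y)  # doctest: +SKIP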
+ """ + + _tags = { + "authors": "arnavk23", + "maintainers": "fkiraly", + "python_dependencies": "sktime", + "object_type": "optimizer", + } + + _delegate_name = "best_detector_" + + def __init__( + self, + detector, + optimizer, + cv=None, + scoring=None, + refit=True, + error_score=np.nan, + backend=None, + backend_params=None, + ): + self.detector = detector + self.optimizer = optimizer + self.cv = cv + self.scoring = scoring + self.refit = refit + self.error_score = error_score + self.backend = backend + self.backend_params = backend_params + super().__init__() + + def _fit(self, X, y): + detector = self.detector.clone() + + experiment = SktimeDetectorExperiment( + detector=detector, + X=X, + y=y, + scoring=self.scoring, + cv=self.cv, + error_score=self.error_score, + backend=self.backend, + backend_params=self.backend_params, + ) + + optimizer = self.optimizer.clone() + optimizer.set_params(experiment=experiment) + best_params = optimizer.solve() + + self.best_params_ = best_params + self.best_detector_ = detector.set_params(**best_params) + + if self.refit: + try: + self.best_detector_.fit(X=X, y=y) + except TypeError: + self.best_detector_.fit(X=X) + + return self + + def _predict(self, X): + if not self.refit: + raise RuntimeError( + f"In {self.__class__.__name__}, refit must be True to make predictions," + f" but found refit=False. If refit=False, {self.__class__.__name__} can" + " be used only to tune hyper-parameters, as a parameter estimator." + ) + return super()._predict(X=X) + + @classmethod + def get_test_params(cls, parameter_set="default"): + if _check_soft_dependencies("sktime", severity="none"): + try: + from sktime.annotation.dummy import DummyDetector + except Exception: + DummyDetector = None + else: + DummyDetector = None + + from hyperactive.opt.gridsearch import GridSearchSk + + params_default = { + "detector": DummyDetector() if DummyDetector is not None else None, + "optimizer": GridSearchSk(param_grid={}), + } + + + params_more = { + "detector": DummyDetector() if DummyDetector is not None else None, + "optimizer": GridSearchSk( + param_grid={"strategy": ["most_frequent", "stratified"]} + ), + "cv": 2, + "scoring": None, + "refit": False, + "error_score": 0.0, + "backend": "loky", + "backend_params": {"n_jobs": 1}, + } + + if parameter_set == "default": + return params_default + elif parameter_set == "more_params": + return params_more + else: + return params_default diff --git a/src/hyperactive/integrations/sktime/tests/test_detector_integration.py b/src/hyperactive/integrations/sktime/tests/test_detector_integration.py new file mode 100644 index 00000000..3b4732c3 --- /dev/null +++ b/src/hyperactive/integrations/sktime/tests/test_detector_integration.py @@ -0,0 +1,10 @@ +"""Basic import smoke tests for the sktime detector integration.""" + + +def test_detector_integration_imports(): + """Ensure the public integration symbols can be imported.""" + from hyperactive.experiment.integrations import SktimeDetectorExperiment + from hyperactive.integrations.sktime import TSDetectorOptCv + + assert SktimeDetectorExperiment is not None + assert TSDetectorOptCv is not None