diff --git a/Orange/classification/outlier_detection.py b/Orange/classification/outlier_detection.py index 3f7693e3b96..52f56253464 100644 --- a/Orange/classification/outlier_detection.py +++ b/Orange/classification/outlier_detection.py @@ -9,43 +9,51 @@ from sklearn.svm import OneClassSVM from Orange.base import SklLearner, SklModel -from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable, \ - Variable -from Orange.data.util import get_unique_names +from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable +from Orange.data.util import get_unique_names, SharedComputeValue from Orange.preprocess import AdaptiveNormalize -from Orange.util import wrap_callback, dummy_callback +from Orange.util import dummy_callback __all__ = ["LocalOutlierFactorLearner", "IsolationForestLearner", "EllipticEnvelopeLearner", "OneClassSVMLearner"] +class _CachedTransform: + # to be used with SharedComputeValue + def __init__(self, model): + self.model = model + + def __call__(self, data): + return self.model.data_to_model_domain(data) + + class _OutlierModel(SklModel): def __init__(self, skl_model): super().__init__(skl_model) - self._cached_data = None self.outlier_var = None + self.cached_transform = _CachedTransform(self) def predict(self, X: np.ndarray) -> np.ndarray: pred = self.skl_model.predict(X) pred[pred == -1] = 0 return pred[:, None] + def new_domain(self, data: Table) -> Domain: + assert self.outlier_var is not None + return Domain(data.domain.attributes, data.domain.class_vars, + data.domain.metas + (self.outlier_var,)) + def __call__(self, data: Table, progress_callback: Callable = None) \ -> Table: assert isinstance(data, Table) - assert self.outlier_var is not None - domain = Domain(data.domain.attributes, data.domain.class_vars, - data.domain.metas + (self.outlier_var,)) + domain = self.new_domain(data) if progress_callback is None: progress_callback = dummy_callback - progress_callback(0, "Preprocessing...") - self._cached_data = self.data_to_model_domain( - data, wrap_callback(progress_callback, end=0.1)) - progress_callback(0.1, "Predicting...") - metas = np.hstack((data.metas, self.predict(self._cached_data.X))) + progress_callback(0, "Predicting...") + new_table = data.transform(domain) progress_callback(1) - return Table.from_numpy(domain, data.X, data.Y, metas) + return new_table class _OutlierLearner(SklLearner): @@ -64,27 +72,17 @@ def _fit_model(self, data: Table) -> _OutlierModel: compute_value=transformer ) - transformer.variable = variable model.outlier_var = variable return model -class _Transformer: +class _Transformer(SharedComputeValue): def __init__(self, model: _OutlierModel): + super().__init__(model.cached_transform) self._model = model - self._variable = None - - @property - def variable(self) -> Variable: - return self._variable - @variable.setter - def variable(self, var: Variable): - self._variable = var - - def __call__(self, data: Table) -> np.ndarray: - assert isinstance(self._variable, Variable) - return self._model(data).get_column_view(self._variable)[0] + def compute(self, data: Table, shared_data: Table) -> np.ndarray: + return self._model.predict(shared_data.X)[:, 0] class OneClassSVMLearner(_OutlierLearner): @@ -142,13 +140,16 @@ def mahalanobis(self, observations: np.ndarray) -> np.ndarray: """ return self.skl_model.mahalanobis(observations)[:, None] - def __call__(self, data: Table, progress_callback: Callable = None) \ - -> Table: - pred = super().__call__(data, progress_callback) - domain = Domain(pred.domain.attributes, pred.domain.class_vars, - pred.domain.metas + (self.mahal_var,)) - metas = np.hstack((pred.metas, self.mahalanobis(self._cached_data.X))) - return Table.from_numpy(domain, pred.X, pred.Y, metas) + def new_domain(self, data: Table) -> Domain: + assert self.mahal_var is not None + domain = super().new_domain(data) + return Domain(domain.attributes, domain.class_vars, + domain.metas + (self.mahal_var,)) + + +class _TransformerMahalanobis(_Transformer): + def compute(self, data: Table, shared_data: Table) -> np.ndarray: + return self._model.mahalanobis(shared_data.X)[:, 0] class EllipticEnvelopeLearner(_OutlierLearner): @@ -166,13 +167,12 @@ def _fit_model(self, data: Table) -> EllipticEnvelopeClassifier: domain = data.domain model = super()._fit_model(data.transform(Domain(domain.attributes))) - transformer = _Transformer(model) + transformer = _TransformerMahalanobis(model) names = [v.name for v in domain.variables + domain.metas] variable = ContinuousVariable( get_unique_names(names, "Mahalanobis"), compute_value=transformer ) - transformer.variable = variable model.mahal_var = variable return model diff --git a/Orange/classification/tests/test_outlier_detection.py b/Orange/classification/tests/test_outlier_detection.py index dc49ad2fcce..8caeedbaf80 100644 --- a/Orange/classification/tests/test_outlier_detection.py +++ b/Orange/classification/tests/test_outlier_detection.py @@ -3,7 +3,7 @@ import pickle import tempfile import unittest -from unittest.mock import Mock +from unittest.mock import Mock, patch import numpy as np @@ -36,6 +36,7 @@ def assert_table_equal(self, table1, table2): np.testing.assert_array_equal(table1.metas, table2.metas) def assert_table_appended_outlier(self, table1, table2, offset=1): + np.testing.assert_array_equal(table1.ids, table2.ids) np.testing.assert_array_equal(table1.X, table2.X) np.testing.assert_array_equal(table1.Y, table2.Y) np.testing.assert_array_equal(table1.metas, table2.metas[:, :-offset]) @@ -47,7 +48,6 @@ def assert_table_appended_outlier(self, table1, table2, offset=1): self.assertEqual(table2.domain.metas[-offset].name, "Outlier") self.assertIsNotNone(table2.domain.metas[-offset].compute_value) - class TestOneClassSVMLearner(_TestDetector): def test_OneClassSVM(self): np.random.seed(42) @@ -128,12 +128,19 @@ def test_EllipticEnvelope(self): def test_mahalanobis(self): n = len(self.X_all) pred = self.model(self.X_all) + y_pred = pred[:, self.model.outlier_var].metas y_mahal = pred[:, self.model.mahal_var].metas y_mahal, y_pred = zip(*sorted(zip(y_mahal, y_pred), reverse=True)) self.assertTrue(all(i == 0 for i in y_pred[:int(self.cont * n)])) self.assertTrue(all(i == 1 for i in y_pred[int(self.cont * n):])) + def test_single_data_to_model_domain(self): + with patch.object(self.model, "data_to_model_domain", + wraps=self.model.data_to_model_domain) as call: + self.model(self.X_all) + self.assertEqual(call.call_count, 1) + def test_EllipticEnvelope_ignores_y(self): domain = Domain((ContinuousVariable("x1"), ContinuousVariable("x2")), (ContinuousVariable("y1"), ContinuousVariable("y2"))) @@ -231,7 +238,6 @@ def test_transformer(self): detect = self.detector(self.iris) pred = detect(self.iris) var = pred.domain.metas[0] - self.assertIs(var, var.compute_value.variable) np.testing.assert_array_equal(pred[:, "Outlier"].metas.ravel(), var.compute_value(self.iris))