Skip to content

Commit 16cac60

Browse files
committed
Outliers: Refactor
1 parent aa36a3b commit 16cac60

File tree

4 files changed: 85 additions (+85) and 49 deletions (-49).

4 files changed

+85
-49
lines changed

Orange/classification/outlier_detection.py

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
from sklearn.svm import OneClassSVM
1010

1111
from Orange.base import SklLearner, SklModel
12-
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
12+
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable, \
13+
Variable
1314
from Orange.preprocess import AdaptiveNormalize
1415
from Orange.statistics.util import all_nan
1516

@@ -58,17 +59,38 @@ class _OutlierLearner(SklLearner):
5859
supports_multiclass = True
5960

6061
def _fit_model(self, data: Table) -> _OutlierModel:
61-
names = [v.name for v in data.domain.variables + data.domain.metas]
62-
data = data.transform(Domain(data.domain.attributes))
63-
self.__model = super()._fit_model(data)
64-
self.__model.outlier_var = DiscreteVariable(
65-
get_unique_names(names, "Outlier"), values=["Yes", "No"],
66-
compute_value=self.compute_value
62+
domain = data.domain
63+
model = super()._fit_model(data.transform(Domain(domain.attributes)))
64+
65+
transformer = _Transformer(model)
66+
names = [v.name for v in domain.variables + domain.metas]
67+
variable = DiscreteVariable(
68+
get_unique_names(names, "Outlier"),
69+
values=["Yes", "No"],
70+
compute_value=transformer
6771
)
68-
return self.__model
6972

70-
def compute_value(self, table: Table) -> np.ndarray:
71-
return self.__model(table)[:, self.__model.outlier_var].metas
73+
transformer.variable = variable
74+
model.outlier_var = variable
75+
return model
76+
77+
78+
class _Transformer:
79+
def __init__(self, model: _OutlierModel):
80+
self._model = model
81+
self._variable = None
82+
83+
@property
84+
def variable(self) -> Variable:
85+
return self._variable
86+
87+
@variable.setter
88+
def variable(self, var: Variable):
89+
self._variable = var
90+
91+
def __call__(self, data: Table) -> np.ndarray:
92+
assert isinstance(self._variable, Variable)
93+
return self._model(data).get_column_view(self._variable)[0]
7294

7395

7496
class OneClassSVMLearner(_OutlierLearner):
@@ -146,14 +168,17 @@ def __init__(self, store_precision=True, assume_centered=False,
146168
self.params = vars()
147169

148170
def _fit_model(self, data: Table) -> EllipticEnvelopeClassifier:
149-
names = [v.name for v in data.domain.variables + data.domain.metas]
150-
self.__model = super()._fit_model(data)
151-
self.__model.mahal_var = ContinuousVariable(
171+
domain = data.domain
172+
model = super()._fit_model(data.transform(Domain(domain.attributes)))
173+
174+
transformer = _Transformer(model)
175+
names = [v.name for v in domain.variables + domain.metas]
176+
variable = ContinuousVariable(
152177
get_unique_names(names, "Mahalanobis"),
153-
compute_value=self.compute_mahal
178+
compute_value=transformer
154179
)
155-
return self.__model
156180

157-
def compute_mahal(self, table: Table) -> np.ndarray:
158-
return self.__model(table)[:, self.__model.mahal_var].metas
181+
transformer.variable = variable
182+
model.mahal_var = variable
183+
return model
159184

Orange/classification/tests/test_outlier_detection.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
import unittest
66

77
import numpy as np
8-
from Orange.data import Table, Domain, ContinuousVariable
8+
99
from Orange.classification import EllipticEnvelopeLearner, \
1010
IsolationForestLearner, LocalOutlierFactorLearner, OneClassSVMLearner
11+
from Orange.data import Table, Domain, ContinuousVariable
1112

1213

1314
class _TestDetector(unittest.TestCase):
@@ -213,6 +214,14 @@ def test_transform(self):
213214
pred2 = self.iris.transform(pred.domain)
214215
self.assert_table_equal(pred, pred2)
215216

217+
def test_transformer(self):
218+
detect = self.detector(self.iris)
219+
pred = detect(self.iris)
220+
var = pred.domain.metas[0]
221+
self.assertIs(var, var.compute_value.variable)
222+
np.testing.assert_array_equal(pred[:, "Outlier"].metas.ravel(),
223+
var.compute_value(self.iris))
224+
216225
def test_pickle_model(self):
217226
detect = self.detector(self.iris)
218227
f = tempfile.NamedTemporaryFile(suffix='.pkl', delete=False)

Orange/widgets/data/owoutliers.py

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from Orange.classification import OneClassSVMLearner, EllipticEnvelopeLearner,\
1111
LocalOutlierFactorLearner, IsolationForestLearner
12-
from Orange.data import Table, DiscreteVariable
12+
from Orange.data import Table
1313
from Orange.widgets import gui
1414
from Orange.widgets.settings import Setting
1515
from Orange.widgets.utils.sql import check_sql_input
@@ -238,44 +238,40 @@ def enable_controls(self):
238238
self.method_combo.model().item(self.Covariance).setEnabled(False)
239239
self.Warning.disabled_cov()
240240

241-
def _get_outliers(self) -> Tuple[Table, Table, Table]:
241+
def commit(self):
242+
inliers, outliers, data = self.detect_outliers()
243+
summary = len(inliers) if inliers else self.info.NoOutput
244+
self.info.set_output_summary(summary)
245+
self.Outputs.inliers.send(inliers)
246+
self.Outputs.outliers.send(outliers)
247+
self.Outputs.data.send(data)
248+
249+
def detect_outliers(self) -> Tuple[Table, Table, Table]:
250+
self.n_inliers = self.n_outliers = None
242251
self.Error.singular_cov.clear()
243252
self.Error.memory_error.clear()
253+
if not self.data:
254+
return None, None, None
244255
try:
245-
pred, outlier_var = self.detect_outliers()
256+
learner_class = self.METHODS[self.outlier_method]
257+
kwargs = self.current_editor.get_parameters()
258+
learner = learner_class(**kwargs)
259+
model = learner(self.data)
260+
pred = model(self.data)
246261
except ValueError:
247262
self.Error.singular_cov()
248263
return None, None, None
249264
except MemoryError:
250265
self.Error.memory_error()
251266
return None, None, None
252267
else:
253-
col = pred[:, outlier_var].metas
268+
col = pred[:, model.outlier_var].metas
254269
inliers_ind = np.where(col == 1)[0]
255270
outliers_ind = np.where(col == 0)[0]
256271
self.n_inliers = len(inliers_ind)
257272
self.n_outliers = len(outliers_ind)
258273
return self.data[inliers_ind], self.data[outliers_ind], pred
259274

260-
def commit(self):
261-
inliers = outliers = data = None
262-
self.n_inliers = self.n_outliers = None
263-
if self.data:
264-
inliers, outliers, data = self._get_outliers()
265-
266-
summary = len(inliers) if inliers else self.info.NoOutput
267-
self.info.set_output_summary(summary)
268-
self.Outputs.inliers.send(inliers)
269-
self.Outputs.outliers.send(outliers)
270-
self.Outputs.data.send(data)
271-
272-
def detect_outliers(self) -> Tuple[Table, DiscreteVariable]:
273-
learner_class = self.METHODS[self.outlier_method]
274-
kwargs = self.current_editor.get_parameters()
275-
learner = learner_class(**kwargs)
276-
model = learner(self.data)
277-
return model(self.data), model.outlier_var
278-
279275
def send_report(self):
280276
if self.n_outliers is None or self.n_inliers is None:
281277
return

Orange/widgets/data/tests/test_owoutliers.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,22 +68,28 @@ def callback():
6868
self.assertIsNotNone(self.get_output(self.widget.Outputs.outliers))
6969
self.assertIsNotNone(self.get_output(self.widget.Outputs.data))
7070

71-
self.send_signal(self.widget.Inputs.data, self.iris)
71+
self.widget.send_report()
72+
self.send_signal(self.widget.Inputs.data, self.heart_disease)
7273
simulate.combobox_run_through_all(self.widget.method_combo,
7374
callback=callback)
7475

75-
def test_memory_error(self):
76+
@patch("Orange.classification.outlier_detection._OutlierModel.predict")
77+
def test_memory_error(self, mocked_predict: Mock):
7678
"""
7779
Handling memory error.
7880
GH-2374
7981
"""
80-
data = Table("iris")[::3]
8182
self.assertFalse(self.widget.Error.memory_error.is_shown())
82-
with unittest.mock.patch(
83-
"Orange.widgets.data.owoutliers.OWOutliers.detect_outliers",
84-
side_effect=MemoryError):
85-
self.send_signal("Data", data)
86-
self.assertTrue(self.widget.Error.memory_error.is_shown())
83+
mocked_predict.side_effect = MemoryError
84+
self.send_signal(self.widget.Inputs.data, self.iris)
85+
self.assertTrue(self.widget.Error.memory_error.is_shown())
86+
87+
@patch("Orange.classification.outlier_detection._OutlierModel.predict")
88+
def test_singular_cov_error(self, mocked_predict: Mock):
89+
self.assertFalse(self.widget.Error.singular_cov.is_shown())
90+
mocked_predict.side_effect = ValueError
91+
self.send_signal(self.widget.Inputs.data, self.iris)
92+
self.assertTrue(self.widget.Error.singular_cov.is_shown())
8793

8894
def test_nans(self):
8995
"""Widget does not crash with nans"""

0 commit comments

Comments
 (0)