Skip to content

Commit 4256b16

Browse files
authored
Merge pull request #4338 from VesnaT/outlier_detection
[RFC][ENH] Outliers: Widget upgrade
2 parents c6c8300 + 40f5521 commit 4256b16

File tree

7 files changed

+437
-185
lines changed

7 files changed

+437
-185
lines changed

Orange/classification/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from .tree import *
1616
from .simple_tree import *
1717
from .simple_random_forest import *
18-
from .elliptic_envelope import *
18+
from .outlier_detection import *
1919
from .rules import *
2020
from .sgd import *
2121
from .neural_network import *

Orange/classification/elliptic_envelope.py

Lines changed: 0 additions & 41 deletions
This file was deleted.
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# pylint: disable=unused-argument
2+
from sklearn.covariance import EllipticEnvelope
3+
from sklearn.ensemble import IsolationForest
4+
from sklearn.neighbors import LocalOutlierFactor
5+
from Orange.base import SklLearner, SklModel
6+
from Orange.data import Table, Domain
7+
8+
__all__ = ["LocalOutlierFactorLearner", "IsolationForestLearner",
9+
"EllipticEnvelopeLearner"]
10+
11+
12+
class _OutlierDetector(SklLearner):
    # Shared base for the outlier-detection learners in this module: the
    # data handed to the wrapped learner is reduced to its attributes first.

    def __call__(self, data: Table):
        """Fit on `data` after reducing it to its attribute columns.

        A new domain is built from `data.domain.attributes` alone, `data`
        is transformed into it, and the result is passed on to the parent
        learner's `__call__`, whose return value is returned unchanged.
        """
        attributes_only = Domain(data.domain.attributes)
        return super().__call__(data.transform(attributes_only))
16+
17+
18+
class LocalOutlierFactorLearner(_OutlierDetector):
    # Learner wrapping scikit-learn's LocalOutlierFactor estimator.
    name = "Local Outlier Factor"
    __wraps__ = LocalOutlierFactor

    # NOTE(review): `novelty` defaults to True here - presumably so that the
    # fitted model can be applied to data afterwards; confirm against the
    # scikit-learn documentation.
    def __init__(
            self,
            n_neighbors=20,
            algorithm="auto",
            leaf_size=30,
            metric="minkowski",
            p=2,
            metric_params=None,
            contamination="auto",
            novelty=True,
            n_jobs=None,
            preprocessors=None,
    ):
        super().__init__(preprocessors=preprocessors)
        # Snapshot of every name local to this method; do not introduce
        # additional local variables above this line or they end up in
        # `params` as well.
        self.params = locals()
28+
29+
30+
class IsolationForestLearner(_OutlierDetector):
    # Learner wrapping scikit-learn's IsolationForest estimator.
    name = "Isolation Forest"
    __wraps__ = IsolationForest

    # NOTE(review): `behaviour` is accepted with the placeholder value
    # "deprecated" - verify that the installed scikit-learn still takes it.
    def __init__(
            self,
            n_estimators=100,
            max_samples="auto",
            contamination="auto",
            max_features=1.0,
            bootstrap=False,
            n_jobs=None,
            behaviour="deprecated",
            random_state=None,
            verbose=0,
            warm_start=False,
            preprocessors=None,
    ):
        super().__init__(preprocessors=preprocessors)
        # Snapshot of every name local to this method; do not introduce
        # additional local variables above this line or they end up in
        # `params` as well.
        self.params = locals()
40+
41+
42+
class EllipticEnvelopeClassifier(SklModel):
    # Model class exposing the wrapped estimator's `mahalanobis` method.

    def mahalanobis(self, observations):
        """Compute squared Mahalanobis distances of the given observations.

        Parameters
        ----------
        observations : ndarray (n_samples, n_features) or Orange Table
            When a Table is passed, its `X` array is used.

        Returns
        -------
        distances : ndarray (n_samples,)
            Squared Mahalanobis distances of the given observations.
        """
        samples = (
            observations.X if isinstance(observations, Table)
            else observations
        )
        return self.skl_model.mahalanobis(samples)
58+
59+
60+
class EllipticEnvelopeLearner(_OutlierDetector):
    # Learner wrapping scikit-learn's EllipticEnvelope estimator.
    __wraps__ = EllipticEnvelope
    # Presumably the model class produced by fitting, so that callers get
    # `EllipticEnvelopeClassifier.mahalanobis` - confirm against SklLearner.
    __returns__ = EllipticEnvelopeClassifier
    name = "Covariance Estimator"

    def __init__(self, store_precision=True, assume_centered=False,
                 support_fraction=None, contamination=0.1,
                 random_state=None, preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        # Snapshot of every name local to this method; do not introduce
        # additional local variables above this line or they end up in
        # `params` as well.
        self.params = vars()

    # No `__call__` override here: the attribute-only transform is inherited
    # from `_OutlierDetector.__call__`. Repeating it in this class, followed
    # by `super().__call__`, only applied the same transform a second time.

Orange/classification/svm.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma="auto", coef0=0.0,
6969

7070

7171
class OneClassSVMLearner(SklLearnerBase):
72+
name = "One class SVM"
7273
__wraps__ = skl_svm.OneClassSVM
7374
preprocessors = svm_pps
7475

Orange/tests/test_elliptic_envelope.py renamed to Orange/classification/tests/test_outlier_detection.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66
import numpy as np
77
from Orange.data import Table, Domain, ContinuousVariable
8-
from Orange.classification import EllipticEnvelopeLearner
8+
from Orange.classification import EllipticEnvelopeLearner, \
9+
IsolationForestLearner, LocalOutlierFactorLearner
910

1011

1112
class TestEllipticEnvelopeLearner(unittest.TestCase):
@@ -44,7 +45,7 @@ def test_mahalanobis(self):
4445

4546
def test_EllipticEnvelope_ignores_y(self):
4647
domain = Domain((ContinuousVariable("x1"), ContinuousVariable("x2")),
47-
class_vars=(ContinuousVariable("y1"), ContinuousVariable("y2")))
48+
(ContinuousVariable("y1"), ContinuousVariable("y2")))
4849
X = np.random.random((40, 2))
4950
Y = np.random.random((40, 2))
5051
table = Table(domain, X, Y)
@@ -60,3 +61,25 @@ def test_EllipticEnvelope_ignores_y(self):
6061
np.testing.assert_array_equal(pred1, pred2)
6162
np.testing.assert_array_equal(pred2, pred3)
6263
np.testing.assert_array_equal(pred3, pred4)
64+
65+
66+
class TestOutlierDetection(unittest.TestCase):
    # Checks how many iris rows each detector labels with -1 when it is
    # fitted and applied on the same table with contamination=0.1.

    @classmethod
    def setUpClass(cls):
        # Loaded once and shared by every test in this class.
        cls.iris = Table("iris")

    def test_LocalOutlierFactorDetector(self):
        model = LocalOutlierFactorLearner(contamination=0.1)(self.iris)
        predictions = model(self.iris)
        self.assertEqual(np.count_nonzero(predictions == -1), 14)

    def test_IsolationForestDetector(self):
        model = IsolationForestLearner(contamination=0.1)(self.iris)
        predictions = model(self.iris)
        self.assertEqual(np.count_nonzero(predictions == -1), 15)
82+
83+
84+
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)