Skip to content

Commit 0790842

Browse files
authored
Merge pull request #2823 from jerneju/lda
[ENH] Linear Discriminant Analysis: scripting part
2 parents a6d4f7f + c126417 commit 0790842

File tree

4 files changed

+163
-0
lines changed

4 files changed

+163
-0
lines changed

Orange/projection/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
from .manifold import *
55
from .freeviz import *
66
from .radviz import radviz
7+
from .lda import LDA

Orange/projection/lda.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
2+
3+
import Orange.data
4+
from Orange.classification.logistic_regression import _FeatureScorerMixin
5+
from Orange.data.util import SharedComputeValue
6+
from Orange.projection import SklProjector, Projection
7+
8+
__all__ = ["LDA"]
9+
10+
11+
class LDA(SklProjector, _FeatureScorerMixin):
    """Linear discriminant analysis projector wrapping scikit-learn.

    Parameters
    ----------
    n_components : int or None
        Requested number of discriminant components. When it is an int it
        is clamped in ``fit`` to the smaller dimension of the training
        matrix. ``None`` is forwarded to scikit-learn unchanged.
    solver : str
        Solver name forwarded to ``LinearDiscriminantAnalysis``.
    preprocessors : list or None
        Forwarded to the ``SklProjector`` constructor.
    """
    name = "LDA"
    supports_sparse = False

    def __init__(self, n_components=2, solver='eigen', preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.n_components = n_components
        self.solver = solver

    def fit(self, X, Y=None):
        """Fit the discriminant analysis on ``X``/``Y`` and wrap it.

        Returns an ``LDAModel`` built from the fitted scikit-learn
        estimator and ``self.domain``.
        """
        if self.n_components is not None:
            # Cannot ask for more components than the data has rows/columns.
            self.n_components = min(min(X.shape), self.n_components)
        # Honour the configured solver and component count; previously both
        # were hard-coded here, so constructor arguments had no effect.
        proj = LinearDiscriminantAnalysis(
            solver=self.solver, n_components=self.n_components)
        proj = proj.fit(X, Y)
        return LDAModel(proj, self.domain)
26+
27+
28+
class _LDATransformDomain:
    """Shared computation used by every LDA output variable.

    Holds a reference to the LDA model and, when called, returns the
    model's transform of the data's ``X`` matrix.
    """

    def __init__(self, lda):
        self.lda = lda

    def __call__(self, data):
        """Return ``lda.transform`` applied to ``data.X``.

        The data is first converted to the model's ``pre_domain`` when its
        own domain differs from it.
        """
        model = self.lda
        source_domain = model.pre_domain
        needs_conversion = data.domain != source_domain
        table = data.transform(source_domain) if needs_conversion else data
        return model.transform(table.X)
37+
38+
39+
class LDAModel(Projection):
    """Projection model built from a fitted scikit-learn LDA estimator.

    Parameters
    ----------
    proj
        Fitted estimator; its ``scalings_`` and ``n_components`` are read
        here (and ``classes_``/``means_`` when ``n_components`` is None).
    domain
        Original domain; its class variables and metas are carried over to
        the projected domain.
    """
    name = "LDAModel"

    def __init__(self, proj, domain):
        # One shared transform object so that all LD variables reuse the
        # same computed projection.
        lda_transform = _LDATransformDomain(self)
        self.components_ = proj.scalings_.T

        def lda_variable(i):
            return Orange.data.ContinuousVariable(
                'LD%d' % (i + 1),
                compute_value=LDAProjector(self, i, lda_transform))

        super().__init__(proj=proj)
        self.orig_domain = domain
        self.n_components = self.components_.shape[0]

        n_outputs = proj.n_components
        if n_outputs is None:
            # scikit-learn documents None as min(n_classes - 1, n_features);
            # resolve it here because range(None) would raise TypeError.
            n_outputs = min(len(proj.classes_) - 1, proj.means_.shape[1])

        self.domain = Orange.data.Domain(
            [lda_variable(i) for i in range(n_outputs)],
            domain.class_vars, domain.metas)
56+
57+
58+
class LDAProjector(SharedComputeValue):
    """Compute value that selects one column of the LDA-transformed data."""

    def __init__(self, projection, feature, lda_transform):
        # `projection` is accepted for interface compatibility; it is not
        # stored or used by this class.
        super().__init__(lda_transform)
        self.feature = feature

    def compute(self, data, lda_space):
        """Return the column of ``lda_space`` indexed by ``self.feature``."""
        column = self.feature
        return lda_space[:, column]

Orange/tests/test_lda.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Test methods with long descriptive names can omit docstrings
2+
# pylint: disable=missing-docstring
3+
4+
import unittest
5+
6+
import numpy as np
7+
8+
from Orange.preprocess import Continuize, Randomize
9+
from Orange.projection import LDA
10+
from Orange.data import Table
11+
12+
13+
class TestLDA(unittest.TestCase):
    def test_lda(self):
        iris = Table('iris')
        n_components = 2
        projector = LDA(n_components=n_components)
        fitted = projector(iris)
        projected = fitted(iris)
        expected_x_shape = (len(iris), n_components)
        self.assertEqual(projected.X.shape, expected_x_shape)
        self.assertEqual(projected.Y.shape, (len(iris),))

    def test_transform_changed_domain(self):
        """
        1. Open data, apply some preprocessor, split the data into two parts,
        use LDA on the first part, and then transform the second part.

        2. Open data, split into two parts, apply the same preprocessor and
        LDA only on the first part, and then transform the second part.

        The transformed second part in (1) and (2) has to be the same.
        """
        shuffled = Randomize()(Table("iris"))
        preprocessor = Continuize()
        lda = LDA()

        # (1) preprocess the whole table, fit on the first 75 rows
        prepared = preprocessor(shuffled)
        model_all = lda(prepared[:75])
        result_1 = model_all(prepared[75:])

        # (2) preprocess only the "training" rows, then project raw rows
        prepared_train = preprocessor(shuffled[:75])
        model_train = lda(prepared_train)
        result_2 = model_train(shuffled[75:])

        np.testing.assert_almost_equal(result_1.X, result_2.X)

doc/data-mining-library/source/reference/projection.rst

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,51 @@ Example
8383

8484

8585
.. autoclass:: Orange.projection.freeviz.FreeViz
86+
87+
88+
89+
90+
LDA
91+
---
92+
93+
Linear discriminant analysis is another way of finding a linear transformation of
94+
data that reduces the number of dimensions required to represent it. It is often
95+
used for dimensionality reduction prior to classification, but can also be used as a
96+
classification technique itself ([1]_).
97+
98+
99+
Example
100+
=======
101+
102+
>>> from Orange.projection import LDA
103+
>>> from Orange.data import Table
104+
>>> iris = Table('iris')
105+
>>> lda = LDA()
106+
>>> model = lda(iris)
107+
>>> model.components_ # LDA components
108+
array([[ 0.20490976, 0.38714331, -0.54648218, -0.71378517],
109+
[ 0.00898234, 0.58899857, -0.25428655, 0.76703217],
110+
[-0.71507172, 0.43568045, 0.45568731, -0.30200008],
111+
[ 0.06449913, -0.35780501, -0.42514529, 0.828895 ]])
112+
>>> transformed_data = model(iris) # transformed data
113+
>>> transformed_data
114+
[[1.492, 1.905 | Iris-setosa],
115+
[1.258, 1.608 | Iris-setosa],
116+
[1.349, 1.750 | Iris-setosa],
117+
[1.180, 1.639 | Iris-setosa],
118+
[1.510, 1.963 | Iris-setosa],
119+
...
120+
]
121+
122+
123+
124+
.. autoclass:: Orange.projection.lda.LDA
125+
126+
127+
128+
References
129+
----------
130+
131+
.. [1] Witten, I.H., Frank, E., Hall, M.A. and Pal, C.J., 2016.
132+
Data Mining: Practical machine learning tools and techniques. Morgan Kaufmann.
133+

0 commit comments

Comments
 (0)