Skip to content

Commit fa353da

Browse files
committed
[ENH] Linear Discriminant Analysis (LDA)
1 parent a6aafd6 commit fa353da

File tree

3 files changed

+115
-0
lines changed

3 files changed

+115
-0
lines changed

Orange/projection/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
from .manifold import *
55
from .freeviz import *
66
from .radviz import radviz
7+
from .lda import LDA

Orange/projection/lda.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
2+
3+
import Orange.data
4+
from Orange.classification.logistic_regression import _FeatureScorerMixin
5+
from Orange.data.util import SharedComputeValue
6+
from Orange.projection import SklProjector, Projection, LinearCombinationSql
7+
8+
__all__ = ["LDA"]
9+
10+
11+
class LDA(SklProjector, _FeatureScorerMixin):
    """Linear discriminant analysis projector backed by scikit-learn.

    Parameters
    ----------
    n_components : int or None
        Requested number of discriminant axes. An integer is capped at
        ``min(X.shape)`` when fitting. ``None`` is resolved at fit time to
        scikit-learn's documented default, ``min(n_classes - 1, n_features)``.
    solver : str
        Solver name forwarded to ``LinearDiscriminantAnalysis``.
    preprocessors : list or None
        Forwarded unchanged to ``SklProjector.__init__``.
    """
    name = "LDA"
    supports_sparse = False

    def __init__(self, n_components=2, solver='eigen', preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.n_components = n_components
        self.solver = solver

    def fit(self, X, Y=None):
        """Fit scikit-learn's LDA on ``X``/``Y`` and wrap it in an LDAModel.

        The configured ``solver`` and ``n_components`` are honoured, and the
        learner's own ``n_components`` is left untouched so that fitting on a
        small table does not change later fits.
        """
        # Work on a local copy: clamping must not leak back into the learner.
        n_components = self.n_components
        if n_components is not None:
            n_components = min(min(X.shape), n_components)
        elif Y is not None:
            # LDAModel enumerates range(proj.n_components), so hand
            # scikit-learn an explicit integer instead of None.
            import numpy as np
            n_components = min(len(np.unique(Y)) - 1, X.shape[1])
        proj = LinearDiscriminantAnalysis(
            solver=self.solver, n_components=n_components)
        proj = proj.fit(X, Y)
        return LDAModel(proj, self.domain)
26+
27+
28+
class _LDATransformDomain:
    """Callable shared by all LDA variables of one model.

    It holds a reference to the model and, when called, projects a data
    table with the model's ``transform``.
    """

    def __init__(self, lda):
        self.lda = lda

    def __call__(self, data):
        """Return ``lda.transform`` applied to the ``X`` of ``data``.

        If the domain of ``data`` differs from the model's ``pre_domain``,
        the data is converted to ``pre_domain`` first.
        """
        model = self.lda
        expected = model.pre_domain
        aligned = data.transform(expected) if data.domain != expected else data
        return model.transform(aligned.X)
37+
38+
39+
class LDAModel(Projection):
    """Fitted LDA projection together with its output domain.

    ``components_`` holds the transpose of the fitted ``scalings_`` and
    ``n_components`` its number of rows. ``domain`` contains one continuous
    variable ``LD1``, ``LD2``, ... per ``proj.n_components``, with the class
    variables and metas taken from the original domain.
    """
    name = "LDAModel"

    def __init__(self, proj, domain):
        # One transform object is shared by every LD variable.
        shared_transform = _LDATransformDomain(self)
        self.components_ = proj.scalings_.T

        super().__init__(proj=proj)
        self.orig_domain = domain
        self.n_components = self.components_.shape[0]

        attributes = []
        for index in range(proj.n_components):
            attributes.append(Orange.data.ContinuousVariable(
                'LD%d' % (index + 1),
                compute_value=LDAProjector(self, index, shared_transform)))
        self.domain = Orange.data.Domain(
            attributes, domain.class_vars, domain.metas)
56+
57+
58+
class LDAProjector(SharedComputeValue):
    """Compute value that selects one column of the shared LDA output."""

    def __init__(self, projection, feature, lda_transform):
        # `projection` is accepted but neither stored nor used here.
        super().__init__(lda_transform)
        self.feature = feature

    def compute(self, data, lda_space):
        """Return column ``self.feature`` of ``lda_space``."""
        column = self.feature
        return lda_space[:, column]

Orange/tests/test_lda.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Test methods with long descriptive names can omit docstrings
2+
# pylint: disable=missing-docstring
3+
4+
import unittest
5+
6+
import numpy as np
7+
8+
from Orange.preprocess import Continuize, Randomize
9+
from Orange.projection import LDA
10+
from Orange.data import Table
11+
12+
13+
class TestLDA(unittest.TestCase):
    def test_lda(self):
        table = Table('iris')
        n_components = 2
        model = LDA(n_components=n_components)(table)
        projected = model(table)
        self.assertEqual(projected.X.shape, (len(table), n_components))
        self.assertEqual(projected.Y.shape, (len(table),))

    def test_transform_changed_domain(self):
        """
        Two routes to projecting the second half of the data must agree:

        1. Preprocess the whole table, split it in two, fit LDA on the
           first half and project the second half.

        2. Split the table in two, preprocess and fit LDA on the first
           half only, then project the untouched second half.
        """
        shuffled = Randomize()(Table("iris"))
        preprocessor = Continuize()
        lda = LDA()

        # Route 1: preprocess everything, then split.
        whole = preprocessor(shuffled)
        fitted_on_whole = lda(whole[:75])
        result_1 = fitted_on_whole(whole[75:])

        # Route 2: preprocess only the "training" half.
        first_half = preprocessor(shuffled[:75])
        fitted_on_half = lda(first_half)
        result_2 = fitted_on_half(shuffled[75:])

        np.testing.assert_almost_equal(result_1.X, result_2.X)

0 commit comments

Comments
 (0)