Skip to content

Commit 5c838de

Browse files
markotoplak authored and VesnaT committed
PLS: Move from Orange-spectroscopy
1 parent 88f1048 commit 5c838de

File tree

6 files changed

+720
-0
lines changed

6 files changed

+720
-0
lines changed

Orange/regression/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .random_forest import *
1414
from .tree import *
1515
from .neural_network import *
16+
from .pls import *
1617
from ..classification.simple_tree import *
1718
try:
1819
from .catgb import *

Orange/regression/pls.py

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
import numpy as np
2+
import pkg_resources
3+
import sklearn
4+
import sklearn.cross_decomposition as skl_pls
5+
6+
from Orange.data import Table, Domain, Variable, \
7+
ContinuousVariable, StringVariable
8+
from Orange.data.util import get_unique_names, SharedComputeValue
9+
from Orange.preprocess.score import LearnerScorer
10+
from Orange.regression import SklLearner, SklModel
11+
12+
__all__ = ["PLSRegressionLearner"]
13+
14+
15+
class _FeatureScorerMixin(LearnerScorer):
    """Mixin that scores features by the magnitude of model coefficients."""

    feature_type = Variable
    class_type = ContinuousVariable

    def score(self, data):
        """Fit the learner on ``data`` and score the model's attributes.

        Returns a pair: the element-wise absolute value of the fitted
        model's ``coefficients`` and the attributes of the fitted model's
        domain.
        """
        fitted = self(data)
        magnitudes = np.abs(fitted.coefficients)
        return magnitudes, fitted.domain.attributes
22+
23+
24+
class _PLSCommonTransform:
    """Callable that turns a table into the stacked PLS scores.

    The result is a single numpy array in which the columns returned for
    X (t) are followed by the columns returned for Y (u).
    """

    def __init__(self, pls_model):
        self.pls_model = pls_model

    def _transform_with_numpy_output(self, X, Y):
        """Run the wrapped estimator's ``transform`` and stack its outputs."""
        estimator = self.pls_model.skl_model
        # According to the original author's note, ``transform`` does
        # the following:
        #     x_center = X - pls._x_mean
        #     y_center = Y - pls._y_mean
        #     t = x_center @ pls.x_rotations_
        #     u = y_center @ pls.y_rotations_
        x_scores, y_scores = estimator.transform(X, Y)
        return np.hstack((x_scores, y_scores))

    def __call__(self, data):
        """Bring ``data`` into the model's domain and compute the scores."""
        if data.domain != self.pls_model.domain:
            data = data.transform(self.pls_model.domain)
        targets = data.Y
        # A one-dimensional Y is passed on as a single column.
        if len(targets.shape) == 1:
            targets = targets.reshape(-1, 1)
        return self._transform_with_numpy_output(data.X, targets)
49+
50+
51+
class PLSProjector(SharedComputeValue):
    """Compute value that selects one column of a shared score array."""

    def __init__(self, transform, feature):
        super().__init__(transform)
        # Index of the column this variable takes from the shared array.
        self.feature = feature

    def compute(self, _, space):
        """Return column ``self.feature`` of ``space``; the data is unused."""
        column = self.feature
        return space[:, column]
58+
59+
60+
class PLSModel(SklModel):
    """Model around a fitted scikit-learn PLS estimator (``skl_model``)."""

    # Name prefixes for the projected X-score and Y-score variables.
    var_prefix_X = "PLS T"
    var_prefix_Y = "PLS U"

    @property
    def coefficients(self):
        """``coef_`` of the wrapped estimator, in one fixed orientation.

        For scikit-learn older than 1.3.0 the array is transposed so that
        it matches the layout used from 1.3 on.
        """
        raw = self.skl_model.coef_
        parse = pkg_resources.parse_version
        # 1.3 has transposed coef_
        # NOTE(review): pkg_resources is deprecated by setuptools; consider
        # replacing this version check when the file's imports are revised.
        if parse(sklearn.__version__) < parse("1.3.0"):
            return raw.T
        return raw

    def predict(self, X):
        """Predict for ``X``; flatten the result when there is one target."""
        predicted = self.skl_model.predict(X)
        single_target = len(self.domain.class_vars) == 1
        return predicted.ravel() if single_target else predicted

    def __str__(self):
        return 'PLSModel {}'.format(self.skl_model)

    def _get_var_names(self, n, prefix):
        """Return ``n`` names ``prefix1..prefixn`` made unique w.r.t. metas."""
        taken = [meta.name for meta in self.domain.metas]
        proposed = [f"{prefix}{number}" for number in range(1, n + 1)]
        return get_unique_names(taken, proposed)

    def project(self, data):
        """Transform ``data`` into the space of PLS scores.

        The resulting domain has the X-score variables as attributes, the
        class variables of ``data`` unchanged, and the metas of ``data``
        followed by the Y-score variables.

        Raises:
            RuntimeError: if ``data`` is not a ``Table``.
        """
        if not isinstance(data, Table):
            raise RuntimeError("PLSModel can only project tables")

        # One transform object is shared by all score variables.
        shared = _PLSCommonTransform(self)

        def score_var(column, name):
            return ContinuousVariable(
                name, compute_value=PLSProjector(shared, column))

        n_components = self.skl_model.x_loadings_.shape[1]
        names_t = self._get_var_names(n_components, self.var_prefix_X)
        names_u = self._get_var_names(n_components, self.var_prefix_Y)

        indices = range(n_components)
        t_vars = [score_var(i, names_t[i]) for i in indices]
        # Y scores sit after the X scores in the stacked array.
        u_vars = [score_var(n_components + i, names_u[i]) for i in indices]

        projected = Domain(t_vars,
                           data.domain.class_vars,
                           list(data.domain.metas) + u_vars)
        return data.transform(projected)

    def components(self):
        """Return a table of loadings with one row per component.

        Attributes hold the transposed ``x_loadings_``, class columns the
        transposed ``y_loadings_``, and a string meta labels each row
        "Component 1", "Component 2", ...
        """
        source = self.domain
        used = [var.name for var in source.attributes + source.class_vars]
        label_name = get_unique_names(used, 'components')

        x_loadings = self.skl_model.x_loadings_
        n_components = x_loadings.shape[1]

        label_vars = [StringVariable(name=label_name)]
        labels = np.array(
            [[f"Component {i + 1}" for i in range(n_components)]],
            dtype=object,
        ).T
        loadings_domain = Domain(
            [ContinuousVariable(var.name) for var in source.attributes],
            [ContinuousVariable(var.name) for var in source.class_vars],
            metas=label_vars)
        loadings = Table(loadings_domain,
                         x_loadings.T,
                         Y=self.skl_model.y_loadings_.T,
                         metas=labels)
        loadings.name = 'components'
        return loadings

    def coefficients_table(self):
        """Return the transposed coefficients as a table.

        Columns are named "coef 0", "coef 1", ...; a string meta "name"
        carries the name of the model attribute each row belongs to.
        """
        transposed = self.coefficients.T
        n_columns = transposed.shape[1]
        coef_vars = [ContinuousVariable(f"coef {i}")
                     for i in range(n_columns)]
        domain = Domain(coef_vars, metas=[StringVariable("name")])
        attr_names = [[attr.name] for attr in self.domain.attributes]
        result = Table.from_numpy(domain, X=transposed, metas=attr_names)
        result.name = "coefficients"
        return result
145+
146+
147+
class PLSRegressionLearner(SklLearner, _FeatureScorerMixin):
    # Learner around scikit-learn's PLSRegression that produces PLSModel.
    # (Kept as a comment on purpose: the class docstring is left unset.)
    __wraps__ = skl_pls.PLSRegression
    __returns__ = PLSModel
    supports_multiclass = True
    preprocessors = SklLearner.preprocessors

    def __init__(self, n_components=2, scale=True,
                 max_iter=500, preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        # vars() captures the local namespace, so no other locals may be
        # introduced in this method.
        self.params = vars()

    def fit(self, X, Y, W=None):
        """Fit the wrapped estimator on ``X`` and ``Y``; ``W`` is not used.

        ``n_components`` is lowered, if needed, to at most one less than
        the number of columns and one less than the number of rows of X.
        """
        params = self.params.copy()
        data_limit = min(X.shape[1] - 1, X.shape[0] - 1)
        params["n_components"] = min(data_limit, params["n_components"])
        estimator = self.__wraps__(**params)
        return self.__returns__(estimator.fit(X, Y))

    def incompatibility_reason(self, domain):
        """Return why ``domain`` cannot be used, or None if it can."""
        targets = domain.class_vars
        if not targets:
            return "Numeric targets expected."
        if not all(var.is_continuous for var in targets):
            return "Only numeric target variables expected."
        return None
175+
176+
177+
if __name__ == '__main__':
    # Demo: cross-validate PLS regression on 'housing' and print the RMSE.
    import Orange

    housing = Orange.data.Table('housing')
    learners = [PLSRegressionLearner(n_components=2, max_iter=100)]
    cv_results = Orange.evaluation.CrossValidation()(housing, learners)
    rmse_scores = Orange.evaluation.RMSE(cv_results)
    for learner, rmse in zip(learners, rmse_scores):
        print("learner: {}\nRMSE: {}\n".format(learner, rmse))
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# pylint: disable=missing-docstring
2+
import unittest
3+
4+
import pkg_resources
5+
import numpy as np
6+
import sklearn
7+
from sklearn.cross_decomposition import PLSRegression
8+
9+
from Orange.data import Table, Domain, ContinuousVariable
10+
from Orange.regression import PLSRegressionLearner
11+
12+
13+
def table(rows, attr, vars):
    """Build a table of seeded random numbers.

    The table has ``rows`` rows, ``attr`` continuous attributes named
    "Feature i" and ``vars`` continuous class variables named "Class i".
    X is drawn from ``RandomState(0)`` and Y from ``RandomState(1)``, so
    repeated calls with the same arguments give the same values.
    """
    # ``vars`` shadows the builtin, but it is part of the signature.
    features = [ContinuousVariable(name="Feature %i" % i)
                for i in range(attr)]
    targets = [ContinuousVariable(name="Class %i" % i) for i in range(vars)]
    domain = Domain(features, targets, [])
    x_values = np.random.RandomState(0).random((rows, attr))
    y_values = np.random.RandomState(1).random((rows, vars))
    return Table.from_numpy(domain, X=x_values, Y=y_values)
21+
22+
23+
def coefficients(sklmodel):
    """Return ``sklmodel.coef_`` in the orientation used since sklearn 1.3.

    For scikit-learn older than 1.3.0 the array is transposed.
    """
    coef = sklmodel.coef_
    parse = pkg_resources.parse_version
    # 1.3 has transposed coef_
    predates_transpose = parse(sklearn.__version__) < parse("1.3.0")
    return coef.T if predates_transpose else coef
30+
31+
32+
class TestPLSRegressionLearner(unittest.TestCase):
    """Compare PLSRegressionLearner and its model with scikit-learn."""

    @classmethod
    def setUpClass(cls):
        cls.housing = Table("housing")

    def test_allow_y_dim(self):
        """No target raises ValueError; one or more targets are accepted."""
        learner = PLSRegressionLearner(n_components=2)
        with self.assertRaises(ValueError):
            learner(table(10, 5, 0))
        for n_class_vars in [1, 2, 3]:
            with self.subTest(n_class_vars=n_class_vars):
                learner(table(10, 5, n_class_vars))  # no exception

    def test_compare_to_sklearn(self):
        """Single target: predictions and coefficients match sklearn."""
        d = table(10, 5, 1)
        orange_model = PLSRegressionLearner()(d)
        scikit_model = PLSRegression().fit(d.X, d.Y)
        np.testing.assert_almost_equal(scikit_model.predict(d.X).ravel(),
                                       orange_model(d))
        np.testing.assert_almost_equal(coefficients(scikit_model),
                                       orange_model.coefficients)

    def test_compare_to_sklearn_multid(self):
        """Three targets: predictions and coefficients match sklearn."""
        d = table(10, 5, 3)
        orange_model = PLSRegressionLearner()(d)
        scikit_model = PLSRegression().fit(d.X, d.Y)
        np.testing.assert_almost_equal(scikit_model.predict(d.X),
                                       orange_model(d))
        np.testing.assert_almost_equal(coefficients(scikit_model),
                                       orange_model.coefficients)

    def test_too_many_components(self):
        """The learner lowers n_components when the data is too small."""
        cases = [
            # (rows, attributes, requested, expected)
            (5, 5, 4, 4),  # do not change n_components
            (6, 5, 6, 4),  # need to use fewer components; column limited
            (5, 6, 6, 4),  # need to use fewer components; row limited
        ]
        for rows, attrs, requested, expected in cases:
            with self.subTest(rows=rows, attrs=attrs, requested=requested):
                d = table(rows, attrs, 1)
                model = PLSRegressionLearner(n_components=requested)(d)
                self.assertEqual(model.skl_model.n_components, expected)

    def test_scores(self):
        """project() yields sklearn's X scores as X and Y scores as metas."""
        for n_targets in (1, 3):
            with self.subTest(n_targets=n_targets):
                d = table(10, 5, n_targets)
                orange_model = PLSRegressionLearner()(d)
                scikit_model = PLSRegression().fit(d.X, d.Y)
                scores = orange_model.project(d)
                sx, sy = scikit_model.transform(d.X, d.Y)
                np.testing.assert_almost_equal(sx, scores.X)
                np.testing.assert_almost_equal(sy, scores.metas)

    def test_components(self):
        """components() holds sklearn's x/y loadings, one row per component."""
        def t2d(m):
            # A single class column comes back one-dimensional.
            return m.reshape(-1, 1) if len(m.shape) == 1 else m

        for n_targets in (1, 3):
            with self.subTest(n_targets=n_targets):
                d = table(10, 5, n_targets)
                orange_model = PLSRegressionLearner()(d)
                scikit_model = PLSRegression().fit(d.X, d.Y)
                components = orange_model.components()
                np.testing.assert_almost_equal(scikit_model.x_loadings_,
                                               components.X.T)
                np.testing.assert_almost_equal(scikit_model.y_loadings_,
                                               t2d(components.Y).T)

    def test_coefficients(self):
        """coefficients_table() holds the transposed sklearn coefficients."""
        for n_targets in (1, 3):
            with self.subTest(n_targets=n_targets):
                d = table(10, 5, n_targets)
                orange_model = PLSRegressionLearner()(d)
                scikit_model = PLSRegression().fit(d.X, d.Y)
                coef_table = orange_model.coefficients_table()
                np.testing.assert_almost_equal(
                    coefficients(scikit_model).T, coef_table.X)
108+
109+
110+
if __name__ == "__main__":
    # Run the tests of this module when the file is executed directly.
    unittest.main()

0 commit comments

Comments
 (0)