
Commit 6b7598d

Add NIPALS vector difference norm calculation (#10)
* Add norm calculation for NIPALS score vector convergence

  When using the NIPALS algorithm, the difference of subsequent score vectors is now calculated using the second-order norm of the difference vector, i.e. the Frobenius norm by default. Optionally, other norms can be used as defined in the function's docstring.

* Update new test data to match precise decimals of NIPALS change

* Add scikit-learn array copy behaviour to MBPLS methods
1 parent 3f0ddf5 commit 6b7598d
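
The change can be exercised end to end with a short script. This is an illustrative sketch only: the toy data is random, and it assumes the package's import path together with the constructor, fit, and predict signatures introduced in this commit.

    # Illustrative sketch: random toy data, import path assumed from this repo.
    import numpy as np
    from mbpls.mbpls import MBPLS

    rng = np.random.RandomState(0)
    x1 = rng.rand(25, 5)    # first predictor block
    x2 = rng.rand(25, 10)   # second predictor block
    y = rng.rand(25, 2)     # response matrix

    # nipals_convergence_norm=2 (the new default) compares the 2-norm of the
    # difference between subsequent superscore vectors against max_tol;
    # copy=True keeps deflation from altering the caller's arrays.
    model = MBPLS(n_components=2, method='NIPALS', max_tol=1e-14,
                  nipals_convergence_norm=2, copy=True)
    model.fit([x1, x2], y)
    y_hat = model.predict([x1, x2], copy=True)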

File tree

13 files changed: +371 additions, -330 deletions


mbpls/mbpls.py

Lines changed: 67 additions & 26 deletions
@@ -51,6 +51,26 @@ class MBPLS(TransformerMixin, RegressorMixin, MultiOutputMixin, BaseEstimator):
     max_tol : non-negative float (default 1e-14)
         Maximum tolerance allowed when using the iterative NIPALS algorithm
 
+    nipals_convergence_norm : {non-zero int, inf, -inf, 'fro', 'nuc'} (default 2)
+        Order of the norm that is used to calculate the difference of the superscore vectors between subsequent
+        iterations of the NIPALS algorithm. Following orders are available:
+
+        =====  ============================  ==========================
+        ord    norm for matrices             norm for vectors
+        =====  ============================  ==========================
+        None   Frobenius norm                2-norm
+        'fro'  Frobenius norm                --
+        'nuc'  nuclear norm                  --
+        inf    max(sum(abs(x), axis=1))      max(abs(x))
+        -inf   min(sum(abs(x), axis=1))      min(abs(x))
+        0      --                            sum(x != 0)
+        1      max(sum(abs(x), axis=0))      as below
+        -1     min(sum(abs(x), axis=0))      as below
+        2      2-norm (largest sing. value)  as below
+        -2     smallest singular value       as below
+        other  --                            sum(abs(x)**ord)**(1./ord)
+        =====  ============================  ==========================
+
     calc_all : bool (default True)
         Calculate all internal attributes for the used method. Some methods do not need to calculate all attributes,
         i.e. scores, weights etc., to obtain the regression coefficients used for prediction. Setting this parameter
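
The orders in the table above map directly onto NumPy's np.linalg.norm, which the new convergence check calls. A quick vector-only illustration (not part of the diff):

    import numpy as np

    v = np.array([3.0, -4.0])
    print(np.linalg.norm(v))              # 5.0, default 2-norm for vectors
    print(np.linalg.norm(v, ord=1))       # 7.0, sum(abs(x))
    print(np.linalg.norm(v, ord=np.inf))  # 4.0, max(abs(x))
    print(np.linalg.norm(v, ord=0))       # 2.0, sum(x != 0)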
@@ -62,7 +82,11 @@ class MBPLS(TransformerMixin, RegressorMixin, MultiOutputMixin, BaseEstimator):
         allowed.
         Without setting this parameter to 'True', sparse data will not be accepted.
 
-
+    copy : bool (default True)
+        Whether the deflation should be done on a copy. Not using a copy might alter the input data and have
+        unforeseeable consequences.
+
+
     Model attributes after fitting
     ------------------------------
 
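
The caution in the new copy docstring can be reproduced with scikit-learn's check_array, the validation helper this commit threads the flag through. A minimal sketch, assuming the input already has the requested dtype so no conversion copy is forced:

    import numpy as np
    from sklearn.utils import check_array

    X = np.arange(6, dtype=np.float64).reshape(3, 2)
    X_checked = check_array(X, dtype=np.float64, copy=False)
    X_checked[0, 0] = -1.0   # with copy=False this is the caller's own buffer
    assert X[0, 0] == -1.0   # the original array was altered in place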
@@ -97,7 +121,6 @@ class MBPLS(TransformerMixin, RegressorMixin, MultiOutputMixin, BaseEstimator):
 
     explained_var_y_ : list, explained variance in :math:`Y` :math:`[k]`
 
-
 
     Notes
     -----
@@ -216,15 +239,17 @@ class MBPLS(TransformerMixin, RegressorMixin, MultiOutputMixin, BaseEstimator):
     https://github.com/DTUComputeStatisticsAndDataAnalysis/MBPLS/tree/master/examples
     """
 
-    def __init__(self, n_components=2, full_svd=False, method='NIPALS', standardize=True, max_tol=1e-14, calc_all=True,
-                 sparse_data=False):
+    def __init__(self, n_components=2, full_svd=False, method='NIPALS', standardize=True, max_tol=1e-14,
+                 nipals_convergence_norm=2, calc_all=True, sparse_data=False, copy=True):
         self.n_components = n_components
         self.full_svd = full_svd
         self.method = method
         self.standardize = standardize
         self.max_tol = max_tol
+        self.nipals_convergence_norm = nipals_convergence_norm
         self.calc_all = calc_all
         self.sparse_data = sparse_data
+        self.copy = copy
 
     def check_sparsity_level(self, data):
         total_rows, total_columns = data.shape
@@ -264,7 +289,7 @@ def fit(self, X, Y):
             self.method = 'NIPALS'
 
         global U_, T_, R_
-        Y = check_array(Y, dtype=np.float64, ensure_2d=False, force_all_finite=not self.sparse_data)
+        Y = check_array(Y, dtype=np.float64, ensure_2d=False, force_all_finite=not self.sparse_data, copy=self.copy)
         if self.sparse_data is True:
             self.sparse_Y_info_ = {}
             self.sparse_Y_info_['Y'] = self.check_sparsity_level(Y)
@@ -279,14 +304,15 @@ def fit(self, X, Y):
                 self.x_scalers_.append(StandardScaler(with_mean=True, with_std=True))
                 # Check dimensions
                 check_consistent_length(X[block], Y)
-                X[block] = check_array(X[block], dtype=np.float64, copy=True, force_all_finite=not self.sparse_data)
+                X[block] = check_array(X[block], dtype=np.float64, copy=self.copy,
+                                       force_all_finite=not self.sparse_data)
                 if self.sparse_data is True:
                     self.sparse_X_info_[block] = self.check_sparsity_level(X[block])
                 X[block] = self.x_scalers_[block].fit_transform(X[block])
         else:
             self.x_scalers_.append(StandardScaler(with_mean=True, with_std=True))
             # Check dimensions
-            X = check_array(X, dtype=np.float64, copy=True, force_all_finite=not self.sparse_data)
+            X = check_array(X, dtype=np.float64, copy=self.copy, force_all_finite=not self.sparse_data)
             if self.sparse_data is True:
                 self.sparse_X_info_ = {}
                 self.sparse_X_info_[0] = self.check_sparsity_level(X)
@@ -302,12 +328,13 @@ def fit(self, X, Y):
             for block in range(len(X)):
                 # Check dimensions
                 check_consistent_length(X[block], Y)
-                X[block] = check_array(X[block], dtype=np.float64, copy=True, force_all_finite=not self.sparse_data)
+                X[block] = check_array(X[block], dtype=np.float64, copy=self.copy,
+                                       force_all_finite=not self.sparse_data)
                 if self.sparse_data is True:
                     self.sparse_X_info_[block] = self.check_sparsity_level(X[block])
         else:
             # Check dimensions
-            X = check_array(X, dtype=np.float64, copy=True, force_all_finite=not self.sparse_data)
+            X = check_array(X, dtype=np.float64, copy=self.copy, force_all_finite=not self.sparse_data)
             if self.sparse_data is True:
                 self.sparse_X_info_ = {}
                 self.sparse_X_info_[0] = self.check_sparsity_level(X)
@@ -852,7 +879,7 @@ def fit(self, X, Y):
                     if run == 1:
                         pass
                     else:
-                        diff_t = np.sum(superscores_old - superscores)
+                        diff_t = np.linalg.norm((superscores_old - superscores), ord=self.nipals_convergence_norm)
                     superscores_old = np.copy(superscores)
                     # 6. Regress superscores agains Y_calc
                     if self.sparse_data:
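
The motivation for this change is easy to see in isolation: summing the raw differences lets opposite-sign entries cancel, so the old criterion could report convergence for score vectors that still differ. A toy comparison (illustrative values, not from the library):

    import numpy as np

    t_old = np.array([1.0, -1.0])
    t_new = np.array([-1.0, 1.0])

    print(np.sum(t_old - t_new))                 # 0.0, looks converged
    print(np.linalg.norm(t_old - t_new, ord=2))  # 2.828..., clearly not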
@@ -1017,27 +1044,35 @@ def fit(self, X, Y):
         else:
             raise NameError('Method you called is unknown')
 
-    def transform(self, X, Y=None, return_block_scores=False):
+    def transform(self, X, Y=None, return_block_scores=False, copy=True):
         """ Obtain scores based on the fitted model
 
+
         Parameters
         ----------
         X : list
             of arrays containing all xblocks x1, x2, ..., xn. Rows are observations, columns are features/variables
+
         (optional) Y : array
             1-dim or 2-dim array of reference values
+
         return_block_scores: bool (default False)
             Returning block scores T_ when transforming the data
 
+        copy : bool (default True)
+            Whether to perform in-place transformation. Not using a copy might alter the input data and have
+            unforeseeable consequences.
+
+
         Returns
         ----------
         Super_scores : np.array
 
         Block_scores : list
-        List of np.arrays containing the block scores
+            List of np.arrays containing the block scores
 
         Y_scores : np.array (optional)
-        Y-scores, if y was given
+            Y-scores, if y was given
         """
         check_is_fitted(self, 'beta_')
 
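Based on the docstring above, the updated transform can be called as sketched below; the tuple orderings are an assumption inferred from the order of the documented return values, not confirmed by the diff:

    # Hedged sketch, reusing model, x1, x2, y from the earlier example.
    Ts = model.transform([x1, x2])                     # superscores only
    Ts, T = model.transform([x1, x2],
                            return_block_scores=True)  # plus block scores
    Ts, U = model.transform([x1, x2], Y=y)             # plus Y-scores (assumed order)
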
@@ -1049,13 +1084,13 @@ def transform(self, X, Y=None, return_block_scores=False):
         if isinstance(X, list) and not isinstance(X[0], list):
             for block in range(len(X)):
                 # Check dimensions
-                X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data)
+                X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy)
                 if self.sparse_data:
                     sparse_X_info_[block] = self.check_sparsity_level(X[block])
                 X[block] = self.x_scalers_[block].transform(X[block])
         else:
             # Check dimensions
-            X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data)
+            X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy)
             if self.sparse_data:
                 sparse_X_info_[0] = self.check_sparsity_level(X)
             X = [self.x_scalers_[0].transform(X)]
@@ -1075,7 +1110,7 @@ def transform(self, X, Y=None, return_block_scores=False):
             Ts_ = X_comp.dot(self.R_)
 
             if Y is not None:
-                Y = check_array(Y, dtype=np.float64, ensure_2d=False, force_all_finite=not self.sparse_data)
+                Y = check_array(Y, dtype=np.float64, ensure_2d=False, force_all_finite=not self.sparse_data, copy=copy)
                 if self.sparse_data:
                     sparse_Y_info_['Y'] = self.check_sparsity_level(Y)
                 if Y.ndim == 1:
@@ -1181,12 +1216,12 @@ def transform(self, X, Y=None, return_block_scores=False):
             if isinstance(X, list) and not isinstance(X[0], list):
                 for block in range(len(X)):
                     # Check dimensions
-                    X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data)
+                    X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy)
                     if self.sparse_data:
                         sparse_X_info_[block] = self.check_sparsity_level(X[block])
             else:
                 # Check dimensions
-                X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data)
+                X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy)
                 if self.sparse_data:
                     sparse_X_info_[0] = self.check_sparsity_level(X)
                 X = [X]
@@ -1206,7 +1241,7 @@ def transform(self, X, Y=None, return_block_scores=False):
                 Ts_ = X_comp.dot(self.R_)
 
                 if Y is not None:
-                    Y = check_array(Y, dtype=np.float64, ensure_2d=False, force_all_finite=not self.sparse_data)
+                    Y = check_array(Y, dtype=np.float64, ensure_2d=False, force_all_finite=not self.sparse_data, copy=copy)
                     if self.sparse_data:
                         sparse_Y_info_['Y'] = self.check_sparsity_level(Y)
                     if Y.ndim == 1:
@@ -1290,19 +1325,25 @@ def transform(self, X, Y=None, return_block_scores=False):
         else:
             return Ts_
 
-    def predict(self, X):
+    def predict(self, X, copy=True):
         """Predict y based on the fitted model
 
+
         Parameters
         ----------
+
         X : list
             of all xblocks x1, x2, ..., xn. Rows are observations, columns are features/variables
 
+        copy : bool (default True)
+            Whether to perform in-place transformation. Not using a copy might alter the input data and have
+            unforeseeable consequences.
+
+
         Returns
         ----------
         y_hat : np.array
-        Predictions made based on trained model and supplied X
-
+            Predictions made based on trained model and supplied X
         """
         check_is_fitted(self, 'beta_')
 
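Per the docstring just added, predict(copy=False) may let the internal validation and scaling reuse the caller's buffers, so the input blocks could be altered in place; a brief hedged sketch:

    # copy=True (the default) is the safe choice; copy=False trades that
    # safety for lower memory use and may modify x1/x2 in place.
    y_hat = model.predict([x1, x2], copy=False)
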
@@ -1313,10 +1354,10 @@ def predict(self, X):
         if isinstance(X, list) and not isinstance(X[0], list):
             for block in range(len(X)):
                 # Check dimensions
-                X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data)
+                X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy)
                 X[block] = self.x_scalers_[block].transform(X[block])
         else:
-            X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data)
+            X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy)
             X = [self.x_scalers_[0].transform(X)]
 
 
@@ -1336,9 +1377,9 @@ def predict(self, X):
         if isinstance(X, list) and not isinstance(X[0], list):
             for block in range(len(X)):
                 # Check dimensions
-                X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data)
+                X[block] = check_array(X[block], dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy)
         else:
-            X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data)
+            X = check_array(X, dtype=np.float64, force_all_finite=not self.sparse_data, copy=copy)
             X = [X]
         X = np.hstack(X)
         if self.sparse_data:

mbpls/tests/test_data/A_NIPALS.csv

Lines changed: 2 additions & 2 deletions

@@ -1,2 +1,2 @@
-9.460050930379139134e-01,3.864865617219123001e-02
-5.399490696208620460e-02,9.613513438278087353e-01
+9.274646237881400967e-01,5.716118652731461136e-02
+7.253537621185997264e-02,9.428388134726853886e-01
Lines changed: 25 additions & 25 deletions

@@ -1,25 +1,25 @@
-5.808427406751311750e+00,6.520579921256950406e-01
-5.926596442662182440e+00,3.414058821124497101e-01
-6.086453323360574430e+00,4.960972689404420932e-01
-5.673416006491299513e+00,2.196348688272564686e-02
-6.059110719312798210e+00,5.575722505091080805e-01
-5.650942259585248095e+00,8.035470872887177096e-01
-6.193024415711314568e+00,3.761258217688800976e-01
-5.718477717250506132e+00,1.180765043320053109e-01
-5.974501227518040025e+00,1.105125183151397239e-01
-3.851918076221314990e+00,3.173299523953762025e-01
-6.038798238605442847e+00,1.152914973439507662e+00
-5.346208049272602736e+00,1.285607400057692695e+00
-6.140535096631946743e+00,6.281127826818750925e-01
-6.094854212815943306e+00,6.500160775864066709e-01
-5.111949514516579640e+00,1.041565457794748850e+00
-5.862289305226957126e+00,5.993825610514952329e-02
-5.608125399568422154e+00,-1.001429840936495008e+00
-1.626524804393373547e+00,9.296548505843652555e-01
-5.993136960784692491e+00,8.656563868700076769e-01
--6.865050824961729248e-01,-1.347598947582476225e-01
-6.157950800160661764e+00,7.322040498902356864e-01
-1.147230076524884401e-01,8.922509625715264736e-01
-5.348783867045700191e+00,7.416084505076506739e-01
-5.884084660201166272e+00,1.894637312204782709e-01
-5.689278078360489488e+00,1.176970173160049482e+00
+5.778141171141287558e+00,8.807390506152754650e-01
+5.908470722787161122e+00,5.750364384512615112e-01
+6.062100478068037646e+00,7.358715443699839209e-01
+5.668136154400002624e+00,2.457974398229465507e-01
+6.032348050525556893e+00,7.961299169925415420e-01
+5.614807645731139552e+00,1.025857546053548397e+00
+6.173329201680968659e+00,6.202103429515453126e-01
+5.709381844946635809e+00,3.437633844666128646e-01
+5.965492877132286154e+00,3.462350148801632832e-01
+3.836469051155537269e+00,4.693360014527241386e-01
+5.988570606931146401e+00,1.390411268493505270e+00
+5.291293080231936230e+00,1.495354912435294281e+00
+6.110934167362700897e+00,8.698676960424125415e-01
+6.064406950376159067e+00,8.899853132326137617e-01
+5.066888370390126184e+00,1.242888876859258307e+00
+5.855372046908557948e+00,2.912183806269581510e-01
+5.643261210523061067e+00,-7.792454675832711786e-01
+1.588547991938236148e+00,9.930027998805039946e-01
+5.954338133724771609e+00,1.101669613466050412e+00
+-6.806038670955768533e-01,-1.622982367317185592e-01
+6.124247472541748394e+00,9.747324336857725591e-01
+7.941412319847895862e-02,8.956274678154049207e-01
+5.315367390122011315e+00,9.521689148940255532e-01
+5.871993582189810823e+00,4.215876038627815459e-01
+5.638359679673937919e+00,1.400667425441476599e+00

0 commit comments