Skip to content

Commit adcd4d8

Browse files
committed
Investigating PCovCSPaceTest errors
1 parent 72a951e commit adcd4d8

File tree

9 files changed

+475
-62
lines changed

9 files changed

+475
-62
lines changed

examples/pcovc/PCovC-BreastCancerDataset.ipynb

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
},
4343
{
4444
"cell_type": "code",
45-
"execution_count": null,
45+
"execution_count": 2,
4646
"metadata": {},
4747
"outputs": [
4848
{
@@ -190,7 +190,7 @@
190190
},
191191
{
192192
"cell_type": "code",
193-
"execution_count": null,
193+
"execution_count": 3,
194194
"metadata": {},
195195
"outputs": [],
196196
"source": [
@@ -210,16 +210,16 @@
210210
},
211211
{
212212
"cell_type": "code",
213-
"execution_count": null,
213+
"execution_count": 4,
214214
"metadata": {},
215215
"outputs": [
216216
{
217217
"data": {
218218
"text/plain": [
219-
"<matplotlib.legend.Legend at 0x11a62f610>"
219+
"<matplotlib.legend.Legend at 0x110136e40>"
220220
]
221221
},
222-
"execution_count": 46,
222+
"execution_count": 4,
223223
"metadata": {},
224224
"output_type": "execute_result"
225225
},
@@ -258,16 +258,16 @@
258258
},
259259
{
260260
"cell_type": "code",
261-
"execution_count": null,
261+
"execution_count": 5,
262262
"metadata": {},
263263
"outputs": [
264264
{
265265
"data": {
266266
"text/plain": [
267-
"<matplotlib.collections.PathCollection at 0x11a6d3390>"
267+
"<matplotlib.collections.PathCollection at 0x1103cead0>"
268268
]
269269
},
270-
"execution_count": 47,
270+
"execution_count": 5,
271271
"metadata": {},
272272
"output_type": "execute_result"
273273
},
@@ -302,7 +302,7 @@
302302
},
303303
{
304304
"cell_type": "code",
305-
"execution_count": null,
305+
"execution_count": 6,
306306
"metadata": {},
307307
"outputs": [
308308
{

examples/pcovc/test_notebook.ipynb

Lines changed: 345 additions & 0 deletions
Large diffs are not rendered by default.

src/skmatter/decomposition/_kernel_pcovc.py

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,59 @@
1616
from sklearn.preprocessing import LabelBinarizer
1717
from sklearn.utils._array_api import get_namespace, indexing_dtype
1818
from sklearn.svm import SVC
19+
from sklearn.base import clone
20+
from copy import deepcopy
1921

20-
from ..preprocessing import KernelNormalizer
21-
from ..utils import check_krr_fit, pcovr_kernel
22+
from skmatter.preprocessing import KernelNormalizer
23+
from skmatter.utils import check_krr_fit, pcovr_kernel
24+
25+
def check_cl_fit(classifier, X, y):
    r"""
    Checks that a (linear) classifier is fitted, and if not,
    fits it with the provided data

    :param classifier: sklearn-style classifier
    :type classifier: object
    :param X: feature matrix with which to fit the classifier
        if it is not already fitted
    :type X: array
    :param y: target values with which to fit the classifier
        if it is not already fitted
    :type y: array
    :returns: a fitted classifier; a deep copy of ``classifier`` when it was
        already fitted, otherwise a fresh clone fitted on ``(X, y)``. The
        object passed in is never fitted or modified by this function.
    :rtype: object
    :raises ValueError: if an already-fitted classifier has ``coef_`` whose
        shape does not match the number of features in ``X`` or, for 2-D
        ``y``, the number of target columns in ``y``
    """
    try:
        check_is_fitted(classifier)
        # Work on a copy so the caller's estimator is left untouched.
        fitted_classifier = deepcopy(classifier)

        # Check compatibility with X
        fitted_classifier._validate_data(X, y, reset=False, multi_output=True)

        # Check compatibility with y
        #
        # Unlike the regressor check, ``coef_.ndim`` is not compared with
        # ``y.ndim``: the original author notes that classifier coefficients
        # are always 2-D, so only the individual axes are compared below.
        if fitted_classifier.coef_.shape[1] != X.shape[1]:
            # Shapes are tuples, so they must be formatted with %r; the
            # previous %d raised TypeError while building this message and
            # masked the intended ValueError.
            raise ValueError(
                "The classifier coefficients have a shape incompatible "
                "with the supplied feature space. "
                "The coefficients have shape %r and the features "
                "have shape %r" % (fitted_classifier.coef_.shape, X.shape)
            )
        # LogisticRegression does not support multioutput, but RidgeClassifier does
        elif y.ndim == 2:
            if fitted_classifier.coef_.shape[0] != y.shape[1]:
                raise ValueError(
                    "The classifier coefficients have a shape incompatible "
                    "with the supplied target space. "
                    "The coefficients have shape %r and the targets "
                    "have shape %r" % (fitted_classifier.coef_.shape, y.shape)
                )

    except NotFittedError:
        # Not fitted yet: fit an unfitted clone instead of the caller's object.
        fitted_classifier = clone(classifier)
        fitted_classifier.fit(X, y)

    return fitted_classifier
2272

2373

2474
class KernelPCovC(_BasePCA, LinearModel):
@@ -432,7 +482,8 @@ def fit(self, X, y, W=None):
432482

433483
self._fit(K, Z, W) #gives us T, Pkt, self.pt__
434484

435-
485+
self.classifier_ = check_cl_fit(classifier, K @ self.pkt, y) #Ptz as weights
486+
436487
'''
437488
we now need Z = TPtz
438489
@@ -477,6 +528,7 @@ def decision_function(self, X=None, T=None):
477528
return T @ self.ptz_
478529

479530
#is there a reason why this predict function is different from the one in PCovC?
531+
#it can be the same
480532
def predict(self, X=None, T=None):
481533
"""Predicts class values from X or T."""
482534

src/skmatter/decomposition/_kernel_pcovr.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
1515
from sklearn.utils.validation import check_is_fitted, check_X_y
1616

17-
from ..preprocessing import KernelNormalizer
18-
from ..utils import check_krr_fit, pcovr_kernel
17+
from skmatter.preprocessing import KernelNormalizer
18+
from skmatter.utils import check_krr_fit, pcovr_kernel
1919

2020

2121
class KernelPCovR(_BasePCA, LinearModel):

src/skmatter/decomposition/_pcovc.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -422,13 +422,24 @@ def fit(self, X, y, W=None):
422422

423423
# instead of using linear regression solution, refit with the classifier
424424
# and steal weights to get ptz
425-
#this is failing because self.classifier is never changed from None if None is passed as classifier
426-
#change self.classifier to classifier and see what happens. if classifier is precomputed, there might be more errors so be careful.
425+
# this is failing because self.classifier is never changed from None if None is passed as classifier
426+
# change self.classifier to classifier and see what happens. if classifier is precomputed, there might be more errors so be careful.
427427
# if classifier is precomputed, I don't think we need to check if the classifier is fit or not?
428428

429429
#most tests are passing if we change self.classifier to classifier (just like how PCovR has it for self.regressor = ...)
430-
self.classifier_ = check_cl_fit(self.classifier, X @ self.pxt_, y=y) #Has Ptz as weights
431-
#(self.classifier_.)
430+
#print(self.pxt_.shape)
431+
#print((X @ self.pxt_).shape)
432+
433+
434+
435+
#cases:
436+
#1. if classifier has been fit with X and Y already, we don't need to perform a check_cl_fit
437+
#2. if classifier has not been fit with X or Y, we can perform check_cl_fit but don't need to
438+
#3. if classifier has been fit with T and Y, we need to perform check_cl_fit (doesn't make sense actually, why would we fit with T and y)
439+
440+
# old: self.classifier_ = check_cl_fit(self.classifier, X @ self.pxt_, y=y) #Has Ptz as weights
441+
self.classifier_ = check_cl_fit(classifier, X @ self.pxt_, y=y) #Has Ptz as weights
442+
432443
if isinstance(self.classifier_, MultiOutputClassifier):
433444
self.ptz_ = np.hstack(
434445
[est_.coef_.T for est_ in self.classifier_.estimators_]

src/skmatter/decomposition/_pcovr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
1616
from sklearn.utils.validation import check_is_fitted, check_X_y
1717

18-
from ..utils import check_lr_fit, pcovr_covariance, pcovr_kernel
18+
from skmatter.utils import check_lr_fit, pcovr_covariance, pcovr_kernel
1919

2020

2121
class PCovR(_BasePCA, LinearModel):

src/skmatter/decomposition/playground.py

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,26 @@
1818
print(X.shape)
1919
print(Y.shape)
2020

21+
pcovc = PCovC(mixing=0.0, classifier=LogisticRegression(), n_components=2, space="feature")
22+
23+
#pcovc.classifier.fit(X, Y)
24+
#print(pcovc.classifier.coef_.shape)
25+
pcovc.fit(X, Y)
26+
T = pcovc.transform(X)
27+
28+
29+
30+
31+
32+
33+
34+
35+
36+
37+
38+
39+
40+
2141
# classifier = LogisticRegression()
2242
# classifier.fit(X, Y)
2343

@@ -27,59 +47,39 @@
2747
# print(pcovc.classifier.coef_.ndim)
2848

2949
# pcovc.fit(X, Y)
30-
X = [[1, 2, 3, 4, 5],
31-
[2, 3, 4, 5, 6]]
32-
Y = [[0, 1, 0, 1, 0],
33-
[0, 1, 0, 1, 0]]
50+
# X = [[1, 2, 3, 4, 5],
51+
# [2, 3, 4, 5, 6]]
52+
# Y = [[0, 1, 0, 1, 0],
53+
# [0, 1, 0, 1, 0]]
3454

35-
classifier = LogisticRegression()
36-
classifier.fit(X, Y)
37-
model = PCovC(classifier=classifier)
55+
# classifier = LogisticRegression()
56+
# classifier.fit(X, Y)
57+
# model = PCovC(classifier=classifier)
3858

3959
#model2 = PCovC(classifier=LogisticRegression())
4060
#model2.fit(X, Y)
4161

4262
#problem is that coef_.shape (1, n_features=30) is not the same as
43-
print(model.classifier.coef_.shape)
44-
#print(model2.classifier.coef_.ndim)
63+
# print(model.classifier.coef_.shape)
64+
# #print(model2.classifier.coef_.ndim)
4565

46-
model.fit(X, Y)
47-
y_pred = model.predict(X)
48-
print(accuracy_score(y_pred, Y))
66+
# model.fit(X, Y)
67+
# y_pred = model.predict(X)
68+
# print(accuracy_score(y_pred, Y))
4969

50-
X_new, Y_new = get_dataset2(return_X_y=True)
51-
print(X_new.shape)
52-
print(Y_new.shape)
70+
# X_new, Y_new = get_dataset2(return_X_y=True)
71+
# print(X_new.shape)
72+
# print(Y_new.shape)
5373

5474

5575
'''
5676
Problem is this: check_lr_fit and check_cl_fit do different things because the coefficients for Logistic/Linear regression are different.
5777
So we need to change check_cl_fit
58-
'''
78+
5979
scaler = StandardScaler()
6080
X_new = scaler.fit_transform(X_new)
6181
regressor = LinearRegression()
6282
6383
regressor.fit(X_new, Y_new)
6484
model2 = PCovR(regressor = regressor)
65-
print(model2.regressor.coef_)
66-
67-
68-
69-
70-
# model = KernelPCovC(
71-
# mixing=0.5,
72-
# classifier=SVC(),
73-
# n_components=4
74-
# )
75-
76-
# model2 = KernelPCovR(
77-
# mixing=0.5,
78-
# regressor=KernelRidge(gamma="scale"),
79-
# n_components=4
80-
# )
81-
# model3 = SVC()
82-
# model3.fit(X, Y)
83-
# print(model3.dual_coef_.shape)
84-
# # print(model2.gamma, model2.regressor.gamma)
85-
# # model2.fit(X, Y)
85+
print(model2.regressor.coef_)'''

tests/test_pcovc.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def test_against_pca(self):
4040
pcovc = PCovC(
4141
mixing=1.0, n_components=2, space="feature", svd_solver="full"
4242
).fit(self.X, self.Y)
43-
print(pcovc.score(self.X, self.Y))
43+
4444
pca = PCA(n_components=2, svd_solver="full").fit(self.X)
4545

4646
# tests that the SVD is equivalent
@@ -78,7 +78,7 @@ def test_simple_prediction(self):
7878
"""
7979
for space in ["feature", "sample", "auto"]:
8080
with self.subTest(space=space):
81-
# failing because check_lr_fit wei
81+
print(self.X.shape)
8282
pcovc = self.model(mixing=0.0, n_components=2, space=space)
8383

8484
pcovc.classifier.fit(self.X, self.Y)
@@ -240,6 +240,10 @@ def test_spaces_equivalent(self):
240240
# ))
241241

242242
#failing for all alpha values
243+
# so these are similar (within approximately 0.001), but not exactly the same.
244+
# I think this is because transform and inverse_transform depend on Pxt and Ptx,
245+
# which in turn depend on Z, which is a matrix of class likelihoods (so maybe there are some rounding problems)
246+
243247
self.assertTrue(
244248
np.allclose(
245249
pcovc_ss.inverse_transform(pcovc_ss.transform(self.X)),
@@ -476,9 +480,9 @@ def test_classifier_modifications(self):
476480

477481
# PCovC classifier doesn't change after fitting
478482
pcovc.fit(self.X, self.Y)
479-
classifier.set_params(alpha=1e-4)
480-
self.assertTrue(hasattr(pcovc.classifier, "coef_"))
481-
self.assertTrue(classifier.get_params() != pcovc.classifier.get_params())
483+
classifier.set_params(random_state=3)
484+
self.assertTrue(hasattr(pcovc.classifier_, "coef_"))
485+
self.assertTrue(classifier.get_params() != pcovc.classifier_.get_params())
482486

483487
def test_incompatible_classifier(self):
484488
classifier = GaussianNB()

tests/test_pcovr.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,8 @@ def test_spaces_equivalent(self):
231231
# pcovr_ss.pxt_, pcovr_fs.pxt_,
232232
# self.error_tol
233233
# ))
234-
# print(" ")
234+
# print(" ")
235+
235236
self.assertTrue(
236237
np.allclose(
237238
pcovr_ss.inverse_transform(pcovr_ss.transform(self.X)),

0 commit comments

Comments
 (0)