Skip to content

Commit 45d4659

Browse files
committed
Fixing decision_function shape for PCovC, writing corresponding Z test
1 parent 768d82b commit 45d4659

File tree

3 files changed

+44
-10
lines changed

3 files changed

+44
-10
lines changed

src/skmatter/decomposition/pcovc_new.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -288,20 +288,19 @@ class likelihoods, :math:`{\mathbf{Z}}`.
288288
else:
289289
W = self.z_classifier_.coef_.T.reshape(X.shape[1], -1)
290290
Z = self.z_classifier_.decision_function(X).reshape(X.shape[0], -1)
291-
#computes Z this will throw an error since pxz and ptz aren't defined yet
292291

293292
else:
294-
Z = y.copy()
293+
#Z = y.copy()
294+
Z = X @ W
295295
if W is None:
296296
W = np.linalg.lstsq(X, Z, self.tol)[0] #W = weights for Pxz
297297
# print("Z: "+str(Z[:4]))
298298
# print("W: "+str(W[:4]))
299299
self._label_binarizer = LabelBinarizer(neg_label=-1, pos_label=1)
300300
Y = self._label_binarizer.fit_transform(y) #check if we need this
301-
302301
if not self._label_binarizer.y_type_.startswith("multilabel"):
303302
y = column_or_1d(y, warn=True)
304-
303+
305304
if self.space_ == "feature":
306305
self._fit_feature_space(X, Y.reshape(Z.shape), Z)
307306
else:
@@ -332,10 +331,9 @@ class likelihoods, :math:`{\mathbf{Z}}`.
332331
self.ptz_ = np.hstack(
333332
[est_.coef_.T for est_ in self.classifier_.estimators_]
334333
)
335-
336334
self.pxz_ = self.pxt_ @ self.ptz_
337335
else:
338-
self.ptz_ = self.classifier_.coef_.T #this is actually of shape (n_features, 1) when we have binary classification, but we need it to be shape (n_features, n_classes)
336+
self.ptz_ = self.classifier_.coef_.T
339337
self.pxz_ = self.pxt_ @ self.ptz_
340338

341339
if len(Y.shape) == 1:
@@ -425,10 +423,16 @@ def decision_function(self, X=None, T=None):
425423

426424
if X is not None:
427425
X = check_array(X)
428-
return X @ self.pxz_
426+
scores = X @ self.pxz_
429427
else:
430428
T = check_array(T)
431-
return T @ self.ptz_
429+
scores = T @ self.ptz_
430+
431+
return (
432+
np.reshape(scores, (-1, ))
433+
if (scores.ndim > 1 and scores.shape[1] == 1)
434+
else scores
435+
)
432436

433437
def predict(self, X=None, T=None):
434438
"""Predicts the property labels using classification on T."""

tests/test_kernel_pcovc.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import numpy as np
44
from sklearn import exceptions
5+
from sklearn.calibration import LinearSVC
56
from sklearn.datasets import load_breast_cancer as get_dataset
67
from sklearn.kernel_ridge import KernelRidge
78
from sklearn.linear_model import Ridge, RidgeCV
@@ -193,6 +194,8 @@ def test_centerer(self):
193194

194195
def test_prefit_classifier(self):
195196
classifier = SVC(kernel="rbf", gamma=0.1)
197+
#this fails since we are trying to call decision_function(K) on a classifier fitted with X
198+
#see line 340 of kernel_pcovr
196199
classifier.fit(self.X, self.Y)
197200
print(classifier.n_features_in_)
198201
kpcovc = self.model(mixing=0.5, classifier=classifier, kernel="rbf", gamma=0.1)
@@ -336,7 +339,7 @@ def test_linear_matches_pcovc(self):
336339
"""Check that KernelPCovC returns the same results as PCovC when using a linear
337340
kernel.
338341
"""
339-
svc = SVC(kernel="linear", gamma="scale", coef0=0)
342+
svc = LinearSVC()
340343
svc.fit(self.X, self.Y)
341344

342345
# common instantiation parameters for the two models

tests/test_pcovc.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,32 @@ def test_T_shape(self):
416416
T = pcovc.transform(self.X)
417417
self.assertTrue(check_X_y(self.X, T, multi_output=True))
418418
self.assertTrue(T.shape[-1] == n_components)
419+
420+
def test_Z_shape(self):
421+
"""Check that PCovC returns an evidence matrix consistent with the shape of the input
422+
matrix and the number of classes.
423+
"""
424+
n_components = 5
425+
pcovc = self.model(n_components=n_components, tol=1e-12)
426+
427+
pcovc.fit(self.X, self.Y)
428+
429+
# Shape (n_samples, ) for binary classification
430+
Z = pcovc.decision_function(self.X)
431+
432+
self.assertTrue(Z.ndim == 1)
433+
self.assertTrue(Z.shape[0] == self.X.shape[0])
434+
435+
Y_multiclass = self.Y.copy()
436+
Y_multiclass[0] = 2
437+
438+
pcovc.fit(self.X, Y_multiclass)
439+
440+
# Shape (n_samples, n_classes) for multiclass classification
441+
Z = pcovc.decision_function(self.X)
442+
443+
self.assertTrue(Z.ndim == 2)
444+
self.assertTrue(Z.shape[0] == self.X.shape[0])
419445

420446
def test_default_ncomponents(self):
421447
pcovc = PCovC(mixing=0.5)
@@ -451,6 +477,7 @@ def test_prefit_classifier(self):
451477
def test_prefit_classification(self):
452478
classifier = LogisticRegression()
453479
classifier.fit(self.X, self.Y)
480+
#Yhat = classifier.predict(self.X)
454481
Yhat = classifier.predict(self.X)
455482
W = classifier.coef_.reshape(self.X.shape[1], -1)
456483
pcovc1 = self.model(mixing=0.5, classifier="precomputed", n_components=1)
@@ -525,7 +552,7 @@ def test_incompatible_coef_shape(self):
525552
# "The coefficients have dimension %d and the targets "
526553
# "have dimension %d" % (classifier.coef_.ndim, self.Y.squeeze().ndim),
527554
# )
528-
555+
529556
with self.assertRaises(ValueError) as cm:
530557
pcovc.fit(self.X, np.column_stack((self.Y, self.Y)))
531558
self.assertEqual(

0 commit comments

Comments
 (0)