Skip to content

Commit 7f24a7d

Browse files
committed
Finalizing/touching up docs
1 parent 513241b commit 7f24a7d

File tree

4 files changed

+51
-64
lines changed

4 files changed

+51
-64
lines changed

src/skmatter/decomposition/_pcov.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,4 +284,4 @@ def _decompose_full(self, mat):
284284
U[:, : self.n_components_],
285285
S[: self.n_components_],
286286
Vt[: self.n_components_],
287-
)
287+
)

src/skmatter/decomposition/_pcovc.py

Lines changed: 41 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
)
1212
from sklearn.linear_model._base import LinearClassifierMixin
1313
from sklearn.svm import LinearSVC
14-
from sklearn.multioutput import MultiOutputClassifier
1514
from sklearn.utils import check_array
1615
from sklearn.utils.validation import check_is_fitted, validate_data
1716
from sklearn.utils.multiclass import check_classification_targets, type_of_target
@@ -99,21 +98,19 @@ class PCovC(LinearClassifierMixin, _BasePCov):
9998
default=`sample` when :math:`{n_{samples} < n_{features}}` and
10099
`feature` when :math:`{n_{features} < n_{samples}}`
101100
102-
classifier: {`RidgeClassifier`, `RidgeClassifierCV`, `LogisticRegression`,
103-
`LogisticRegressionCV`, `SGDClassifier`, `LinearSVC`, `precomputed`}, default=None
104-
classifier for computing :math:`{\mathbf{Z}}`. The classifier should be one
105-
`sklearn.linear_model.RidgeClassifier`, `sklearn.linear_model.RidgeClassifierCV`,
101+
classifier: {`LogisticRegression`, `LogisticRegressionCV`, `LinearSVC`, `LinearDiscriminantAnalysis`,
102+
`RidgeClassifier`, `RidgeClassifierCV`, `SGDClassifier`, `Perceptron`, `precomputed`}, default=None
103+
classifier for computing :math:`{\mathbf{Z}}`. The classifier should be one of
106104
`sklearn.linear_model.LogisticRegression`, `sklearn.linear_model.LogisticRegressionCV`,
107-
`sklearn.linear_model.SGDClassifier`, or `sklearn.svm.LinearSVC`. If a pre-fitted classifier
105+
`sklearn.svm.LinearSVC`, `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`,
106+
`sklearn.linear_model.RidgeClassifier`, `sklearn.linear_model.RidgeClassifierCV`,
107+
`sklearn.linear_model.SGDClassifier`, or `sklearn.linear_model.Perceptron`. If a pre-fitted classifier
108108
is provided, it is used to compute :math:`{\mathbf{Z}}`.
109109
Note that any pre-fitting of the classifier will be lost if `PCovC` is
110110
within a composite estimator that enforces cloning, e.g.,
111-
`sklearn.compose.TransformedTargetclassifier` or
112111
`sklearn.pipeline.Pipeline` with model caching.
113112
In such cases, the classifier will be re-fitted on the same
114113
training data as the composite estimator.
115-
If `precomputed`, we assume that the `y` passed to the `fit` function
116-
is the classified form of the targets :math:`{\mathbf{\hat{Y}}}`.
117114
If None, ``sklearn.linear_model.LogisticRegression()``
118115
is used as the classifier.
119116
@@ -147,15 +144,24 @@ class PCovC(LinearClassifierMixin, _BasePCov):
147144
n_components, or the lesser value of n_features and n_samples
148145
if n_components is None.
149146
147+
classifier : estimator object
148+
The linear classifier passed for fitting.
149+
150+
z_classifier_ : estimator object
151+
The linear classifier fit between X and Y.
152+
153+
classifier_ : estimator object
154+
The linear classifier fit between T and Y.
155+
150156
pxt_ : ndarray of size :math:`({n_{features}, n_{components}})`
151157
the projector, or weights, from the input space :math:`\mathbf{X}`
152158
to the latent-space projection :math:`\mathbf{T}`
153159
154-
pxz_ : ndarray of size :math:`({n_{features}, n_{classes}})`
160+
pxz_ : ndarray of size :math:`({n_{features}, })` or :math:`({n_{features}, n_{classes}})`
155161
the projector, or weights, from the input space :math:`\mathbf{X}`
156162
to the class confidence scores :math:`\mathbf{Z}`
157163
158-
ptz_ : ndarray of size :math:`({n_{components}, n_{classes}})`
164+
ptz_ : ndarray of size :math:`({n_{components}, })` or :math:`({n_{components}, n_{classes}})`
159165
the projector, or weights, from the latent-space projection
160166
:math:`\mathbf{T}` to the class confidence scores :math:`\mathbf{Z}`
161167
@@ -171,19 +177,20 @@ class PCovC(LinearClassifierMixin, _BasePCov):
171177
--------
172178
>>> import numpy as np
173179
>>> from skmatter.decomposition import PCovC
180+
>>> from sklearn.preprocessing import StandardScaler
174181
>>> X = np.array([[-1, 0, -2, 3], [3, -2, 0, 1], [-3, 0, -1, -1], [1, 3, 0, -2]])
175182
>>> X = StandardScaler().fit_transform(X)
176183
>>> Y = np.array([0, 1, 2, 0])
177184
>>> pcovc = PCovC(mixing=0.1, n_components=2)
178185
>>> pcovc.fit(X, Y)
179186
PCovC(mixing=0.1, n_components=2)
180187
>>> pcovc.transform(X)
181-
array([[-0.4794854 -0.46228114]
182-
[ 1.9416966 0.2532831 ]
183-
[-1.08744947 0.89117784]
184-
[-0.37476173 -0.6821798 ]])
188+
array([[-0.4794854 , -0.46228114],
189+
[ 1.9416966 , 0.2532831 ],
190+
[-1.08744947, 0.89117784],
191+
[-0.37476173, -0.6821798 ]])
185192
>>> pcovc.predict(X)
186-
array([0 1 2 0])
193+
array([0, 1, 2, 0])
187194
""" # NoQa: E501
188195

189196
def __init__(
@@ -225,38 +232,30 @@ def fit(self, X, Y, W=None):
225232
to have unit variance, otherwise :math:`\mathbf{X}` should be
226233
scaled so that each feature has a variance of 1 / n_features.
227234
228-
Y : numpy.ndarray, shape (n_samples, n_properties)
229-
Training data, where n_samples is the number of samples and n_properties is
230-
the number of properties
231-
232-
It is suggested that :math:`\mathbf{X}` be centered by its column-means and
233-
scaled. If features are related, the matrix should be scaled to have unit
234-
variance, otherwise :math:`\mathbf{Y}` should be scaled so that each feature
235-
has a variance of 1 / n_features.
236-
237-
If the passed classifier = `precomputed`, it is assumed that Y is the
238-
classified form of the properties, :math:`{\mathbf{\hat{Y}}}`.
235+
Y : numpy.ndarray, shape (n_samples,)
236+
Training targets (class labels), where n_samples is the number of samples.
239237
240238
W : numpy.ndarray, shape (n_features, n_properties)
241239
Classification weights, optional when classifier=`precomputed`. If not
242-
passed, it is assumed that `W = np.linalg.lstsq(X, Y, self.tol)[0]`
240+
passed, it is assumed that the weights will be taken from a linear classifier
241+
fit between X and Y.
243242
"""
244-
X, Y = validate_data(self, X, Y, y_numeric=False, multi_output=True)
243+
244+
X, Y = validate_data(self, X, Y, y_numeric=False)
245245
check_classification_targets(Y)
246246
self.classes_ = np.unique(Y)
247247

248248
super()._fit_utils(X)
249249

250250
compatible_classifiers = (
251-
LinearDiscriminantAnalysis,
252-
LinearSVC,
253251
LogisticRegression,
254252
LogisticRegressionCV,
255-
MultiOutputClassifier,
256-
Perceptron,
253+
LinearSVC,
254+
LinearDiscriminantAnalysis,
257255
RidgeClassifier,
258256
RidgeClassifierCV,
259257
SGDClassifier,
258+
Perceptron,
260259
)
261260

262261
if self.classifier not in ["precomputed", None] and not isinstance(
@@ -275,20 +274,13 @@ def fit(self, X, Y, W=None):
275274
classifier = self.classifier
276275

277276
self.z_classifier_ = check_cl_fit(classifier, X, Y)
277+
W = self.z_classifier_.coef_.T.reshape(X.shape[1], -1)
278278

279-
if isinstance(self.z_classifier_, MultiOutputClassifier):
280-
W = np.hstack([est_.coef_.T for est_ in self.z_classifier_.estimators_])
281-
else:
282-
W = self.z_classifier_.coef_.T.reshape(X.shape[1], -1)
283-
284-
# we don't want to copy all parameters of classifier, such as n_features_in, since we are re-fitting it on T and Y
285-
self.classifier_ = clone(classifier)
286279
else:
280+
# If precomputed, use default classifier to predict Y from T
281+
classifier = LogisticRegression()
287282
if W is None:
288-
W = np.linalg.lstsq(X, Y, self.tol)[0]
289-
290-
# if precomputed, use default classifier to predict Y from T
291-
self.classifier_ = LogisticRegression()
283+
W = LogisticRegression().fit(X, Y).coef_.T.reshape(X.shape[1], -1)
292284

293285
Z = X @ W
294286

@@ -299,16 +291,11 @@ def fit(self, X, Y, W=None):
299291

300292
# instead of using linear regression solution, refit with the classifier
301293
# and steal weights to get pxz and ptz
302-
self.classifier_.fit(X @ self.pxt_, Y)
303294

304-
if isinstance(self.classifier_, MultiOutputClassifier):
305-
self.ptz_ = np.hstack(
306-
[est_.coef_.T for est_ in self.classifier_.estimators_]
307-
)
308-
self.pxz_ = self.pxt_ @ self.ptz_
309-
else:
310-
self.ptz_ = self.classifier_.coef_.T
311-
self.pxz_ = self.pxt_ @ self.ptz_
295+
self.classifier_ = clone(classifier).fit(X @ self.pxt_, Y)
296+
297+
self.ptz_ = self.classifier_.coef_.T
298+
self.pxz_ = self.pxt_ @ self.ptz_
312299

313300
if len(Y.shape) == 1 and type_of_target(Y) == "binary":
314301
self.pxz_ = self.pxz_.reshape(
@@ -426,4 +413,4 @@ def transform(self, X=None):
426413
New data, where n_samples is the number of samples
427414
and n_features is the number of features.
428415
"""
429-
return super().transform(X)
416+
return super().transform(X)

src/skmatter/decomposition/_pcovr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,4 +427,4 @@ def score(self, X, y, T=None):
427427
def __sklearn_tags__(self):
428428
tags = super().__sklearn_tags__()
429429
tags.regressor_tags.poor_score = True
430-
return tags
430+
return tags

tests/test_pcovc.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
from sklearn import exceptions
66
from sklearn.datasets import load_breast_cancer as get_dataset
77
from sklearn.decomposition import PCA
8-
from sklearn.linear_model import LogisticRegression, RidgeClassifier
9-
from sklearn.svm import LinearSVC
8+
from sklearn.linear_model import LogisticRegression
109

1110
from sklearn.naive_bayes import GaussianNB
1211
from sklearn.preprocessing import StandardScaler
@@ -216,8 +215,8 @@ def test_spaces_equivalent(self):
216215

217216
self.assertTrue(
218217
np.allclose(
219-
pcovc_ss.predict(self.X),
220-
pcovc_fs.predict(self.X),
218+
pcovc_ss.decision_function(self.X),
219+
pcovc_fs.decision_function(self.X),
221220
self.error_tol,
222221
)
223222
)
@@ -522,9 +521,10 @@ def test_incompatible_classifier(self):
522521
self.assertEqual(
523522
str(cm.exception),
524523
"Classifier must be an instance of "
525-
"`LinearDiscriminantAnalysis`, `LinearSVC`, `LogisticRegression`, "
526-
"`LogisticRegressionCV`, `MultiOutputClassifier`, `Perceptron`, "
527-
"`RidgeClassifier`, `RidgeClassifierCV`, `SGDClassifier`, or `precomputed`",
524+
"`LogisticRegression`, `LogisticRegressionCV`, `LinearSVC`, "
525+
"`LinearDiscriminantAnalysis`, `RidgeClassifier`, "
526+
"`RidgeClassifierCV`, `SGDClassifier`, `Perceptron`, "
527+
"or `precomputed`",
528528
)
529529

530530
def test_none_classifier(self):
@@ -570,4 +570,4 @@ def test_incompatible_coef_shape(self):
570570

571571

572572
if __name__ == "__main__":
573-
unittest.main(verbosity=2)
573+
unittest.main(verbosity=2)

0 commit comments

Comments
 (0)