1111)
1212from sklearn .linear_model ._base import LinearClassifierMixin
1313from sklearn .svm import LinearSVC
14- from sklearn .multioutput import MultiOutputClassifier
1514from sklearn .utils import check_array
1615from sklearn .utils .validation import check_is_fitted , validate_data
1716from sklearn .utils .multiclass import check_classification_targets , type_of_target
@@ -99,21 +98,19 @@ class PCovC(LinearClassifierMixin, _BasePCov):
9998 default=`sample` when :math:`{n_{samples} < n_{features}}` and
10099 `feature` when :math:`{n_{features} < n_{samples}}`
101100
102- classifier: {`RidgeClassifier`, `RidgeClassifierCV`, `LogisticRegression`,
103- `LogisticRegressionCV`, `SGDClassifier`, `LinearSVC`, `precomputed`}, default=None
104- classifier for computing :math:`{\mathbf{Z}}`. The classifier should be one
105- `sklearn.linear_model.RidgeClassifier`, `sklearn.linear_model.RidgeClassifierCV`,
101+ classifier: {`LogisticRegression`, `LogisticRegressionCV`, `LinearSVC`, `LinearDiscriminantAnalysis`,
102+ `RidgeClassifier`, `RidgeClassifierCV`, `SGDClassifier`, `Perceptron`, `precomputed`}, default=None
103+ classifier for computing :math:`{\mathbf{Z}}`. The classifier should be one of
106104 `sklearn.linear_model.LogisticRegression`, `sklearn.linear_model.LogisticRegressionCV`,
107- `sklearn.linear_model.SGDClassifier`, or `sklearn.svm.LinearSVC`. If a pre-fitted classifier
105+ `sklearn.svm.LinearSVC`, `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`,
106+ `sklearn.linear_model.RidgeClassifier`, `sklearn.linear_model.RidgeClassifierCV`,
107+ `sklearn.linear_model.SGDClassifier`, or `sklearn.linear_model.Perceptron`. If a pre-fitted classifier
108108 is provided, it is used to compute :math:`{\mathbf{Z}}`.
109109 Note that any pre-fitting of the classifier will be lost if `PCovC` is
110110 within a composite estimator that enforces cloning, e.g.,
111- `sklearn.compose.TransformedTargetclassifier` or
112111 `sklearn.pipeline.Pipeline` with model caching.
113112 In such cases, the classifier will be re-fitted on the same
114113 training data as the composite estimator.
115- If `precomputed`, we assume that the `y` passed to the `fit` function
116- is the classified form of the targets :math:`{\mathbf{\hat{Y}}}`.
117114 If None, ``sklearn.linear_model.LogisticRegression()``
118115 is used as the classifier.
119116
@@ -147,15 +144,24 @@ class PCovC(LinearClassifierMixin, _BasePCov):
147144 n_components, or the lesser value of n_features and n_samples
148145 if n_components is None.
149146
147+ classifier : estimator object
148+ The linear classifier passed for fitting.
149+
150+ z_classifier_ : estimator object
151+ The linear classifier fit between X and Y.
152+
153+ classifier_ : estimator object
154+ The linear classifier fit between T and Y.
155+
150156 pxt_ : ndarray of size :math:`({n_{features}, n_{components}})`
151157 the projector, or weights, from the input space :math:`\mathbf{X}`
152158 to the latent-space projection :math:`\mathbf{T}`
153159
154- pxz_ : ndarray of size :math:`({n_{features}, n_{classes}})`
160+ pxz_ : ndarray of size :math:`({n_{features}, })` or :math:`({n_{features}, n_{classes}})`
155161 the projector, or weights, from the input space :math:`\mathbf{X}`
156162 to the class confidence scores :math:`\mathbf{Z}`
157163
158- ptz_ : ndarray of size :math:`({n_{components}, n_{classes}})`
164+ ptz_ : ndarray of size :math:`({n_{components}, })` or :math:`({n_{components}, n_{classes}})`
159165 the projector, or weights, from the latent-space projection
160166 :math:`\mathbf{T}` to the class confidence scores :math:`\mathbf{Z}`
161167
@@ -171,19 +177,20 @@ class PCovC(LinearClassifierMixin, _BasePCov):
171177 --------
172178 >>> import numpy as np
173179 >>> from skmatter.decomposition import PCovC
180+ >>> from sklearn.preprocessing import StandardScaler
174181 >>> X = np.array([[-1, 0, -2, 3], [3, -2, 0, 1], [-3, 0, -1, -1], [1, 3, 0, -2]])
175182 >>> X = StandardScaler().fit_transform(X)
176183 >>> Y = np.array([0, 1, 2, 0])
177184 >>> pcovc = PCovC(mixing=0.1, n_components=2)
178185 >>> pcovc.fit(X, Y)
179186 PCovC(mixing=0.1, n_components=2)
180187 >>> pcovc.transform(X)
181- array([[-0.4794854 -0.46228114]
182- [ 1.9416966 0.2532831 ]
183- [-1.08744947 0.89117784]
184- [-0.37476173 -0.6821798 ]])
188+ array([[-0.4794854 , -0.46228114],
189+ [ 1.9416966 , 0.2532831 ],
190+ [-1.08744947, 0.89117784],
191+ [-0.37476173, -0.6821798 ]])
185192 >>> pcovc.predict(X)
186- array([0 1 2 0])
193+ array([0, 1, 2, 0])
187194 """ # NoQa: E501
188195
189196 def __init__ (
@@ -225,38 +232,30 @@ def fit(self, X, Y, W=None):
225232 to have unit variance, otherwise :math:`\mathbf{X}` should be
226233 scaled so that each feature has a variance of 1 / n_features.
227234
228- Y : numpy.ndarray, shape (n_samples, n_properties)
229- Training data, where n_samples is the number of samples and n_properties is
230- the number of properties
231-
232- It is suggested that :math:`\mathbf{X}` be centered by its column-means and
233- scaled. If features are related, the matrix should be scaled to have unit
234- variance, otherwise :math:`\mathbf{Y}` should be scaled so that each feature
235- has a variance of 1 / n_features.
236-
237- If the passed classifier = `precomputed`, it is assumed that Y is the
238- classified form of the properties, :math:`{\mathbf{\hat{Y}}}`.
235+ Y : numpy.ndarray, shape (n_samples,)
236+ Target class labels, where n_samples is the number of samples.
239237
240238 W : numpy.ndarray, shape (n_features, n_properties)
241239 Classification weights, optional when classifier=`precomputed`. If not
242- passed, it is assumed that `W = np.linalg.lstsq(X, Y, self.tol)[0]`
240+ passed, the weights are taken from the coefficients of a
241+ `sklearn.linear_model.LogisticRegression` classifier fit between X and Y.
243242 """
244- X , Y = validate_data (self , X , Y , y_numeric = False , multi_output = True )
243+
244+ X , Y = validate_data (self , X , Y , y_numeric = False )
245245 check_classification_targets (Y )
246246 self .classes_ = np .unique (Y )
247247
248248 super ()._fit_utils (X )
249249
250250 compatible_classifiers = (
251- LinearDiscriminantAnalysis ,
252- LinearSVC ,
253251 LogisticRegression ,
254252 LogisticRegressionCV ,
255- MultiOutputClassifier ,
256- Perceptron ,
253+ LinearSVC ,
254+ LinearDiscriminantAnalysis ,
257255 RidgeClassifier ,
258256 RidgeClassifierCV ,
259257 SGDClassifier ,
258+ Perceptron ,
260259 )
261260
262261 if self .classifier not in ["precomputed" , None ] and not isinstance (
@@ -275,20 +274,13 @@ def fit(self, X, Y, W=None):
275274 classifier = self .classifier
276275
277276 self .z_classifier_ = check_cl_fit (classifier , X , Y )
277+ W = self .z_classifier_ .coef_ .T .reshape (X .shape [1 ], - 1 )
278278
279- if isinstance (self .z_classifier_ , MultiOutputClassifier ):
280- W = np .hstack ([est_ .coef_ .T for est_ in self .z_classifier_ .estimators_ ])
281- else :
282- W = self .z_classifier_ .coef_ .T .reshape (X .shape [1 ], - 1 )
283-
284- # we don't want to copy all parameters of classifier, such as n_features_in, since we are re-fitting it on T and Y
285- self .classifier_ = clone (classifier )
286279 else :
280+ # If precomputed, use default classifier to predict Y from T
281+ classifier = LogisticRegression ()
287282 if W is None :
288- W = np .linalg .lstsq (X , Y , self .tol )[0 ]
289-
290- # if precomputed, use default classifier to predict Y from T
291- self .classifier_ = LogisticRegression ()
283+ W = LogisticRegression ().fit (X , Y ).coef_ .T .reshape (X .shape [1 ], - 1 )
292284
293285 Z = X @ W
294286
@@ -299,16 +291,11 @@ def fit(self, X, Y, W=None):
299291
300292 # instead of using linear regression solution, refit with the classifier
301293 # and steal weights to get pxz and ptz
302- self .classifier_ .fit (X @ self .pxt_ , Y )
303294
304- if isinstance (self .classifier_ , MultiOutputClassifier ):
305- self .ptz_ = np .hstack (
306- [est_ .coef_ .T for est_ in self .classifier_ .estimators_ ]
307- )
308- self .pxz_ = self .pxt_ @ self .ptz_
309- else :
310- self .ptz_ = self .classifier_ .coef_ .T
311- self .pxz_ = self .pxt_ @ self .ptz_
295+ self .classifier_ = clone (classifier ).fit (X @ self .pxt_ , Y )
296+
297+ self .ptz_ = self .classifier_ .coef_ .T
298+ self .pxz_ = self .pxt_ @ self .ptz_
312299
313300 if len (Y .shape ) == 1 and type_of_target (Y ) == "binary" :
314301 self .pxz_ = self .pxz_ .reshape (
@@ -426,4 +413,4 @@ def transform(self, X=None):
426413 New data, where n_samples is the number of samples
427414 and n_features is the number of features.
428415 """
429- return super ().transform (X )
416+ return super ().transform (X )
0 commit comments