99from scipy .linalg import sqrtm as MatrixSqrt
1010from scipy .sparse .linalg import svds
1111
12- from sklearn import clone
13- from sklearn .base import check_X_y
14- from sklearn .calibration import column_or_1d
1512from sklearn .decomposition ._base import _BasePCA
16- from sklearn .linear_model import LogisticRegression , Ridge
1713from sklearn .linear_model ._base import LinearModel
18- from sklearn .multioutput import MultiOutputClassifier
19- from sklearn .naive_bayes import LabelBinarizer
2014from sklearn .decomposition ._pca import _infer_dimension
21- from sklearn .utils import check_array , check_random_state
15+ from sklearn .utils import check_random_state
2216from sklearn .utils ._arpack import _init_arpack_v0
2317from sklearn .utils .extmath import randomized_svd , stable_cumsum , svd_flip
24- from sklearn .utils .validation import check_is_fitted , check_X_y
18+ from sklearn .utils .validation import check_is_fitted
2519
26- from skmatter .utils import check_lr_fit , pcovr_covariance , pcovr_kernel
27-
28- import sys
29- sys .path .append ('scikit-matter' )
30- from src .skmatter .utils ._pcovc_utils import check_cl_fit
20+ from skmatter .utils import pcovr_covariance , pcovr_kernel
3121
3222class _BasePCov (_BasePCA , LinearModel ):
3323 def __init__ (
@@ -37,29 +27,22 @@ def __init__(
3727 svd_solver = "auto" ,
3828 tol = 1e-12 ,
3929 space = "auto" ,
40- regressor = None ,
41- classifier = None ,
4230 iterated_power = "auto" ,
4331 random_state = None ,
4432 whiten = False ,
45- subclass = None
46-
4733 ):
4834 self .mixing = mixing
4935 self .n_components = n_components
5036 self .svd_solver = svd_solver
5137 self .tol = tol
5238 self .space = space
53- self .regressor = regressor
54- self .classifier = classifier
5539 self .iterated_power = iterated_power
5640 self .random_state = random_state
5741 self .whiten = whiten
58- self .subclass = subclass
59-
60- def fit (self , X , y , W = None ):
61- X , y = check_X_y (X , y , y_numeric = True if self .subclass == "PCovR" else False , multi_output = True )
6242
43+ # Shared setup used by both the PCovR and PCovC fit methods; everything
44+ # beyond this common functionality is left to the subclass's own fit.
45+ def _fit_util (self , X , y ):
6346 # saved for inverse transformations from the latent space,
6447 # should be zero in the case that the features have been properly centered
6548 self .mean_ = np .mean (X , axis = 0 )
@@ -87,7 +70,6 @@ def fit(self, X, y, W=None):
8770 else :
8871 self .n_components_ = self .n_components
8972
90-
9173 # Handle svd_solver
9274 self .fit_svd_solver_ = self .svd_solver
9375 if self .fit_svd_solver_ == "auto" :
@@ -107,127 +89,7 @@ def fit(self, X, y, W=None):
10789 self .space_ = "feature"
10890 else :
10991 self .space_ = "sample"
110-
111- if self .subclass == "PCovR" :
112- # Assign the default regressor
113- if self .regressor != "precomputed" :
114- if self .regressor is None :
115- regressor = Ridge (
116- alpha = 1e-6 ,
117- fit_intercept = False ,
118- tol = 1e-12 ,
119- )
120- else :
121- regressor = self .regressor
122-
123- self .regressor_ = check_lr_fit (regressor , X , y = y )
124-
125- W = self .regressor_ .coef_ .T .reshape (X .shape [1 ], - 1 )
126- Yhat = self .regressor_ .predict (X ).reshape (X .shape [0 ], - 1 )
127- else :
128- Yhat = y .copy ()
129- if W is None :
130- W = np .linalg .lstsq (X , Yhat , self .tol )[0 ]
131-
132- if self .space_ == "feature" :
133- self ._fit_feature_space (X , y .reshape (Yhat .shape ), Yhat )
134- else :
135- self ._fit_sample_space (X , y .reshape (Yhat .shape ), Yhat , W )
136-
137- self .pxy_ = self .pxt_ @ self .pty_
138- if len (y .shape ) == 1 :
139- self .pxy_ = self .pxy_ .reshape (
140- X .shape [1 ],
141- )
142- self .pty_ = self .pty_ .reshape (
143- self .n_components_ ,
144- )
145-
146- self .components_ = self .pxt_ .T # for sklearn compatibility
147-
148- else :
149- # Assign the default classifier
150- if self .classifier != "precomputed" :
151- if self .classifier is None :
152- classifier = LogisticRegression ()
153- else :
154- classifier = self .classifier
155-
156- self .z_classifier_ = check_cl_fit (classifier , X , y = y ) #change to z classifier, fits linear classifier on x and y to get Pxz
157-
158- if isinstance (self .z_classifier_ , MultiOutputClassifier ):
159- W = np .hstack ([est_ .coef_ .T for est_ in self .z_classifier_ .estimators_ ])
160- Z = X @ W #computes Z, basically Z=XPxz
161-
162- else :
163- W = self .z_classifier_ .coef_ .T .reshape (X .shape [1 ], - 1 )
164- Z = self .z_classifier_ .decision_function (X ).reshape (X .shape [0 ], - 1 ) #computes Z this will throw an error since pxz and ptz aren't defined yet
165-
166- else :
167- Z = y .copy ()
168- if W is None :
169- W = np .linalg .lstsq (X , Z , self .tol )[0 ] #W = weights for Pxz
170-
171- self ._label_binarizer = LabelBinarizer (neg_label = - 1 , pos_label = 1 )
172- Y = self ._label_binarizer .fit_transform (y )
173- if not self ._label_binarizer .y_type_ .startswith ("multilabel" ):
174- y = column_or_1d (y , warn = True )
175-
176- if self .space_ == "feature" :
177- self ._fit_feature_space (X , Y .reshape (Z .shape ), Z )
178- else :
179- self ._fit_sample_space (X , Y .reshape (Z .shape ), Z , W )
180-
181- # instead of using linear regression solution, refit with the classifier
182- # and steal weights to get ptz
183- # this is failing because self.classifier is never changed from None if None is passed as classifier
184- # change self.classifier to classifier and see what happens. if classifier is precomputed, there might be more errors so be careful.
185- # if classifier is precomputed, I don't think we need to check if the classifier is fit or not?
186-
187- #cases:
188- #1. if classifier has been fit with X and Y already, we need to use classifier that hasn't been fitted and refit on T, y
189- #2. if classifier has not been fit with X and Y, we call check_cl_fit
190-
191- # if (fitted(X,y)):
192- #
193- # else:
194- # check_cl_fit
195-
196- #self.classifier_ = check_cl_fit(classifier, X @ self.pxt_, y=y)
197- #we don't want to copy ALl parameters of classifier, such as n_features_in, since we are re-fitting it on T, y
198- if self .classifier != "precomputed" :
199- self .classifier_ = clone (classifier ).fit (X @ self .pxt_ , y )
200- else :
201- self .classifier_ = LogisticRegression ().fit (X @ self .pxt_ , y )
202-
203- self .classifier_ ._validate_data (X @ self .pxt_ , y , reset = False )
204-
205- #self.classifier_ = LogisticRegression().fit(X @ self.pxt_, y)
206- #check_cl_fit(classifier., X @ self.pxt_, y=y) #Has Ptz as weights
207- #print("Self.classifier_ shape "+ str(self.classifier_.coef_.shape))
208- #print("PCovC Self.pxt_ "+ str((self.pxt_).shape))
209-
210- if isinstance (self .classifier_ , MultiOutputClassifier ):
211- self .ptz_ = np .hstack (
212- [est_ .coef_ .T for est_ in self .classifier_ .estimators_ ]
213- )
214- self .pxz_ = self .pxt_ @ self .ptz_
215- else :
216- self .ptz_ = self .classifier_ .coef_ .T #self.ptz_ = self.classifier_.coef.T
217- self .pxz_ = self .pxt_ @ self .ptz_ #self.pxz_ = self.pxt_ @ self.ptz_
218-
219- if len (Y .shape ) == 1 :
220- self .pxz_ = self .pxz_ .reshape (
221- X .shape [1 ],
222- )
223- self .ptz_ = self .ptz_ .reshape (
224- self .n_components_ ,
225- )
226-
227- self .components_ = self .pxt_ .T # for sklearn compatibility
228-
229- return self
230-
92+
23193 def _fit_feature_space (self , X , Y , Yhat ):
23294 Ct , iCsqrt = pcovr_covariance (
23395 mixing = self .mixing ,
@@ -264,8 +126,7 @@ def _fit_feature_space(self, X, Y, Yhat):
264126
265127 self .pxt_ = np .linalg .multi_dot ([iCsqrt , Vt .T , S_sqrt ])
266128 self .ptx_ = np .linalg .multi_dot ([S_sqrt_inv , Vt , Csqrt ])
267- if self .subclass == "PCovR" :
268- self .pty_ = np .linalg .multi_dot ([S_sqrt_inv , Vt , iCsqrt , X .T , Y ])
129+ self .pty_ = np .linalg .multi_dot ([S_sqrt_inv , Vt , iCsqrt , X .T , Y ])
269130
270131 def _fit_sample_space (self , X , Y , Yhat , W ):
271132 Kt = pcovr_kernel (mixing = self .mixing , X = X , Y = Yhat )
@@ -291,8 +152,7 @@ def _fit_sample_space(self, X, Y, Yhat, W):
291152
292153 self .pxt_ = P @ T
293154 self .ptx_ = T .T @ X
294- if self .subclass == "PCovR" :
295- self .pty_ = T .T @ Y
155+ self .pty_ = T .T @ Y
296156
297157 # exactly the same in PCovR/PCovC
298158 def _decompose_truncated (self , mat ):
@@ -422,38 +282,8 @@ def inverse_transform(self, T):
422282
423283 return T @ self .ptx_
424284
425- def predict (self , X = None , T = None ):
426- if X is None and T is None :
427- raise ValueError ("Either X or T must be supplied." )
428-
429- if (X is not None ):
430- if self .subclass == "PCovR" :
431- X = check_array (X )
432- return X @ self .pxy_
433- else :
434- return self .classifier_ .predict (X @ self .pxt_ ) #Ptz(T) -> activation -> Y labels
435- else :
436- if self .subclass == "PCovR" :
437- T = check_array (T )
438- return T @ self .pty_
439- else :
440- return self .classifier_ .predict (T ) #Ptz(T) -> activation -> Y labels
441-
442-
443- #exactly the same in PCovr/PCovC
285+ #exactly the same in PCovR/PCovC
444286 def transform (self , X = None ):
445287 check_is_fitted (self , ["pxt_" , "mean_" ])
446-
447288 return super ().transform (X )
448-
449- def score (self , X , Y , T = None ):
450- if T is None :
451- T = self .transform (X )
452-
453- x = self .inverse_transform (T )
454- y = self .predict (T = T ) if self .subclass == "PCovR" else self .decision_function (T = T )
455-
456- return - (
457- np .linalg .norm (X - x ) ** 2.0 / np .linalg .norm (X ) ** 2.0
458- + np .linalg .norm (Y - y ) ** 2.0 / np .linalg .norm (Y ) ** 2.0
459- )
289+
0 commit comments