@@ -29,6 +29,9 @@ class FastCan(SelectorMixin, BaseEstimator):
2929 indices_include : array-like of shape (n_inclusions,), default=None
3030 The indices of the prerequisite features.
3131
32+ indices_exclude : array-like of shape (n_exclusions,), default=None
33+ The indices of the excluded features.
34+
3235 eta : bool, default=False
3336 Whether to use eta-cosine method.
3437
@@ -63,6 +66,16 @@ class FastCan(SelectorMixin, BaseEstimator):
6366 The h-correlation/eta-cosine of selected features. The order of
6467 the scores is corresponding to the feature selection process.
6568
69+ X_transformed_ : ndarray of shape (n_samples_, n_features), dtype=float, order='F'
70+ Transformed feature matrix.
71+ When h-correlation method is used, n_samples_ = n_samples.
72+ When eta-cosine method is used, n_samples_ = n_features+n_outputs.
73+
74+ y_transformed_ : ndarray of shape (n_samples_, n_outputs), dtype=float, order='F'
75+ Transformed target matrix.
76+ When h-correlation method is used, n_samples_ = n_samples.
77+ When eta-cosine method is used, n_samples_ = n_features+n_outputs.
78+
6679 References
6780 ----------
6881 * Zhang, S., & Lang, Z. Q. (2022).
@@ -88,6 +101,7 @@ class FastCan(SelectorMixin, BaseEstimator):
88101 Interval (Integral , 1 , None , closed = "left" ),
89102 ],
90103 "indices_include" : [None , "array-like" ],
104+ "indices_exclude" : [None , "array-like" ],
91105 "eta" : ["boolean" ],
92106 "tol" : [Interval (Real , 0 , None , closed = "neither" )],
93107 "verbose" : ["verbose" ],
@@ -97,12 +111,14 @@ def __init__(
97111 self ,
98112 n_features_to_select = 1 ,
99113 indices_include = None ,
114+ indices_exclude = None ,
100115 eta = False ,
101116 tol = 0.01 ,
102117 verbose = 1 ,
103118 ):
104119 self .n_features_to_select = n_features_to_select
105120 self .indices_include = indices_include
121+ self .indices_exclude = indices_exclude
106122 self .eta = eta
107123 self .tol = tol
108124 self .verbose = verbose
@@ -152,17 +168,6 @@ def fit(self, X, y):
152168 # [:, np.newaxis] that does not.
153169 y = y .reshape (- 1 , 1 )
154170
155- # indices_include
156- if self .indices_include is None :
157- indices_include = np .zeros (0 , dtype = int )
158- else :
159- indices_include = check_array (
160- self .indices_include ,
161- ensure_2d = False ,
162- dtype = int ,
163- ensure_min_samples = 0 ,
164- )
165-
166171 n_samples , n_features = X .shape
167172 n_outputs = y .shape [1 ]
168173
@@ -172,29 +177,12 @@ def fit(self, X, y):
172177 f"must be <= n_features { n_features } ."
173178 )
174179
175- if indices_include .ndim != 1 :
176- raise ValueError (
177- f"Found indices_include with dim { indices_include .ndim } , "
178- "but expected == 1."
179- )
180-
181- if indices_include .size >= n_features :
182- raise ValueError (
183- f"n_inclusions { indices_include .size } must "
184- f"be < n_features { n_features } ."
185- )
186-
187- if np .any ((indices_include < 0 ) | (indices_include >= n_features )):
188- raise ValueError (
189- "Out of bounds. "
190- f"All items in indices_include should be in [0, { n_features } ). "
191- f"But got indices_include = { indices_include } ."
192- )
193-
194180 if (n_samples < n_features + n_outputs ) and self .eta :
195181 raise ValueError (
196182 "`eta` cannot be True, when n_samples < n_features+n_outputs."
197183 )
184+ indices_include = self ._check_indices_params (self .indices_include , n_features )
185+ indices_exclude = self ._check_indices_params (self .indices_exclude , n_features )
198186
199187 if self .eta :
200188 xy_hstack = np .hstack ((X , y ))
@@ -204,23 +192,28 @@ def fit(self, X, y):
204192 )[1 :]
205193 qxy_transformed = singular_values .reshape (- 1 , 1 ) * unitary_arrays
206194 qxy_transformed = np .asfortranarray (qxy_transformed )
207- X_transformed = qxy_transformed [:, :n_features ]
208- y_transformed = orth (qxy_transformed [:, n_features :])
195+ self . X_transformed_ = qxy_transformed [:, :n_features ]
196+ self . y_transformed_ = orth (qxy_transformed [:, n_features :])
209197 else :
210- X_transformed = X - X .mean (0 )
211- y_transformed = orth (y - y .mean (0 ))
198+ self .X_transformed_ = X - X .mean (0 )
199+ self .y_transformed_ = orth (y - y .mean (0 ))
200+
201+ # initiated with -1
202+ indices = np .full (self .n_features_to_select , - 1 , dtype = np .intc , order = "F" )
203+ indices [: indices_include .size ] = indices_include
204+ scores = np .zeros (self .n_features_to_select , dtype = float , order = "F" )
205+ mask = np .zeros (n_features , dtype = np .ubyte , order = "F" )
206+ mask [indices_exclude ] = True
212207
213- indices , scores = self ._prepare_data (
214- indices_include ,
215- )
216208 n_threads = _openmp_effective_n_threads ()
217209 _forward_search (
218- X = X_transformed ,
219- V = y_transformed ,
210+ X = self . X_transformed_ ,
211+ V = self . y_transformed_ ,
220212 t = self .n_features_to_select ,
221213 tol = self .tol ,
222214 num_threads = n_threads ,
223215 verbose = self .verbose ,
216+ mask = mask ,
224217 indices = indices ,
225218 scores = scores ,
226219 )
@@ -231,34 +224,37 @@ def fit(self, X, y):
231224 self .scores_ = scores
232225 return self
233226
234- def _prepare_data (self , indices_include ):
235- """Prepare data for _forward_search()
236- When h-correlation method is used, n_samples_ = n_samples.
237- When eta-cosine method is used, n_samples_ = n_features+n_outputs.
238-
239- Parameters
240- ----------
241- indices_include : array-like of shape (n_inclusions,), dtype=int
242- The indices of the prerequisite features.
227+ def _check_indices_params (self , indices_params , n_features ):
228+ """Check indices_include or indices_exclude."""
229+ if indices_params is None :
230+ indices_params = np .zeros (0 , dtype = int )
231+ else :
232+ indices_params = check_array (
233+ indices_params ,
234+ ensure_2d = False ,
235+ dtype = int ,
236+ ensure_min_samples = 0 ,
237+ )
243238
244- Returns
245- -------
246- mask : ndarray of shape (n_features,), dtype=np.ubyte, order='F'
247- Mask for invalid candidate features.
248- The data type is unsigned char.
239+ if indices_params . ndim != 1 :
240+ raise ValueError (
241+ f"Found indices_params with dim { indices_params . ndim } , "
242+ "but expected == 1."
243+ )
249244
250- indices: ndarray of shape (n_features_to_select,), dtype=np.intc, order='F'
251- The indices vector of selected features, initiated with -1.
252- The data type is signed int.
245+ if indices_params .size >= n_features :
246+ raise ValueError (
247+ f"The number of indices in indices_params { indices_params .size } must "
248+ f"be < n_features { n_features } ."
249+ )
253250
254- scores: ndarray of shape (n_features_to_select,), dtype=float, order='F'
255- The h-correlation/eta-cosine of selected features.
256- """
257- # initiated with -1
258- indices = np .full (self .n_features_to_select , - 1 , dtype = np .intc , order = "F" )
259- indices [: indices_include .size ] = indices_include
260- scores = np .zeros (self .n_features_to_select , dtype = float , order = "F" )
261- return indices , scores
251+ if np .any ((indices_params < 0 ) | (indices_params >= n_features )):
252+ raise ValueError (
253+ "Out of bounds. "
254+ f"All items in indices_params should be in [0, { n_features } ). "
255+ f"But got indices_params = { indices_params } ."
256+ )
257+ return indices_params
262258
263259 def _get_support_mask (self ):
264260 check_is_fitted (self )
0 commit comments