@@ -29,6 +29,9 @@ class FastCan(SelectorMixin, BaseEstimator):
29
29
indices_include : array-like of shape (n_inclusions,), default=None
30
30
The indices of the prerequisite features.
31
31
32
+ indices_exclude : array-like of shape (n_exclusions,), default=None
33
+ The indices of the features excluded from the selection candidates.
34
+
32
35
eta : bool, default=False
33
36
Whether to use eta-cosine method.
34
37
@@ -63,6 +66,16 @@ class FastCan(SelectorMixin, BaseEstimator):
63
66
The h-correlation/eta-cosine of selected features. The order of
64
67
the scores corresponds to the feature selection process.
65
68
69
+ X_transformed_ : ndarray of shape (n_samples_, n_features), dtype=float, order='F'
70
+ Transformed feature matrix.
71
+ When h-correlation method is used, n_samples_ = n_samples.
72
+ When eta-cosine method is used, n_samples_ = n_features+n_outputs.
73
+
74
+ y_transformed_ : ndarray of shape (n_samples_, n_outputs), dtype=float, order='F'
75
+ Transformed target matrix.
76
+ When h-correlation method is used, n_samples_ = n_samples.
77
+ When eta-cosine method is used, n_samples_ = n_features+n_outputs.
78
+
66
79
References
67
80
----------
68
81
* Zhang, S., & Lang, Z. Q. (2022).
@@ -88,6 +101,7 @@ class FastCan(SelectorMixin, BaseEstimator):
88
101
Interval (Integral , 1 , None , closed = "left" ),
89
102
],
90
103
"indices_include" : [None , "array-like" ],
104
+ "indices_exclude" : [None , "array-like" ],
91
105
"eta" : ["boolean" ],
92
106
"tol" : [Interval (Real , 0 , None , closed = "neither" )],
93
107
"verbose" : ["verbose" ],
@@ -97,12 +111,14 @@ def __init__(
97
111
self ,
98
112
n_features_to_select = 1 ,
99
113
indices_include = None ,
114
+ indices_exclude = None ,
100
115
eta = False ,
101
116
tol = 0.01 ,
102
117
verbose = 1 ,
103
118
):
104
119
self .n_features_to_select = n_features_to_select
105
120
self .indices_include = indices_include
121
+ self .indices_exclude = indices_exclude
106
122
self .eta = eta
107
123
self .tol = tol
108
124
self .verbose = verbose
@@ -152,17 +168,6 @@ def fit(self, X, y):
152
168
# [:, np.newaxis] that does not.
153
169
y = y .reshape (- 1 , 1 )
154
170
155
- # indices_include
156
- if self .indices_include is None :
157
- indices_include = np .zeros (0 , dtype = int )
158
- else :
159
- indices_include = check_array (
160
- self .indices_include ,
161
- ensure_2d = False ,
162
- dtype = int ,
163
- ensure_min_samples = 0 ,
164
- )
165
-
166
171
n_samples , n_features = X .shape
167
172
n_outputs = y .shape [1 ]
168
173
@@ -172,29 +177,12 @@ def fit(self, X, y):
172
177
f"must be <= n_features { n_features } ."
173
178
)
174
179
175
- if indices_include .ndim != 1 :
176
- raise ValueError (
177
- f"Found indices_include with dim { indices_include .ndim } , "
178
- "but expected == 1."
179
- )
180
-
181
- if indices_include .size >= n_features :
182
- raise ValueError (
183
- f"n_inclusions { indices_include .size } must "
184
- f"be < n_features { n_features } ."
185
- )
186
-
187
- if np .any ((indices_include < 0 ) | (indices_include >= n_features )):
188
- raise ValueError (
189
- "Out of bounds. "
190
- f"All items in indices_include should be in [0, { n_features } ). "
191
- f"But got indices_include = { indices_include } ."
192
- )
193
-
194
180
if (n_samples < n_features + n_outputs ) and self .eta :
195
181
raise ValueError (
196
182
"`eta` cannot be True, when n_samples < n_features+n_outputs."
197
183
)
184
+ indices_include = self ._check_indices_params (self .indices_include , n_features )
185
+ indices_exclude = self ._check_indices_params (self .indices_exclude , n_features )
198
186
199
187
if self .eta :
200
188
xy_hstack = np .hstack ((X , y ))
@@ -204,23 +192,28 @@ def fit(self, X, y):
204
192
)[1 :]
205
193
qxy_transformed = singular_values .reshape (- 1 , 1 ) * unitary_arrays
206
194
qxy_transformed = np .asfortranarray (qxy_transformed )
207
- X_transformed = qxy_transformed [:, :n_features ]
208
- y_transformed = orth (qxy_transformed [:, n_features :])
195
+ self . X_transformed_ = qxy_transformed [:, :n_features ]
196
+ self . y_transformed_ = orth (qxy_transformed [:, n_features :])
209
197
else :
210
- X_transformed = X - X .mean (0 )
211
- y_transformed = orth (y - y .mean (0 ))
198
+ self .X_transformed_ = X - X .mean (0 )
199
+ self .y_transformed_ = orth (y - y .mean (0 ))
200
+
201
+ # initialized with -1
202
+ indices = np .full (self .n_features_to_select , - 1 , dtype = np .intc , order = "F" )
203
+ indices [: indices_include .size ] = indices_include
204
+ scores = np .zeros (self .n_features_to_select , dtype = float , order = "F" )
205
+ mask = np .zeros (n_features , dtype = np .ubyte , order = "F" )
206
+ mask [indices_exclude ] = True
212
207
213
- indices , scores = self ._prepare_data (
214
- indices_include ,
215
- )
216
208
n_threads = _openmp_effective_n_threads ()
217
209
_forward_search (
218
- X = X_transformed ,
219
- V = y_transformed ,
210
+ X = self . X_transformed_ ,
211
+ V = self . y_transformed_ ,
220
212
t = self .n_features_to_select ,
221
213
tol = self .tol ,
222
214
num_threads = n_threads ,
223
215
verbose = self .verbose ,
216
+ mask = mask ,
224
217
indices = indices ,
225
218
scores = scores ,
226
219
)
@@ -231,34 +224,37 @@ def fit(self, X, y):
231
224
self .scores_ = scores
232
225
return self
233
226
234
- def _prepare_data (self , indices_include ):
235
- """Prepare data for _forward_search()
236
- When h-correlation method is used, n_samples_ = n_samples.
237
- When eta-cosine method is used, n_samples_ = n_features+n_outputs.
238
-
239
- Parameters
240
- ----------
241
- indices_include : array-like of shape (n_inclusions,), dtype=int
242
- The indices of the prerequisite features.
227
+ def _check_indices_params (self , indices_params , n_features ):
228
+ """Check indices_include or indices_exclude."""
229
+ if indices_params is None :
230
+ indices_params = np .zeros (0 , dtype = int )
231
+ else :
232
+ indices_params = check_array (
233
+ indices_params ,
234
+ ensure_2d = False ,
235
+ dtype = int ,
236
+ ensure_min_samples = 0 ,
237
+ )
243
238
244
- Returns
245
- -------
246
- mask : ndarray of shape (n_features,), dtype=np.ubyte, order='F'
247
- Mask for invalid candidate features.
248
- The data type is unsigned char.
239
+ if indices_params . ndim != 1 :
240
+ raise ValueError (
241
+ f"Found indices_params with dim { indices_params . ndim } , "
242
+ "but expected == 1."
243
+ )
249
244
250
- indices: ndarray of shape (n_features_to_select,), dtype=np.intc, order='F'
251
- The indices vector of selected features, initiated with -1.
252
- The data type is signed int.
245
+ if indices_params .size >= n_features :
246
+ raise ValueError (
247
+ f"The number of indices in indices_params { indices_params .size } must "
248
+ f"be < n_features { n_features } ."
249
+ )
253
250
254
- scores: ndarray of shape (n_features_to_select,), dtype=float, order='F'
255
- The h-correlation/eta-cosine of selected features.
256
- """
257
- # initiated with -1
258
- indices = np .full (self .n_features_to_select , - 1 , dtype = np .intc , order = "F" )
259
- indices [: indices_include .size ] = indices_include
260
- scores = np .zeros (self .n_features_to_select , dtype = float , order = "F" )
261
- return indices , scores
251
+ if np .any ((indices_params < 0 ) | (indices_params >= n_features )):
252
+ raise ValueError (
253
+ "Out of bounds. "
254
+ f"All items in indices_params should be in [0, { n_features } ). "
255
+ f"But got indices_params = { indices_params } ."
256
+ )
257
+ return indices_params
262
258
263
259
def _get_support_mask (self ):
264
260
check_is_fitted (self )
0 commit comments