function warn_double_spec(arg, model)
    return "Using `model=$arg`. Ignoring keyword specification `model=$model`. "
end
-
+
const ERR_SPECIFY_MODEL = ArgumentError(
    "You need to specify model as a positional argument or specify `model=...`."
)
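A quick sketch of how these two guards are exercised (illustrative only; `rf` stands for any base model that reports feature importances):

```
RecursiveFeatureElimination(rf)              # positional specification: OK
RecursiveFeatureElimination(model = rf)      # keyword specification: OK
RecursiveFeatureElimination()                # throws ERR_SPECIFY_MODEL
RecursiveFeatureElimination(rf, model = rf)  # warns via warn_double_spec; `rf` wins
```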
@@ -36,66 +36,67 @@ for (ModelType, ModelSuperType) in MODELTYPE_GIVEN_SUPERTYPES
    eval(ex)
end

-eval(:(const RFE{M} = Union{$((Expr(:curly, modeltype, :M) for modeltype in MODEL_TYPES)...)}))
+eval(:(const RFE{M} =
+    Union{$((Expr(:curly, modeltype, :M) for modeltype in MODEL_TYPES)...)}))

# Common keyword constructor for both model types
"""
    RecursiveFeatureElimination(model, n_features, step)

-This model implements a recursive feature elimination algorithm for feature selection.
-It recursively removes features, training a base model on the remaining features and
+This model implements a recursive feature elimination algorithm for feature selection.
+It recursively removes features, training a base model on the remaining features and
evaluating their importance until the desired number of features is selected.

-Construct an instance with default hyper-parameters using the syntax
-`model = RecursiveFeatureElimination(model=...)`. Provide keyword arguments to override
-hyper-parameter defaults.
-
+Construct an instance with default hyper-parameters using the syntax
+`rfe_model = RecursiveFeatureElimination(model=...)`. Provide keyword arguments to override
+hyper-parameter defaults.
+
# Training data
-In MLJ or MLJBase, bind an instance `model` to data with
+In MLJ or MLJBase, bind an instance `rfe_model` to data with

-    mach = machine(model, X, y)
+    mach = machine(rfe_model, X, y)

OR, if the base model supports weights, as

-    mach = machine(model, X, y, w)
+    mach = machine(rfe_model, X, y, w)

Here:

-- `X` is any table of input features (eg, a `DataFrame`) whose columns are of the same scitype
-  as that required by the base model; check column scitypes with `schema(X)` and column
+- `X` is any table of input features (eg, a `DataFrame`) whose columns are of the same scitype
+  as that required by the base model; check column scitypes with `schema(X)` and column
  scitypes required by the base model with `input_scitype(basemodel)`.

-- `y` is the target, which can be any table of responses whose element scitype is
-  `Continuous` or `Finite`, depending on the `target_scitype` required by the base model;
+- `y` is the target, which can be any table of responses whose element scitype is
+  `Continuous` or `Finite`, depending on the `target_scitype` required by the base model;
  check the scitype with `scitype(y)`.

-- `w` is the observation weights, which can be either `nothing` (default) or an
-  `AbstractVector` whose element scitype is `Count` or `Continuous`. This is different
+- `w` is the observation weights, which can be either `nothing` (default) or an
+  `AbstractVector` whose element scitype is `Count` or `Continuous`. This is different
  from the `weights` hyperparameter of the base model; see below.

Train the machine using `fit!(mach, rows=...)`.

# Hyper-parameters
-- model: A base model with a `fit` method that provides information on feature
+- model: A base model with a `fit` method that provides information on feature
  importance (i.e. `reports_feature_importances(model) == true`).

-- n_features::Real = 0: The number of features to select. If `0`, half of the
-  features are selected. If a positive integer, the parameter is the absolute number
-  of features to select. If a real number strictly between 0 and 1, it is the fraction of features
+- n_features::Real = 0: The number of features to select. If `0`, half of the
+  features are selected. If a positive integer, the parameter is the absolute number
+  of features to select. If a real number strictly between 0 and 1, it is the fraction of features
  to select.

-- step::Real=1: If the value of `step` is at least 1, it signifies the number of features to
-  eliminate in each iteration. Conversely, if `step` falls strictly within the range of
+- step::Real=1: If the value of `step` is at least 1, it signifies the number of features to
+  eliminate in each iteration. Conversely, if `step` falls strictly within the range of
  0.0 to 1.0, it denotes the proportion (rounded down) of features to remove in each iteration.

# Operations
-- `transform(mach, X)`: transform the input table `X` into a new table containing only
+- `transform(mach, X)`: transform the input table `X` into a new table containing only
  columns corresponding to features selected by the RFE algorithm.

-- `predict(mach, X)`: transform the input table `X` into a new table as in
-  `transform(mach, X)` above, and predict using the fitted base model on the
+- `predict(mach, X)`: transform the input table `X` into a new table as in
+  `transform(mach, X)` above, and predict using the fitted base model on the
  transformed table.

# Fitted parameters
@@ -106,11 +107,11 @@ The fields of `fitted_params(mach)` are:

# Report
The fields of `report(mach)` are:
-- `ranking`: The feature ranking of each feature in the training dataset.
+- `ranking`: The feature ranking of each feature in the training dataset.

- `model_report`: report for the fitted base model.

-- `features`: names of features seen during the training process.
+- `features`: names of features seen during the training process.

# Examples
```
@@ -131,10 +132,10 @@ selector = RecursiveFeatureElimination(model = rf)
mach = machine(selector, X, y)
fit!(mach)

-# view the feature importances
+# view the feature importances
feature_importances(mach)

-# predict using the base model
+# predict using the base model
Xnew = MLJ.table(rand(rng, 50, 10));
predict(mach, Xnew)

@@ -160,7 +161,7 @@ function RecursiveFeatureElimination(
    # TODO: Check that the specified model implements the predict method.
    # probably add a trait to check this
    MMI.reports_feature_importances(model) || throw(ERR_FEATURE_IMPORTANCE_SUPPORT)
-    if model isa Deterministic
+    if model isa Deterministic
        selector = DeterministicRecursiveFeatureElimination{typeof(model)}(
            model, Float64(n_features), Float64(step)
        )
@@ -170,7 +171,7 @@ function RecursiveFeatureElimination(
        )
    else
        throw(ERR_MODEL_TYPE)
-    end
+    end
    message = MMI.clean!(selector)
    isempty(message) || @warn(message)
    return selector
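A hedged usage sketch of the dispatch above: the wrapper type follows the base model's variant. `RandomForestRegressor`, a `Deterministic` model as in the docstring example, is assumed to be loaded:

```
rf = RandomForestRegressor()
selector = RecursiveFeatureElimination(model = rf)
selector isa DeterministicRecursiveFeatureElimination  # true
```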
@@ -204,21 +205,21 @@ function MMI.fit(selector::RFE, verbosity::Int, X, y, args...)
    n_features_select = selector.n_features
    ## zero indicates that half of the features should be selected
    if n_features_select == 0
-        n_features_select = div(nfeatures, 2)
+        n_features_select = div(nfeatures, 2)
    elseif 0 < n_features_select < 1
        n_features_select = round(Int, n_features_select * nfeatures)
    else
        n_features_select = round(Int, n_features_select)
    end

    step = selector.step
-
+
    if 0 < step < 1
        step = round(Int, max(1, step * n_features_select))
    else
-        step = round(Int, step)
+        step = round(Int, step)
    end
-
+
    support = trues(nfeatures)
    ranking = ones(Int, nfeatures)  # every feature has equal rank initially
    mask = trues(nfeatures)  # for boolean indexing of ranking vector in while loop below
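A worked illustration of the resolution rules above, with assumed values rather than anything from the source:

```
# Suppose nfeatures == 10:
div(10, 2)                    # n_features == 0    resolves to 5
round(Int, 0.3 * 10)          # n_features == 0.3  resolves to 3
round(Int, max(1, 0.25 * 3))  # step == 0.25, 3 features selected: eliminate 1 per iteration
```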
@@ -230,7 +231,7 @@ function MMI.fit(selector::RFE, verbosity::Int, X, y, args...)
        # Rank the remaining features
        model = selector.model
        verbosity > 0 && @info("Fitting estimator with $(n_features_left) features.")
-
+
        data = MMI.reformat(model, MMI.selectcols(X, features_left), args...)

        fitresult, _, report = MMI.fit(model, verbosity - 1, data...)
@@ -263,14 +264,14 @@ function MMI.fit(selector::RFE, verbosity::Int, X, y, args...)
    data = MMI.reformat(selector.model, MMI.selectcols(X, features_left), args...)
    verbosity > 0 && @info("Fitting estimator with $(n_features_left) features.")
    model_fitresult, _, model_report = MMI.fit(selector.model, verbosity - 1, data...)
-
+
    fitresult = (
        support = support,
        model_fitresult = model_fitresult,
        features_left = features_left,
        features = features
    )
-    report = (
+    report = (
        ranking = ranking,
        model_report = model_report
    )
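For orientation, these named tuples surface through MLJ's machine accessors, matching the docstring's Report section (`mach` as in the examples above):

```
report(mach).ranking       # feature ranking vector
report(mach).model_report  # report of the fitted base model
```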

function MMI.transform(::RFE, fitresult, X)
    sch = Tables.schema(Tables.columns(X))
-    if (length(fitresult.features) == length(sch.names) &&
+    if (length(fitresult.features) == length(sch.names) &&
        !all(e -> e in sch.names, fitresult.features))
        throw(
            ERR_FEATURES_SEEN
@@ -312,7 +313,7 @@ function MMI.save(model::RFE, fitresult)
    atomic_fitresult = fitresult.model_fitresult
    features_left = fitresult.features_left
    features = fitresult.features
-
+
    atom = model.model
    return (
        support = copy(support),
@@ -337,14 +338,12 @@ function MMI.restore(model::RFE, serializable_fitresult)
    )
end

-## Traits definitions
-function MMI.load_path(::Type{<:DeterministicRecursiveFeatureElimination})
-    return "FeatureSelection.DeterministicRecursiveFeatureElimination"
-end
+## Trait definitions

-function MMI.load_path(::Type{<:ProbabilisticRecursiveFeatureElimination})
-    return "FeatureSelection.ProbabilisticRecursiveFeatureElimination"
-end
+# load_path points to the constructor, not the type:
+MMI.load_path(::Type{<:RFE}) = "FeatureSelection.RecursiveFeatureElimination"
+MMI.constructor(::Type{<:RFE}) = RecursiveFeatureElimination
+MMI.package_name(::Type{<:RFE}) = "FeatureSelection"

for trait in [
    :supports_weights,

## TRAINING LOSSES SUPPORT
function MMI.training_losses(model::RFE, rfe_report)
    return MMI.training_losses(model.model, rfe_report.model_report)
-end
+end
+
+## Pkg Traits
+MMI.metadata_pkg.(
+    (
+        DeterministicRecursiveFeatureElimination,
+        ProbabilisticRecursiveFeatureElimination,
+    ),
+    package_name = "FeatureSelection",
+    package_uuid = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6",
+    package_url = "https://github.com/JuliaAI/FeatureSelection.jl",
+    is_pure_julia = true,
+    package_license = "MIT"
+)
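A minimal check of the registered metadata, using the standard MLJModelInterface trait accessors:

```
MMI.package_name(DeterministicRecursiveFeatureElimination)     # "FeatureSelection"
MMI.package_license(ProbabilisticRecursiveFeatureElimination)  # "MIT"
MMI.is_pure_julia(DeterministicRecursiveFeatureElimination)    # true
```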