@@ -156,32 +156,32 @@ __verbose__ : int, default=0
156
156
import pandas as pd
157
157
from sklearn.ensemble import RandomForestClassifier
158
158
from boruta import BorutaPy
159
-
159
+
160
160
# load X and y
161
161
# NOTE BorutaPy accepts numpy arrays only, hence the .values attribute
162
- X = pd.read_csv('my_X_table.csv', index_col=0).values
163
- y = pd.read_csv('my_y_vector.csv', index_col=0).values
164
-
162
+ X = pd.read_csv('examples/test_X.csv', index_col=0).values
163
+ y = pd.read_csv('examples/test_y.csv', header=None, index_col=0).values
164
+ y = y.ravel()
165
+
165
166
# define random forest classifier, utilising all cores and
166
167
# sampling in proportion to y labels
167
168
# NOTE class_weight='balanced' replaces the legacy 'auto' option, which
# current scikit-learn releases no longer accept
rf = RandomForestClassifier(n_jobs=-1, class_weight='balanced', max_depth=5)
168
-
169
+
169
170
# define Boruta feature selection method
170
- feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2)
171
-
172
- # find all relevant features
171
+ feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=1 )
172
+
173
+ # find all relevant features - 5 features should be selected
173
174
feat_selector.fit(X, y)
174
-
175
- # check selected features
175
+
176
+ # check selected features - first 5 features are selected
176
177
feat_selector.support_
177
-
178
+
178
179
# check ranking of features
179
180
feat_selector.ranking_
180
-
181
+
181
182
# call transform() on X to filter it down to selected features
182
183
X_filtered = feat_selector.transform(X)
183
184
184
-
185
185
## References ##
186
186
187
187
1. Kursa M., Rudnicki W., "Feature Selection with the Boruta Package", Journal of Statistical Software, Vol. 36, Issue 11, Sep 2010
0 commit comments