-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path50_random_forest_ensemble.py
More file actions
22 lines (16 loc) · 926 Bytes
/
50_random_forest_ensemble.py
File metadata and controls
22 lines (16 loc) · 926 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Rather than considering every feature when choosing a split point,
# a random forest evaluates only a random subset of the features at each
# split, for example 3 of them if the dataset has 10 features.
from numpy import mean, std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier
# Generate a synthetic classification dataset; the fixed seed keeps the data
# identical from run to run.
X, Y = make_classification(random_state=1)

# The number of randomly selected features considered at each split point is
# controlled by the "max_features" argument, which defaults to the square
# root of the number of features in the dataset.
# random_state is fixed here as well: the forest's bootstrap sampling and
# feature sub-sampling are random, so without a seed the reported score would
# differ between runs even though the data and CV splits are seeded.
model = RandomForestClassifier(n_estimators=50, random_state=1)

# Evaluate the model with 10-fold stratified cross-validation, repeated 3
# times, which yields 30 accuracy scores.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, Y, scoring='accuracy', cv=cv, n_jobs=1)

# Report ensemble performance as the mean accuracy with its standard
# deviation in parentheses.
print('Mean Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))