15
15
# ===============================================================================
16
16
17
17
import argparse
18
-
19
18
import bench
20
19
from cuml .ensemble import RandomForestRegressor
21
20
22
21
parser = argparse .ArgumentParser (description = 'cuml random forest '
23
22
'regression benchmark' )
24
23
25
- parser .add_argument ('--criterion' , type = str , default = 'mse' ,
26
- choices = ('mse' , 'mae' ),
27
- help = 'The function to measure the quality of a split' )
28
24
parser .add_argument ('--split-algorithm' , type = str , default = 'hist' ,
29
25
choices = ('hist' , 'global_quantile' ),
30
26
help = 'The algorithm to determine how '
31
27
'nodes are split in the tree' )
32
28
parser .add_argument ('--num-trees' , type = int , default = 100 ,
33
29
help = 'Number of trees in the forest' )
34
- parser .add_argument ('--max-features' , type = bench .float_or_int , default = None ,
30
+ parser .add_argument ('--max-features' , type = bench .float_or_int , default = 1.0 ,
35
31
help = 'Upper bound on features used at each split' )
36
- parser .add_argument ('--max-depth' , type = int , default = None ,
32
+ parser .add_argument ('--max-depth' , type = int , default = 16 ,
37
33
help = 'Upper bound on depth of constructed trees' )
38
34
parser .add_argument ('--min-samples-split' , type = bench .float_or_int , default = 2 ,
39
35
help = 'Minimum samples number for node splitting' )
40
36
parser .add_argument ('--max-leaf-nodes' , type = int , default = - 1 ,
41
37
help = 'Maximum leaf nodes per tree' )
42
- parser .add_argument ('--min-impurity-decrease' , type = float , default = 0. ,
38
+ parser .add_argument ('--min-impurity-decrease' , type = float , default = 0.0 ,
43
39
help = 'Needed impurity decrease for node splitting' )
44
40
parser .add_argument ('--no-bootstrap' , dest = 'bootstrap' , default = True ,
45
41
action = 'store_false' , help = "Don't control bootstraping" )
46
42
47
43
params = bench .parse_args (parser )
48
44
49
45
# Load and convert data
50
- X_train , X_test , y_train , y_test = bench .load_data (params )
51
-
52
- if params .criterion == 'mse' :
53
- params .criterion = 2
54
- else :
55
- params .criterion = 3
46
+ X_train , X_test , y_train , y_test = bench .load_data (params , int_label = True )
56
47
57
48
if params .split_algorithm == 'hist' :
58
49
params .split_algorithm = 0
61
52
62
53
# Create our random forest regressor
63
54
regr = RandomForestRegressor (
64
- split_criterion = params .criterion ,
65
- split_algo = params .split_algorithm ,
66
55
n_estimators = params .num_trees ,
67
- max_depth = params .max_depth ,
56
+ split_algo = params .split_algorithm ,
68
57
max_features = params .max_features ,
69
58
min_samples_split = params .min_samples_split ,
59
+ max_depth = params .max_depth ,
70
60
max_leaves = params .max_leaf_nodes ,
71
61
min_impurity_decrease = params .min_impurity_decrease ,
72
62
bootstrap = params .bootstrap ,
63
+
73
64
)
74
65
75
66
@@ -82,7 +73,6 @@ def predict(regr, X):
82
73
83
74
84
75
fit_time , _ = bench .measure_function_time (fit , regr , X_train , y_train , params = params )
85
-
86
76
y_pred = predict (regr , X_train )
87
77
train_rmse = bench .rmse_score (y_pred , y_train )
88
78
0 commit comments