Skip to content

Commit 01a5c60

Browse files
author
igor_rukhovich
committed
Benchmarks are done
1 parent d0b6c40 commit 01a5c60

File tree

5 files changed

+21
-50
lines changed

5 files changed

+21
-50
lines changed

configs/cpu_lgbm_gbt_config.json

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
"lib": ["modelbuilders"],
55
"data-format": ["pandas"],
66
"data-order": ["F"],
7-
"dtype": ["float32"],
8-
"count-dmatrix": [""]
7+
"dtype": ["float32"]
98
},
109
"cases": [
1110
{
@@ -22,8 +21,7 @@
2221
}
2322
],
2423
"n-estimators": [100],
25-
"objective": ["reg:squarederror"],
26-
"tree-method": ["hist"],
24+
"objective": ["regression"],
2725
"max-depth": [8],
2826
"scale-pos-weight": [2],
2927
"learning-rate": [0.1],
@@ -56,8 +54,7 @@
5654
"max-depth": [8],
5755
"max-leaves": [256],
5856
"n-estimators": [1000],
59-
"objective": ["binary:logistic"],
60-
"tree-method": ["hist"]
57+
"objective": ["binary"]
6158
},
6259
{
6360
"algorithm": "lgbm_mb",
@@ -82,8 +79,7 @@
8279
"max-depth": [8],
8380
"max-leaves": [256],
8481
"n-estimators": [1000],
85-
"objective": ["binary:logistic"],
86-
"tree-method": ["hist"]
82+
"objective": ["binary"]
8783
},
8884
{
8985
"algorithm": "lgbm_mb",
@@ -103,11 +99,10 @@
10399
"subsample": [1],
104100
"reg-lambda": [2],
105101
"min-child-weight": [1],
106-
"min-split-loss": [0.1],
102+
"min-split-gain": [0.1],
107103
"max-depth": [8],
108104
"n-estimators": [200],
109-
"objective": ["multi:softprob"],
110-
"tree-method": ["hist"]
105+
"objective": ["multiclass"]
111106
}
112107
]
113108
}

modelbuilders/bench.py

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -136,31 +136,13 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
136136
if param_vars[file_arg].name.endswith('.npy'):
137137
data = np.load(param_vars[file_arg].name)
138138
else:
139-
data = read_csv(param_vars[file_arg].name, params)
139+
data = read_csv(param_vars[file_arg].name)
140140
full_data[element] = convert_data(
141141
data,
142142
int_dtype if 'y' in element and int_label else params.dtype,
143143
params.data_order, params.data_format
144144
)
145-
# generate and convert data if it's marked and path isn't specified
146-
if full_data[element] is None and element in generated_data:
147-
full_data[element] = convert_data(
148-
np.random.rand(*params.shape),
149-
int_dtype if 'y' in element and int_label else params.dtype,
150-
params.data_order, params.data_format)
151-
# convert existing labels from 1- to 2-dimensional
152-
# if it's forced and possible
153-
if full_data[element] is not None and 'y' in element and label_2d and hasattr(full_data[element], 'reshape'):
154-
full_data[element] = full_data[element].reshape(
155-
(full_data[element].shape[0], 1))
156-
# add dtype property to data if it's needed and doesn't exist
157-
if full_data[element] is not None and add_dtype and not hasattr(full_data[element], 'dtype'):
158-
if hasattr(full_data[element], 'values'):
159-
full_data[element].dtype = full_data[element].values.dtype
160-
elif hasattr(full_data[element], 'dtypes'):
161-
full_data[element].dtype = full_data[element].dtypes[0].type
162-
163-
params.dtype = get_dtype(full_data['X_train'])
145+
164146
# add size to parameters, which is needed for some cases
165147
if not hasattr(params, 'size'):
166148
params.size = size_str(full_data['X_train'].shape)
@@ -363,7 +345,7 @@ def print_output(library, algorithm, stages, columns, params, functions,
363345
print(json.dumps(output, indent=4))
364346

365347

366-
def read_csv(filename, params):
348+
def read_csv(filename):
367349
from string import ascii_lowercase, ascii_uppercase
368350

369351
# find out header existence
@@ -377,9 +359,9 @@ def read_csv(filename, params):
377359
# try to read csv with pandas and fall back to numpy reader if failed
378360
try:
379361
import pandas as pd
380-
data = pd.read_csv(filename, header=header, dtype=params.dtype).values
362+
data = pd.read_csv(filename, header=header, dtype=np.float32).values
381363
except ImportError:
382-
data = np.genfromtxt(filename, delimiter=',', dtype=params.dtype,
364+
data = np.genfromtxt(filename, delimiter=',', dtype=np.float32,
383365
skip_header=0 if header is None else 1)
384366

385367
if data.ndim == 2:

modelbuilders/lgbm_mb.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,17 @@
2929
help='Maximum delta step we allow each leaf output to be')
3030
parser.add_argument('--max-depth', type=int, default=6,
3131
help='Maximum depth of a tree')
32-
parser.add_argument('--max-leaves', type=int, default=0,
32+
parser.add_argument('--max-leaves', type=int, default=31,
3333
help='Maximum number of nodes to be added')
3434
parser.add_argument('--min-child-weight', type=float, default=1,
3535
help='Minimum sum of instance weight needed in a child')
36-
parser.add_argument('--min-split-loss', '--gamma', type=float, default=0,
36+
parser.add_argument('--min-split-gain', '--gamma', type=float, default=0,
3737
help='Minimum loss reduction required to make'
3838
' partition on a leaf node')
3939
parser.add_argument('--n-estimators', type=int, default=100,
4040
help='Number of gradient boosted trees')
4141
parser.add_argument('--objective', type=str, required=True,
42-
choices=('reg:squarederror', 'binary:logistic',
43-
'multi:softmax', 'multi:softprob'),
42+
choices=('regression', 'binary', 'multiclass'),
4443
help='Control a balance of positive and negative weights')
4544
parser.add_argument('--reg-alpha', type=float, default=0,
4645
help='L1 regularization term on weights')
@@ -50,31 +49,24 @@
5049
help='Controls a balance of positive and negative weights')
5150
parser.add_argument('--subsample', type=float, default=1,
5251
help='Subsample ratio of the training instances')
53-
parser.add_argument('--tree-method', type=str, required=True,
54-
help='The tree construction algorithm used in XGBoost')
5552

5653
params = parse_args(parser)
5754

5855
X_train, X_test, y_train, y_test = load_data(params)
5956

6057
lgbm_params = {
61-
'booster': 'gbtree',
62-
'verbosity': 0,
58+
'verbosity': -1,
6359
'learning_rate': params.learning_rate,
64-
'min_split_loss': params.min_split_loss,
60+
'min_split_gain': params.min_split_gain,
6561
'max_depth': params.max_depth,
6662
'min_child_weight': params.min_child_weight,
6763
'max_delta_step': params.max_delta_step,
6864
'subsample': params.subsample,
69-
'sampling_method': 'uniform',
7065
'colsample_bytree': params.colsample_bytree,
71-
'colsample_bylevel': 1,
7266
'colsample_bynode': 1,
7367
'reg_lambda': params.reg_lambda,
7468
'reg_alpha': params.reg_alpha,
75-
'tree_method': params.tree_method,
7669
'scale_pos_weight': params.scale_pos_weight,
77-
'grow_policy': params.grow_policy,
7870
'max_leaves': params.max_leaves,
7971
'max_bin': params.max_bin,
8072
'objective': params.objective,
@@ -113,10 +105,10 @@
113105
t_train, model_lgbm = measure_function_time(lgbm.train, lgbm_params, lgbm_train, params=params,
114106
num_boost_round=params.n_estimators, valid_sets=lgbm_train,
115107
verbose_eval=False)
116-
y_train_pred = model_lgbm.predict(lgbm_train)
108+
y_train_pred = model_lgbm.predict(X_train)
117109
train_metric = metric_func(y_train, y_train_pred)
118110

119-
t_lgbm_pred, y_test_pred = measure_function_time(model_lgbm.predict, lgbm_test, params=params)
111+
t_lgbm_pred, y_test_pred = measure_function_time(model_lgbm.predict, X_test, params=params)
120112
test_metric_xgb = metric_func(y_test, y_test_pred)
121113

122114
t_trans, model_daal = measure_function_time(daal4py.get_gbt_model_from_lightgbm, model_lgbm, params=params)
@@ -138,4 +130,4 @@
138130
'lgbm_predict', 'lgbm_to_daal', 'daal_compute'],
139131
times=[t_creat_train, t_creat_test, t_train, t_lgbm_pred, t_trans, t_daal_pred],
140132
accuracy_type=metric_name, accuracies=[0, 0, train_metric, test_metric_xgb, 0, test_metric_daal],
141-
data=[X_train, X_test, X_train, X_test, X_train, X_test])
133+
data=[X_train, X_test, X_train, X_test, X_train, X_test])

modelbuilders/xgb_mb.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import daal4py
77
import numpy as np
88
from os import environ
9+
from sys import stderr
910
from timeit import default_timer as timer
1011
from typing import Tuple
1112
import xgboost as xgb

runner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ class GenerationArgs:
289289
try:
290290
json_result['results'].extend(json.loads(stdout))
291291
except json.JSONDecodeError:
292+
print("UNABLE TO PARSE, ", stdout)
292293
pass
293294
elif args.output_format == 'csv':
294295
csv_result += stdout + '\n'

0 commit comments

Comments
 (0)