Commit f0aa477

igor_rukhovich committed
Changed the print function (makes print shorter)
1 parent e38ffaf commit f0aa477

File tree: 3 files changed (+74 −29 lines)

    modelbuilders/lgbm_mb.py
    modelbuilders/utils.py
    modelbuilders/xgb_mb.py

modelbuilders/lgbm_mb.py

Lines changed: 15 additions & 15 deletions
@@ -10,8 +10,8 @@
 from typing import Tuple
 
 
-from bench import load_data, measure_function_time, parse_args, print_output, rmse_score
-from utils import get_accuracy
+from bench import load_data, measure_function_time, parse_args, rmse_score
+from utils import get_accuracy, print_output
 
 
 parser = argparse.ArgumentParser(
@@ -80,17 +80,17 @@
 if 'OMP_NUM_THREADS' in environ.keys():
     lgbm_params['nthread'] = int(environ['OMP_NUM_THREADS'])
 
-columns: Tuple[str, ...] = ('batch', 'arch', 'prefix', 'function',
-                            'threads', 'dtype', 'size', 'num_trees')
+columns: Tuple[str, ...] = ('batch', 'arch', 'prefix', 'function', 'prep_function',
+                            'threads', 'dtype', 'size', 'num_trees', 'time', 'prep_time')
 
 if params.objective.startswith('reg'):
     task = 'regression'
     metric_name, metric_func = 'rmse', rmse_score
-    columns += ('rmse', 'time')
+    columns += ('rmse',)
 else:
     task = 'classification'
     metric_name, metric_func = 'accuracy[%]', get_accuracy
-    columns += ('n_classes', 'accuracy', 'time')
+    columns += ('n_classes', 'accuracy')
     if 'cudf' in str(type(y_train)):
         params.n_classes = y_train[y_train.columns[0]].nunique()
     else:
@@ -107,11 +107,13 @@
 t_train, model_lgbm = measure_function_time(
     lgbm.train, lgbm_params, lgbm_train, params=params, num_boost_round=params.n_estimators,
     valid_sets=lgbm_train, verbose_eval=False)
-y_train_pred = model_lgbm.predict(X_train)
-train_metric = metric_func(y_train, y_train_pred)
+train_metric = None
+if X_train != X_test:
+    y_train_pred = model_lgbm.predict(X_train)
+    train_metric = metric_func(y_train, y_train_pred)
 
 t_lgbm_pred, y_test_pred = measure_function_time(model_lgbm.predict, X_test, params=params)
-test_metric_xgb = metric_func(y_test, y_test_pred)
+test_metric_lgbm = metric_func(y_test, y_test_pred)
 
 t_trans, model_daal = measure_function_time(
     daal4py.get_gbt_model_from_lightgbm, model_lgbm, params=params)
@@ -130,12 +132,10 @@
 
 print_output(
     library='modelbuilders', algorithm=f'lightgbm_{task}_and_modelbuilder',
-    stages=['lgbm_train_matrix_create', 'lgbm_test_matrix_create', 'lgbm_training',
-            'lgbm_prediction', 'lgbm_to_daal_conv', 'daal_prediction'],
+    stages=['lgbm_train', 'lgbm_predict', 'daal_predict'],
     columns=columns, params=params,
     functions=['lgbm_dataset', 'lgbm_dataset', 'lgbm_train', 'lgbm_predict', 'lgbm_to_daal',
                'daal_compute'],
-    times=[t_creat_train, t_creat_test, t_train, t_lgbm_pred, t_trans, t_daal_pred],
-    accuracy_type=metric_name, accuracies=[0, 0, train_metric, test_metric_xgb, 0,
-                                           test_metric_daal],
-    data=[X_train, X_test, X_train, X_test, X_train, X_test])
+    times=[t_creat_train, t_train, t_creat_test, t_lgbm_pred, t_trans, t_daal_pred],
+    accuracy_type=metric_name, accuracies=[train_metric, test_metric_lgbm, test_metric_daal],
+    data=[X_train, X_test, X_test])

modelbuilders/utils.py

Lines changed: 45 additions & 0 deletions
@@ -3,6 +3,8 @@
 # SPDX-License-Identifier: MIT
 
 
+from bench import print_header, print_row
+import json
 import numpy as np
 
 
@@ -21,3 +23,46 @@ def get_accuracy(true_labels, prediction):
         if true_labels[i] != pred_label:
             errors += 1
     return 100 * (1 - errors/len(true_labels))
+
+
+def print_output(library, algorithm, stages, columns, params, functions,
+                 times, accuracy_type, accuracies, data):
+    if params.output_format == 'csv':
+        print_header(columns, params)
+        for i in range(len(accuracies)):
+            print_row(
+                columns, params, prep_function=functions[2 * i],
+                function=functions[2 * i + 1],
+                time=times[2 * i], prep_time=times[2 * i + 1],
+                accuracy=accuracies[i])
+    elif params.output_format == 'json':
+        output = []
+        for i in range(len(stages)):
+            result = {
+                'library': library,
+                'algorithm': algorithm,
+                'stage': stages[i],
+                'input_data': {
+                    'data_format': params.data_format,
+                    'data_order': params.data_order,
+                    'data_type': str(params.dtype),
+                    'dataset_name': params.dataset_name,
+                    'rows': data[i].shape[0],
+                    'columns': data[i].shape[1]
+                }
+            }
+            if stages[i] == 'daal4py_predict':
+                result.update({'conversion_to_daal4py': times[2 * i],
+                               'prediction_time': times[2 * i + 1]})
+            elif 'train' in stages[i]:
+                result.update({'matrix_creation_time': times[2 * i],
+                               'training_time': times[2 * i + 1]})
+            else:
+                result.update({'matrix_creation_time': times[2 * i],
+                               'prediction_time': times[2 * i + 1]})
+            if accuracies[i] is not None:
+                result.update({f'{accuracy_type}': accuracies[i]})
+            if hasattr(params, 'n_classes'):
+                result['input_data'].update({'classes': params.n_classes})
+            output.append(result)
+        print(json.dumps(output, indent=4))
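
For reference, a minimal, hypothetical usage sketch of the new print_output helper (JSON branch only). All values below are made up, params is a stand-in for the object normally produced by bench.parse_args, and the script is assumed to run from the modelbuilders/ directory so that utils (and its bench import) resolve:

from types import SimpleNamespace

import numpy as np

from utils import print_output  # helper added in this commit

# Stand-in for the argparse namespace returned by bench.parse_args;
# attribute names match what print_output reads in its JSON branch.
params = SimpleNamespace(
    output_format='json', data_format='pandas', data_order='F',
    dtype=np.float32, dataset_name='synthetic_classification', n_classes=2)

X_train = np.zeros((1000, 45), dtype=np.float32)
X_test = np.zeros((250, 45), dtype=np.float32)

# One entry per stage in `stages`, `accuracies`, and `data`; `times` holds a
# (preparation, main) pair per stage. `columns` and `functions` are consumed
# only by the CSV branch and are passed here just to satisfy the signature.
print_output(
    library='modelbuilders', algorithm='xgboost_classification_and_modelbuilder',
    stages=['xgboost_train', 'xgboost_predict', 'daal4py_predict'],
    columns=(), params=params,
    functions=['xgb_dmatrix', 'xgb_dmatrix', 'xgb_train', 'xgb_predict',
               'xgb_to_daal', 'daal_compute'],
    times=[0.42, 12.3, 0.11, 0.95, 0.07, 0.31],
    accuracy_type='accuracy[%]', accuracies=[None, 92.1, 92.1],
    data=[X_train, X_test, X_test])

This prints a JSON list with one record per stage; each record carries the input_data block, the relevant pair of timings (matrix creation plus training or prediction, or conversion plus prediction for the daal4py stage), and the metric whenever it is not None, so a skipped train metric simply drops out of the output.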

modelbuilders/xgb_mb.py

Lines changed: 14 additions & 14 deletions
@@ -10,8 +10,8 @@
 import xgboost as xgb
 
 
-from bench import load_data, measure_function_time, parse_args, print_output, rmse_score
-from utils import get_accuracy
+from bench import load_data, measure_function_time, parse_args, rmse_score
+from utils import get_accuracy, print_output
 
 
 parser = argparse.ArgumentParser(
@@ -98,17 +98,17 @@
 if 'OMP_NUM_THREADS' in environ.keys():
     xgb_params['nthread'] = int(environ['OMP_NUM_THREADS'])
 
-columns: Tuple[str, ...] = ('batch', 'arch', 'prefix', 'function',
-                            'threads', 'dtype', 'size', 'num_trees')
+columns: Tuple[str, ...] = ('batch', 'arch', 'prefix', 'function', 'prep_function',
+                            'threads', 'dtype', 'size', 'num_trees', 'time', 'prep_time')
 
 if params.objective.startswith('reg'):
     task = 'regression'
     metric_name, metric_func = 'rmse', rmse_score
-    columns += ('rmse', 'time')
+    columns += ('rmse',)
 else:
     task = 'classification'
     metric_name, metric_func = 'accuracy[%]', get_accuracy
-    columns += ('n_classes', 'accuracy', 'time')
+    columns += ('n_classes', 'accuracy')
     if 'cudf' in str(type(y_train)):
         params.n_classes = y_train[y_train.columns[0]].nunique()
     else:
@@ -134,8 +134,10 @@ def predict():
 
 t_train, model_xgb = measure_function_time(
     fit, None if params.count_dmatrix else dtrain, params=params)
-y_train_pred = model_xgb.predict(dtrain)
-train_metric = metric_func(y_train, y_train_pred)
+train_metric = None
+if X_train != X_test:
+    y_train_pred = model_xgb.predict(dtrain)
+    train_metric = metric_func(y_train, y_train_pred)
 
 t_xgb_pred, y_test_pred = measure_function_time(predict, params=params)
 test_metric_xgb = metric_func(y_test, y_test_pred)
@@ -157,12 +159,10 @@ def predict():
 
 print_output(
     library='modelbuilders', algorithm=f'xgboost_{task}_and_modelbuilder',
-    stages=['xgb_train_dmatrix_create', 'xgb_test_dmatrix_create', 'xgb_training', 'xgb_prediction',
-            'xgb_to_daal_conv', 'daal_prediction'],
+    stages=['xgboost_train', 'xgboost_predict', 'daal4py_predict'],
     columns=columns, params=params,
     functions=['xgb_dmatrix', 'xgb_dmatrix', 'xgb_train', 'xgb_predict', 'xgb_to_daal',
                'daal_compute'],
-    times=[t_creat_train, t_creat_test, t_train, t_xgb_pred, t_trans, t_daal_pred],
-    accuracy_type=metric_name, accuracies=[0, 0, train_metric, test_metric_xgb, 0,
-                                           test_metric_daal],
-    data=[X_train, X_test, X_train, X_test, X_train, X_test])
+    times=[t_creat_train, t_train, t_creat_test, t_xgb_pred, t_trans, t_daal_pred],
+    accuracy_type=metric_name, accuracies=[train_metric, test_metric_xgb, test_metric_daal],
+    data=[X_train, X_test, X_test])
