Skip to content

Commit 972efac

Browse files
OnlyDeniko and denis.kulandin authored
Extended output result, new metrics and minor fixes (#81)
* minor fixes * pep8 * random state * size of datasets * apply comments * dbscan eps fix * extend output result * codefactor.io * pep8 * fix ci * fix * whitespace * return to stock functions * remove debug output * apply comments * metrics & metric_type * pep8 * pca * roc_auc details section * pep8 * finally solve roc_auc trouble * add kmeans.iter_ & done metrics in bench * n_iter_ * stay columnwise_score because of xgb * roc_auc_score binary case * add n_sv in svms * apply comments Co-authored-by: denis.kulandin <[email protected]>
1 parent e57be6c commit 972efac

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

48 files changed

+461
-250
lines changed

bench.py

Lines changed: 43 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -338,20 +338,47 @@ def columnwise_score(y, yp, score_func):
338338
return score_func(y, yp)
339339

340340

341-
def accuracy_score(y, yp):
342-
return columnwise_score(y, yp, lambda y1, y2: np.mean(y1 == y2))
341+
def accuracy_score(y_true, y_pred):
342+
return columnwise_score(y_true, y_pred, lambda y1, y2: np.mean(y1 == y2))
343343

344344

345-
def log_loss(y, yp):
345+
def log_loss(y_true, y_pred):
346346
from sklearn.metrics import log_loss as sklearn_log_loss
347-
y = convert_to_numpy(y)
348-
yp = convert_to_numpy(yp)
349-
return sklearn_log_loss(y, yp)
347+
y_true = convert_to_numpy(y_true)
348+
y_pred = convert_to_numpy(y_pred)
349+
return sklearn_log_loss(y_true, y_pred)
350+
350351

352+
def roc_auc_score(y_true, y_pred, multi_class='ovr'):
353+
from sklearn.metrics import roc_auc_score as sklearn_roc_auc
354+
y_true = convert_to_numpy(y_true)
355+
y_pred = convert_to_numpy(y_pred)
356+
if y_pred.shape[1] == 2: # binary case
357+
y_pred = y_pred[:, 1]
358+
return sklearn_roc_auc(y_true, y_pred, multi_class=multi_class)
351359

352-
def rmse_score(y, yp):
360+
361+
def rmse_score(y_true, y_pred):
353362
return columnwise_score(
354-
y, yp, lambda y1, y2: float(np.sqrt(np.mean((y1 - y2)**2))))
363+
y_true, y_pred, lambda y1, y2: float(np.sqrt(np.mean((y1 - y2)**2))))
364+
365+
366+
def r2_score(y_true, y_pred):
367+
from sklearn.metrics import r2_score as sklearn_r2_score
368+
y_true = convert_to_numpy(y_true)
369+
y_pred = convert_to_numpy(y_pred)
370+
return sklearn_r2_score(y_true, y_pred)
371+
372+
373+
def davies_bouldin_score(X, labels):
374+
from sklearn.metrics.cluster import davies_bouldin_score as sklearn_dbs
375+
X = convert_to_numpy(X)
376+
labels = convert_to_numpy(labels)
377+
try:
378+
res = sklearn_dbs(X, labels)
379+
except ValueError as ex:
380+
res = ex
381+
return res
355382

356383

357384
def convert_data(data, dtype, data_order, data_format):
@@ -488,16 +515,21 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
488515

489516

490517
def print_output(library, algorithm, stages, params, functions,
491-
times, accuracy_type, accuracies, data, alg_instance=None,
518+
times, metric_type, metrics, data, alg_instance=None,
492519
alg_params=None):
493520
if params.output_format == 'json':
494521
output = []
495522
for i in range(len(stages)):
496523
result = gen_basic_dict(library, algorithm, stages[i], params,
497524
data[i], alg_instance, alg_params)
498525
result.update({'time[s]': times[i]})
499-
if accuracy_type is not None:
500-
result.update({f'{accuracy_type}': accuracies[i]})
526+
if metric_type is not None:
527+
if isinstance(metric_type, str):
528+
result.update({f'{metric_type}': metrics[i]})
529+
elif isinstance(metric_type, list):
530+
for ind, val in enumerate(metric_type):
531+
if metrics[ind][i] is not None:
532+
result.update({f'{val}': metrics[ind][i]})
501533
if hasattr(params, 'n_classes'):
502534
result['input_data'].update({'classes': params.n_classes})
503535
if hasattr(params, 'n_clusters'):

configs/blogs/skl_2021_3.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -307,7 +307,7 @@
307307
}
308308
],
309309
"nu": [0.25],
310-
"kernel": ["sigmoid"]
310+
"kernel": ["poly"]
311311
},
312312
{
313313
"algorithm": "svr",

cuml_bench/dbscan.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,5 +48,5 @@
4848

4949
bench.print_output(library='cuml', algorithm='dbscan', stages=['training'],
5050
params=params, functions=['DBSCAN'], times=[time],
51-
accuracies=[acc], accuracy_type='davies_bouldin_score', data=[X],
51+
metrics=[acc], metric_type='davies_bouldin_score', data=[X],
5252
alg_instance=dbscan)

cuml_bench/df_clsf.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,6 @@ def predict(X):
9797
bench.print_output(library='cuml', algorithm='decision_forest_classification',
9898
stages=['training', 'prediction'],
9999
params=params, functions=['df_clsf.fit', 'df_clsf.predict'],
100-
times=[fit_time, predict_time], accuracy_type='accuracy[%]',
101-
accuracies=[train_acc, test_acc], data=[X_train, X_test],
100+
times=[fit_time, predict_time], metric_type='accuracy[%]',
101+
metrics=[train_acc, test_acc], data=[X_train, X_test],
102102
alg_instance=clf)

cuml_bench/df_regr.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,6 @@ def predict(X):
9393
bench.print_output(library='cuml', algorithm='decision_forest_regression',
9494
stages=['training', 'prediction'], params=params,
9595
functions=['df_regr.fit', 'df_regr.predict'],
96-
times=[fit_time, predict_time], accuracy_type='rmse',
97-
accuracies=[train_rmse, test_rmse], data=[X_train, X_test],
96+
times=[fit_time, predict_time], metric_type='rmse',
97+
metrics=[train_rmse, test_rmse], data=[X_train, X_test],
9898
alg_instance=regr)

cuml_bench/elasticnet.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,6 @@
5656
bench.print_output(library='cuml', algorithm='elastic-net',
5757
stages=['training', 'prediction'], params=params,
5858
functions=['ElasticNet.fit', 'ElasticNet.predict'],
59-
times=[fit_time, predict_time], accuracy_type='rmse',
60-
accuracies=[train_rmse, test_rmse], data=[X_train, X_train],
59+
times=[fit_time, predict_time], metric_type='rmse',
60+
metrics=[train_rmse, test_rmse], data=[X_train, X_train],
6161
alg_instance=regr)

cuml_bench/kmeans.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,6 @@ def kmeans_fit(X):
8888
bench.print_output(library='cuml', algorithm='kmeans',
8989
stages=['training', 'prediction'], params=params,
9090
functions=['KMeans.fit', 'KMeans.predict'],
91-
times=[fit_time, predict_time], accuracy_type='davies_bouldin_score',
92-
accuracies=[acc_train, acc_test], data=[X_train, X_test],
91+
times=[fit_time, predict_time], metric_type='davies_bouldin_score',
92+
metrics=[acc_train, acc_test], data=[X_train, X_test],
9393
alg_instance=kmeans)

cuml_bench/knn_clsf.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,13 +68,13 @@
6868
stages=['training', 'prediction'], params=params,
6969
functions=['knn_clsf.fit', 'knn_clsf.predict'],
7070
times=[train_time, predict_time],
71-
accuracies=[train_acc, test_acc], accuracy_type='accuracy[%]',
71+
metrics=[train_acc, test_acc], metric_type='accuracy[%]',
7272
data=[X_train, X_test], alg_instance=knn_clsf)
7373
else:
7474
bench.print_output(library='cuml',
7575
algorithm=knn_clsf.algorithm + '_knn_search',
7676
stages=['training', 'search'], params=params,
7777
functions=['knn_clsf.fit', 'knn_clsf.kneighbors'],
7878
times=[train_time, predict_time],
79-
accuracies=[], accuracy_type=None,
79+
metrics=[], metric_type=None,
8080
data=[X_train, X_test], alg_instance=knn_clsf)

cuml_bench/lasso.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,6 @@
5353
bench.print_output(library='sklearn', algorithm='lasso',
5454
stages=['training', 'prediction'],
5555
params=params, functions=['Lasso.fit', 'Lasso.predict'],
56-
times=[fit_time, predict_time], accuracy_type='rmse',
57-
accuracies=[train_rmse, test_rmse], data=[X_train, X_test],
56+
times=[fit_time, predict_time], metric_type='rmse',
57+
metrics=[train_rmse, test_rmse], data=[X_train, X_test],
5858
alg_instance=regr)

cuml_bench/linear.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,6 @@
5050
bench.print_output(library='cuml', algorithm='linear_regression',
5151
stages=['training', 'prediction'], params=params,
5252
functions=['Linear.fit', 'Linear.predict'],
53-
times=[fit_time, predict_time], accuracy_type='rmse',
54-
accuracies=[train_rmse, test_rmse], data=[X_train, X_test],
53+
times=[fit_time, predict_time], metric_type='rmse',
54+
metrics=[train_rmse, test_rmse], data=[X_train, X_test],
5555
alg_instance=regr)

0 commit comments

Comments
 (0)