Commit eab0ddb

rabsrra-amexmfeurer authored
Calculate multiple metrics (#985)
* Option to allow users to calculate multiple metrics for a pipeline
* Fix: metric score was not calculated when scoring_function passed
* Fixing code formatting
* Incorporating review comments
* Update run_auto-sklearn_for_metadata_generation.py
* Update test_train_evaluator.py
* Update estimators.py
* Update __init__.py
* Fix build
* Removed unnecessary checks
* Adding test cases
* Update test/test_metric/test_metrics.py
  Co-authored-by: Matthias Feurer <[email protected]>
* Fixing lint

Co-authored-by: Rohit Agarwal <[email protected]>
Co-authored-by: Matthias Feurer <[email protected]>
1 parent 938175d commit eab0ddb
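A minimal usage sketch of the feature this commit adds (hedged: assumes the public AutoSklearnClassifier API and the built-in scorers in autosklearn.metrics; the dataset and time budget are placeholders):

    import sklearn.datasets
    import sklearn.model_selection
    import autosklearn.classification
    import autosklearn.metrics

    # Toy data, only for illustration.
    X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, random_state=1)

    # SMAC still optimizes `metric`; the scorers in `scoring_functions` are merely
    # computed for every evaluated pipeline and reported alongside it.
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120,
        metric=autosklearn.metrics.accuracy,
        scoring_functions=[autosklearn.metrics.balanced_accuracy,
                           autosklearn.metrics.log_loss],
    )
    automl.fit(X_train, y_train)

    # Each extra scorer surfaces as a masked 'metric_<name>' column in cv_results_.
    print(automl.cv_results_['metric_balanced_accuracy'])
    print(automl.cv_results_['metric_log_loss'])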

File tree

16 files changed: +245 -94 lines changed

autosklearn/automl.py

Lines changed: 30 additions & 1 deletion
@@ -128,6 +128,7 @@ def __init__(self,
                  smac_scenario_args=None,
                  logging_config=None,
                  metric=None,
+                 scoring_functions=None
                  ):
         super(AutoML, self).__init__()
         self._backend = backend
@@ -149,6 +150,7 @@ def __init__(self,
         self._include_preprocessors = include_preprocessors
         self._exclude_preprocessors = exclude_preprocessors
         self._resampling_strategy = resampling_strategy
+        self._scoring_functions = scoring_functions if scoring_functions is not None else []
         self._resampling_strategy_arguments = resampling_strategy_arguments \
             if resampling_strategy_arguments is not None else {}
         if self._resampling_strategy not in ['holdout',
@@ -715,6 +717,7 @@ def fit(
             disable_file_output=self._disable_evaluator_output,
             get_smac_object_callback=self._get_smac_object_callback,
             smac_scenario_args=self._smac_scenario_args,
+            scoring_functions=self._scoring_functions,
             ensemble_callback=proc_ensemble,
         )

@@ -1038,7 +1041,7 @@ def score(self, X, y):
                                prediction=prediction,
                                task_type=self._task,
                                metric=self._metric,
-                               all_scoring_functions=False)
+                               scoring_functions=None)

     @property
     def cv_results_(self):
@@ -1073,11 +1076,21 @@ def cv_results_(self):
             masks[name] = []
             hp_names.append(name)

+        metric_mask = dict()
+        metric_dict = dict()
+        metric_name = []
+
+        for metric in self._scoring_functions:
+            metric_name.append(metric.name)
+            metric_dict[metric.name] = []
+            metric_mask[metric.name] = []
+
         mean_test_score = []
         mean_fit_time = []
         params = []
         status = []
         budgets = []
+
         for run_key in self.runhistory_.data:
             run_value = self.runhistory_.data[run_key]
             config_id = run_key.config_id
@@ -1120,7 +1133,23 @@ def cv_results_(self):
                 parameter_dictionaries[hp_name].append(hp_value)
                 masks[hp_name].append(mask_value)

+            for metric in self._scoring_functions:
+                if metric.name in run_value.additional_info.keys():
+                    metric_cost = run_value.additional_info[metric.name]
+                    metric_value = metric._optimum - (metric._sign * metric_cost)
+                    mask_value = False
+                else:
+                    metric_value = np.NaN
+                    mask_value = True
+                metric_dict[metric.name].append(metric_value)
+                metric_mask[metric.name].append(mask_value)
+
         results['mean_test_score'] = np.array(mean_test_score)
+        for name in metric_name:
+            masked_array = ma.MaskedArray(metric_dict[name],
+                                          metric_mask[name])
+            results['metric_%s' % name] = masked_array
+
         results['mean_fit_time'] = np.array(mean_fit_time)
         results['params'] = params
         results['rank_test_scores'] = scipy.stats.rankdata(1 - results['mean_test_score'],
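The additional_info entries written by the evaluators hold losses (costs), so the cv_results_ code above converts them back into scores via optimum - sign * cost and masks metrics that were not computed for a run. A standalone sketch of that conversion with made-up values (not library code):

    import numpy as np
    import numpy.ma as ma

    # Hypothetical additional_info of one run: only accuracy was reported.
    additional_info = {'accuracy': 0.08}
    # (name, optimum, sign) triples as stored on the Scorer objects.
    scorers = [('accuracy', 1.0, 1.0), ('log_loss', 0.0, -1.0)]

    values, mask = [], []
    for name, optimum, sign in scorers:
        if name in additional_info:
            values.append(optimum - sign * additional_info[name])  # 1 - 0.08 = 0.92
            mask.append(False)
        else:
            values.append(np.NaN)  # metric missing for this run -> masked
            mask.append(True)

    column = ma.MaskedArray(values, mask)  # [0.92, --]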

autosklearn/ensemble_builder.py

Lines changed: 4 additions & 4 deletions
@@ -874,7 +874,7 @@ def score_ensemble_preds(self):
                     prediction=y_ensemble,
                     task_type=self.task_type,
                     metric=self.metric,
-                    all_scoring_functions=False)
+                    scoring_functions=None)

                 if np.isfinite(self.read_scores[y_ens_fn]["ens_score"]):
                     self.logger.debug(
@@ -1349,7 +1349,7 @@ def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred):
                 prediction=train_pred,
                 task_type=self.task_type,
                 metric=self.metric,
-                all_scoring_functions=False
+                scoring_functions=None
             )
         }
         if valid_pred is not None:
@@ -1360,7 +1360,7 @@ def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred):
                 prediction=valid_pred,
                 task_type=self.task_type,
                 metric=self.metric,
-                all_scoring_functions=False
+                scoring_functions=None
             )

         # In case test_pred was provided
@@ -1370,7 +1370,7 @@ def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred):
                 prediction=test_pred,
                 task_type=self.task_type,
                 metric=self.metric,
-                all_scoring_functions=False
+                scoring_functions=None
             )

         self.ensemble_history.append(performance_stamp)

autosklearn/ensembles/ensemble_selection.py

Lines changed: 4 additions & 4 deletions
@@ -143,15 +143,15 @@ def _fast(
             )

             # Calculate score is versatile and can return a dict of score
-            # when all_scoring_functions=False, we know it will be a float
+            # when scoring_functions=None, we know it will be a float
             calculated_score = cast(
                 float,
                 calculate_score(
                     solution=labels,
                     prediction=fant_ensemble_prediction,
                     task_type=self.task_type,
                     metric=self.metric,
-                    all_scoring_functions=False
+                    scoring_functions=None
                 )
             )
             scores[j] = self.metric._optimum - calculated_score
@@ -193,15 +193,15 @@ def _slow(
             ensemble.append(pred)
             ensemble_prediction = np.mean(np.array(ensemble), axis=0)
             # Calculate score is versatile and can return a dict of score
-            # when all_scoring_functions=False, we know it will be a float
+            # when scoring_functions=None, we know it will be a float
             calculated_score = cast(
                 float,
                 calculate_score(
                     solution=labels,
                     prediction=ensemble_prediction,
                     task_type=self.task_type,
                     metric=self.metric,
-                    all_scoring_functions=False
+                    scoring_functions=None
                 )
             )
             scores[j] = self.metric._optimum - calculated_score
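The comment change above reflects the renamed contract of calculate_score: with scoring_functions=None it returns a single float for the optimized metric, while a non-empty list yields a dict keyed by metric name. A hedged sketch of both call styles, assuming a binary-classification toy problem and that probability predictions are passed as in the evaluators:

    import numpy as np
    from autosklearn.constants import BINARY_CLASSIFICATION
    from autosklearn.metrics import accuracy, balanced_accuracy, calculate_score

    y_true = np.array([0, 1, 1, 0])
    y_proba = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4], [0.7, 0.3]])

    # scoring_functions=None -> plain float for `metric`.
    single = calculate_score(solution=y_true, prediction=y_proba,
                             task_type=BINARY_CLASSIFICATION, metric=accuracy,
                             scoring_functions=None)

    # With extra scorers -> dict of {metric name: score}.
    many = calculate_score(solution=y_true, prediction=y_proba,
                           task_type=BINARY_CLASSIFICATION, metric=accuracy,
                           scoring_functions=[balanced_accuracy])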

autosklearn/estimators.py

Lines changed: 10 additions & 2 deletions
@@ -1,6 +1,6 @@
 # -*- encoding: utf-8 -*-

-from typing import Optional, Dict
+from typing import Optional, Dict, List

 import dask.distributed
 import joblib
@@ -9,6 +9,7 @@
 from sklearn.utils.multiclass import type_of_target

 from autosklearn.automl import AutoMLClassifier, AutoMLRegressor, AutoML
+from autosklearn.metrics import Scorer
 from autosklearn.util.backend import create


@@ -42,6 +43,7 @@ def __init__(
         logging_config=None,
         metadata_directory=None,
         metric=None,
+        scoring_functions: Optional[List[Scorer]] = None,
         load_models: bool = True,
     ):
         """
@@ -218,6 +220,10 @@ def __init__(
             Metrics`_.
             If None is provided, a default metric is selected depending on the task.

+        scoring_functions : List[Scorer], optional (None)
+            List of scorers which will be calculated for each pipeline and results will be
+            available via ``cv_results``
+
         load_models : bool, optional (True)
             Whether to load the models after fitting Auto-sklearn.

@@ -261,6 +267,7 @@ def __init__(
         self.logging_config = logging_config
         self.metadata_directory = metadata_directory
         self._metric = metric
+        self._scoring_functions = scoring_functions
         self._load_models = load_models

         self.automl_ = None  # type: Optional[AutoML]
@@ -316,7 +323,8 @@ def build_automl(
             smac_scenario_args=smac_scenario_args,
             logging_config=self.logging_config,
             metadata_directory=self.metadata_directory,
-            metric=self._metric
+            metric=self._metric,
+            scoring_functions=self._scoring_functions
         )

         return automl
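The new docstring entry documents scoring_functions as a list of Scorer objects. Besides the built-ins (accuracy, balanced_accuracy, roc_auc, log_loss, ...), a custom scorer can presumably be wrapped with autosklearn.metrics.make_scorer; a hedged sketch (the error_rate metric and its parameters are illustrative only):

    import sklearn.metrics
    import autosklearn.metrics

    # Wrap a custom function into a Scorer so it can be passed via scoring_functions.
    error_rate = autosklearn.metrics.make_scorer(
        name='error_rate',
        score_func=lambda y_true, y_pred: 1 - sklearn.metrics.accuracy_score(y_true, y_pred),
        optimum=0,
        greater_is_better=False,
    )

    # scoring_functions=[error_rate, autosklearn.metrics.roc_auc] would then add
    # 'metric_error_rate' and 'metric_roc_auc' columns to cv_results_.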

autosklearn/evaluation/__init__.py

Lines changed: 3 additions & 3 deletions
@@ -98,7 +98,7 @@ class ExecuteTaFuncWithQueue(AbstractTAFunc):
     def __init__(self, backend, autosklearn_seed, resampling_strategy, metric,
                  cost_for_crash, abort_on_first_run_crash,
                  initial_num_run=1, stats=None,
-                 run_obj='quality', par_factor=1, all_scoring_functions=False,
+                 run_obj='quality', par_factor=1, scoring_functions=None,
                  output_y_hat_optimization=True, include=None, exclude=None,
                  memory_limit=None, disable_file_output=False, init_params=None,
                  budget_type=None, ta=False, pynisher_context='spawn', **resampling_strategy_args):
@@ -152,7 +152,7 @@ def __init__(self, backend, autosklearn_seed, resampling_strategy, metric,
         self.metric = metric
         self.resampling_strategy = resampling_strategy
         self.resampling_strategy_args = resampling_strategy_args
-        self.all_scoring_functions = all_scoring_functions
+        self.scoring_functions = scoring_functions
         # TODO deactivate output_y_hat_optimization and let the respective evaluator decide
         self.output_y_hat_optimization = output_y_hat_optimization
         self.include = include
@@ -274,7 +274,7 @@ def run(
             metric=self.metric,
             seed=self.autosklearn_seed,
             num_run=num_run,
-            all_scoring_functions=self.all_scoring_functions,
+            scoring_functions=self.scoring_functions,
             output_y_hat_optimization=self.output_y_hat_optimization,
             include=self.include,
             exclude=self.exclude,

autosklearn/evaluation/abstract_evaluator.py

Lines changed: 10 additions & 10 deletions
@@ -110,7 +110,7 @@ def send_warnings_to_log(message, category, filename, lineno,
 class AbstractEvaluator(object):
     def __init__(self, backend, queue, metric,
                  configuration=None,
-                 all_scoring_functions=False,
+                 scoring_functions=None,
                  seed=1,
                  output_y_hat_optimization=True,
                  num_run=None,
@@ -141,7 +141,7 @@ def __init__(self, backend, queue, metric,
         self.seed = seed

         self.output_y_hat_optimization = output_y_hat_optimization
-        self.all_scoring_functions = all_scoring_functions
+        self.scoring_functions = scoring_functions

         if isinstance(disable_file_output, (bool, list)):
             self.disable_file_output = disable_file_output
@@ -221,7 +221,7 @@ def _get_model(self):
                                init_params=self._init_params)
         return model

-    def _loss(self, y_true, y_hat, all_scoring_functions=None):
+    def _loss(self, y_true, y_hat, scoring_functions=None):
         """Auto-sklearn follows a minimization goal, so the make_scorer
         sign is used as a guide to obtain the value to reduce.

@@ -233,20 +233,20 @@ def _loss(self, y_true, y_hat, all_scoring_functions=None):
         For accuracy for example: optimum(1) - (+1 * actual score)
         For logloss for example: optimum(0) - (-1 * actual score)
         """
-        all_scoring_functions = (
-            self.all_scoring_functions
-            if all_scoring_functions is None
-            else all_scoring_functions
+        scoring_functions = (
+            self.scoring_functions
+            if scoring_functions is None
+            else scoring_functions
         )
         if not isinstance(self.configuration, Configuration):
-            if all_scoring_functions:
-                return {self.metric: 1.0}
+            if scoring_functions:
+                return {self.metric.name: 1.0}
             else:
                 return 1.0

         score = calculate_score(
             y_true, y_hat, self.task_type, self.metric,
-            all_scoring_functions=all_scoring_functions)
+            scoring_functions=scoring_functions)

         if hasattr(score, '__len__'):
             # TODO: instead of using self.metric, it should use all metrics given by key.
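The docstring above defines how a score is turned into the loss SMAC minimizes; a tiny numeric illustration of that convention (not library code):

    # loss = optimum - (sign * actual score), so lower is always better.
    def to_loss(optimum, sign, actual_score):
        return optimum - sign * actual_score

    # accuracy (optimum 1, sign +1): a score of 0.92 becomes a loss of 0.08
    assert abs(to_loss(1.0, +1.0, 0.92) - 0.08) < 1e-9
    # log loss (optimum 0, sign -1): a raw log loss of 0.35 stays a loss of 0.35
    assert abs(to_loss(0.0, -1.0, 0.35) - 0.35) < 1e-9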

autosklearn/evaluation/test_evaluator.py

Lines changed: 6 additions & 6 deletions
@@ -19,7 +19,7 @@ class TestEvaluator(AbstractEvaluator):

     def __init__(self, backend, queue, metric,
                  configuration=None,
-                 all_scoring_functions=False,
+                 scoring_functions=None,
                  seed=1,
                  include=None,
                  exclude=None,
@@ -30,7 +30,7 @@ def __init__(self, backend, queue, metric,
             queue=queue,
             configuration=configuration,
             metric=metric,
-            all_scoring_functions=all_scoring_functions,
+            scoring_functions=scoring_functions,
             seed=seed,
             output_y_hat_optimization=False,
             num_run=-1,
@@ -74,7 +74,7 @@ def predict_and_loss(self, train=False):
                 prediction=Y_pred,
                 task_type=self.task_type,
                 metric=self.metric,
-                all_scoring_functions=self.all_scoring_functions)
+                scoring_functions=self.scoring_functions)
         else:
             Y_pred = self.predict_function(self.X_test, self.model,
                                            self.task_type, self.Y_train)
@@ -83,7 +83,7 @@ def predict_and_loss(self, train=False):
                 prediction=Y_pred,
                 task_type=self.task_type,
                 metric=self.metric,
-                all_scoring_functions=self.all_scoring_functions)
+                scoring_functions=self.scoring_functions)

         if hasattr(score, '__len__'):
             if self.task_type in CLASSIFICATION_TASKS:
@@ -101,13 +101,13 @@ def predict_and_loss(self, train=False):
 # create closure for evaluating an algorithm
 # Has a stupid name so pytest doesn't regard it as a test
 def eval_t(queue, config, backend, metric, seed, num_run, instance,
-           all_scoring_functions, output_y_hat_optimization, include,
+           scoring_functions, output_y_hat_optimization, include,
            exclude, disable_file_output, init_params=None, budget_type=None,
            budget=None):
     evaluator = TestEvaluator(configuration=config,
                               backend=backend, metric=metric, seed=seed,
                               queue=queue,
-                              all_scoring_functions=all_scoring_functions,
+                              scoring_functions=scoring_functions,
                               include=include, exclude=exclude,
                               disable_file_output=disable_file_output,
                               init_params=init_params)
