
Commit c2b0e73

Fix calculate loss (#1123)
* FIX an issue with calculate loss
* simplify code
* fix unit test
1 parent f6d6be4 commit c2b0e73
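
The sign convention this commit settles on can be summarised in a short standalone sketch (plain Python, not auto-sklearn code; the sign/optimum values below mirror how auto-sklearn's mean_squared_error Scorer is assumed to be parameterised):

import sklearn.metrics

y_true = [0.1, 0.2, 0.3]
y_pred = [0.12, 0.18, 0.33]

raw_mse = sklearn.metrics.mean_squared_error(y_true, y_pred)  # >= 0, lower is better
sign, optimum = -1.0, 0.0    # assumed Scorer attributes (metric._sign, metric._optimum)

score = sign * raw_mse       # what the Scorer, and hence calculate_score, yields: -MSE
loss = optimum - score       # calculate_loss after this commit: 0 - (-MSE) = MSE
metric_value = sign * score  # calculate_metric: the raw MSE, in the metric's own direction

assert loss == raw_mse and metric_value == raw_mse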

File tree: 5 files changed (+160, -66 lines)


autosklearn/automl.py

Lines changed: 5 additions & 6 deletions
@@ -40,7 +40,7 @@
 from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash
 from autosklearn.evaluation.abstract_evaluator import _fit_and_suppress_warnings
 from autosklearn.evaluation.train_evaluator import _fit_with_budget
-from autosklearn.metrics import calculate_score
+from autosklearn.metrics import calculate_metric
 from autosklearn.util.backend import Backend
 from autosklearn.util.stopwatch import StopWatch
 from autosklearn.util.logging_ import (
@@ -1153,11 +1153,10 @@ def score(self, X, y):
         # same representation domain
         prediction = self.InputValidator.target_validator.transform(prediction)

-        return calculate_score(solution=y,
-                               prediction=prediction,
-                               task_type=self._task,
-                               metric=self._metric,
-                               scoring_functions=None)
+        return calculate_metric(solution=y,
+                                prediction=prediction,
+                                task_type=self._task,
+                                metric=self._metric, )

     @property
     def cv_results_(self):
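
A rough equivalent of what AutoML.score(X, y) computes after the change above: the validated predictions go to the new calculate_metric helper, so the configured metric comes back in its own direction. The arrays below are hypothetical stand-ins for y and self.predict(X); an auto-sklearn checkout containing this commit is assumed.

import numpy as np

from autosklearn.constants import BINARY_CLASSIFICATION
from autosklearn.metrics import accuracy, calculate_metric

y_test = np.array([0, 1, 1, 0, 1])   # hypothetical ground truth
y_hat = np.array([0, 1, 0, 0, 1])    # hypothetical model predictions

# 4 of 5 predictions are correct, so this prints 0.8
print(calculate_metric(solution=y_test, prediction=y_hat,
                       task_type=BINARY_CLASSIFICATION, metric=accuracy))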

autosklearn/ensemble_builder.py

Lines changed: 3 additions & 4 deletions
@@ -420,7 +420,7 @@ def __init__(
         performance_range_threshold: float
             Keep only models that are better than:
                 dummy + (best - dummy)*performance_range_threshold
-            E.g dummy=2, best=4, thresh=0.5 --> only consider models with score > 3
+            E.g dummy=2, best=4, thresh=0.5 --> only consider models with loss > 3
             Will at most return the minimum between ensemble_nbest models,
             and max_models_on_disc. Might return less
         seed: int
@@ -978,7 +978,7 @@ def get_n_best_preds(self):
            # no model left; try to use dummy loss (num_run==0)
            # log warning when there are other models but not better than dummy model
            if num_keys > num_dummy:
-                self.logger.warning("No models better than random - using Dummy Score!"
+                self.logger.warning("No models better than random - using Dummy loss!"
                                    "Number of models besides current dummy model: %d. "
                                    "Number of dummy models: %d",
                                    num_keys - 1,
@@ -1105,8 +1105,7 @@ def get_n_best_preds(self):
            # only if the model ends up in the ensemble
            self.read_losses[k]['loaded'] = 1

-        # return best scored keys of self.read_losses
-        # That is, the one with the lowest loss
+        # return keys of self.read_losses with lowest losses
        return sorted_keys[:ensemble_n_best]

    def get_valid_test_preds(self, selected_keys: List[str]) -> Tuple[List[str], List[str]]:
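
For reference, the cutoff in the performance_range_threshold docstring above works out as follows (values taken from its "E.g" line; this is just the documented arithmetic, not ensemble-builder code):

dummy, best, threshold = 2, 4, 0.5
cutoff = dummy + (best - dummy) * threshold
print(cutoff)  # 3 -- the docstring example keeps only models beyond this value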

autosklearn/metrics/__init__.py

Lines changed: 122 additions & 48 deletions
@@ -342,18 +342,40 @@ def calculate_score(
     metric: Scorer,
     scoring_functions: Optional[List[Scorer]] = None
 ) -> Union[float, Dict[str, float]]:
+    """
+    Returns a score (a magnitude that allows casting the
+    optimization problem as a maximization one) for the
+    given Auto-Sklearn Scorer object
+
+    Parameters
+    ----------
+    solution: np.ndarray
+        The ground truth of the targets
+    prediction: np.ndarray
+        The best estimate from the model, of the given targets
+    task_type: int
+        To understand if the problem task is classification
+        or regression
+    metric: Scorer
+        Object that host a function to calculate how good the
+        prediction is according to the solution.
+    scoring_functions: List[Scorer]
+        A list of metrics to calculate multiple losses
+    Returns
+    -------
+    float or Dict[str, float]
+    """
     if task_type not in TASK_TYPES:
         raise NotImplementedError(task_type)

     if scoring_functions:
         score_dict = dict()
         if task_type in REGRESSION_TASKS:
-            # TODO put this into the regression metric itself
-            cprediction = sanitize_array(prediction)
-            for metric_ in scoring_functions:
+            for metric_ in scoring_functions + [metric]:

                 try:
-                    score_dict[metric_.name] = metric_._sign * metric_(solution, cprediction)
+                    score_dict[metric_.name] = _compute_scorer(
+                        metric_, prediction, solution, task_type)
                 except ValueError as e:
                     print(e, e.args[0])
                     if e.args[0] == "Mean Squared Logarithmic Error cannot be used when " \
@@ -363,13 +385,14 @@ def calculate_score(
                         raise e

         else:
-            for metric_ in scoring_functions:
+            for metric_ in scoring_functions + [metric]:

                 # TODO maybe annotate metrics to define which cases they can
                 # handle?

                 try:
-                    score_dict[metric_.name] = metric_._sign * metric_(solution, prediction)
+                    score_dict[metric_.name] = _compute_scorer(
+                        metric_, prediction, solution, task_type)
                 except ValueError as e:
                     if e.args[0] == 'multiclass format is not supported':
                         continue
@@ -383,34 +406,10 @@ def calculate_score(
                     else:
                         raise e

-        if metric.name not in score_dict.keys():
-            score_dict[metric.name] = get_metric_score(metric, prediction, solution, task_type)
         return score_dict

     else:
-        return get_metric_score(metric, prediction, solution, task_type)
-
-
-def get_metric_score(
-        metric_: Scorer,
-        prediction: np.ndarray,
-        solution: np.ndarray,
-        task_type: int
-) -> float:
-    # We match the behaviour of GridSearchCV
-    # In scikit learn, the exact value of the score_func
-    # is returned (not that of the 'Scorer' which might be
-    # negative in functions like mse, as scikit learn
-    # maximizes.) If an user wants to use GridSearchCV
-    # They are expected to pass neg_mean_squared_error
-    # For this reason we multiply back by metric_._sign
-    if task_type in REGRESSION_TASKS:
-        # TODO put this into the regression metric itself
-        cprediction = sanitize_array(prediction)
-        score = metric_._sign * metric_(solution, cprediction)
-    else:
-        score = metric_._sign * metric_(solution, prediction)
-    return score
+        return _compute_scorer(metric, prediction, solution, task_type)


 def calculate_loss(
@@ -422,26 +421,28 @@ def calculate_loss(
 ) -> Union[float, Dict[str, float]]:
     """
     Returns a loss (a magnitude that allows casting the
-    optimization problem, as a minimization one) for the
+    optimization problem as a minimization one) for the
     given Auto-Sklearn Scorer object
+
     Parameters
     ----------
-        solution: np.ndarray
-            The ground truth of the targets
-        prediction: np.ndarray
-            The best estimate from the model, of the given targets
-        task_type: int
-            To understand if the problem task is classification
-            or regression
-        metric: Scorer
-            Object that host a function to calculate how good the
-            prediction is according to the solution.
-        scoring_functions: List[Scorer]
-            A list of metrics to calculate multiple losses
+    solution: np.ndarray
+        The ground truth of the targets
+    prediction: np.ndarray
+        The best estimate from the model, of the given targets
+    task_type: int
+        To understand if the problem task is classification
+        or regression
+    metric: Scorer
+        Object that host a function to calculate how good the
+        prediction is according to the solution.
+    scoring_functions: List[Scorer]
+        A list of metrics to calculate multiple losses
+
     Returns
     -------
-        float or Dict[str, float]
-            A loss function for each of the provided scorer objects
+    float or Dict[str, float]
+        A loss function for each of the provided scorer objects
     """
     score = calculate_score(
         solution=solution,
@@ -463,7 +464,80 @@ def calculate_loss(
             # maybe metric argument is not in scoring_functions
             # so append it to the list. Rather than check if such
             # is the case, redefining loss_dict[metric] is less expensive
-            loss_dict[metric_.name] = metric_._optimum - metric_._sign * score[metric_.name]
+            loss_dict[metric_.name] = metric_._optimum - score[metric_.name]
         return loss_dict
     else:
-        return metric._optimum - metric._sign * cast(float, score)
+        rval = metric._optimum - cast(float, score)
+        return rval
+
+
+def calculate_metric(
+    metric: Scorer,
+    prediction: np.ndarray,
+    solution: np.ndarray,
+    task_type: int
+) -> float:
+    """
+    Returns a metric for the given Auto-Sklearn Scorer object.
+    It's direction is determined by the metric itself.
+
+    Parameters
+    ----------
+    solution: np.ndarray
+        The ground truth of the targets
+    prediction: np.ndarray
+        The best estimate from the model, of the given targets
+    task_type: int
+        To understand if the problem task is classification
+        or regression
+    metric: Scorer
+        Object that host a function to calculate how good the
+        prediction is according to the solution.
+
+    Returns
+    -------
+    float
+    """
+    score = _compute_scorer(
+        solution=solution,
+        prediction=prediction,
+        metric=metric,
+        task_type=task_type,
+    )
+    return metric._sign * score
+
+
+def _compute_scorer(
+    metric: Scorer,
+    prediction: np.ndarray,
+    solution: np.ndarray,
+    task_type: int
+) -> float:
+    """
+    Returns a score (a magnitude that allows casting the
+    optimization problem as a maximization one) for the
+    given Auto-Sklearn Scorer object
+
+    Parameters
+    ----------
+    solution: np.ndarray
+        The ground truth of the targets
+    prediction: np.ndarray
+        The best estimate from the model, of the given targets
+    task_type: int
+        To understand if the problem task is classification
+        or regression
+    metric: Scorer
+        Object that host a function to calculate how good the
+        prediction is according to the solution.
+    Returns
+    -------
+    float
+    """
+    if task_type in REGRESSION_TASKS:
+        # TODO put this into the regression metric itself
+        cprediction = sanitize_array(prediction)
+        score = metric(solution, cprediction)
+    else:
+        score = metric(solution, prediction)
+    return score
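
Taken together, the three public helpers now relate as sketched below, using the same regression values the updated tests exercise. This assumes an auto-sklearn checkout that contains this commit; the comments restate what the new code above computes.

import numpy as np
import sklearn.metrics

import autosklearn.metrics
from autosklearn.constants import REGRESSION
from autosklearn.metrics import calculate_score, calculate_loss, calculate_metric

y_true = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
y_pred = np.array([0.11, 0.22, 0.33, 0.44, 0.55, 0.66])
mse = sklearn.metrics.mean_squared_error(y_true, y_pred)

kwargs = dict(solution=y_true, prediction=y_pred, task_type=REGRESSION,
              metric=autosklearn.metrics.mean_squared_error)

print(calculate_score(**kwargs))   # -mse: the Scorer's sign-adjusted value (higher is better)
print(calculate_loss(**kwargs))    #  mse: metric._optimum - score (lower is better)
print(calculate_metric(**kwargs))  #  mse: metric._sign * score, the metric's natural direction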

test/test_automl/test_automl.py

Lines changed: 1 addition & 1 deletion
@@ -637,7 +637,7 @@ def test_load_best_individual_model(metric, backend, dask_client):
         assert automl.score(X_test, Y_test) > 0.9
     elif metric.name == 'log_loss':
         # Seen values in github actions of 0.6978304740364537
-        assert automl.score(X_test, Y_test) <= 0.72
+        assert automl.score(X_test, Y_test) < 0.7
     else:
         raise ValueError(metric.name)

test/test_metric/test_metrics.py

Lines changed: 29 additions & 7 deletions
@@ -7,7 +7,7 @@

 import autosklearn.metrics

-from autosklearn.metrics import calculate_score, calculate_loss
+from autosklearn.metrics import calculate_score, calculate_loss, calculate_metric
 from autosklearn.constants import BINARY_CLASSIFICATION, REGRESSION

 from smac.utils.constants import MAXINT
@@ -536,7 +536,7 @@ def test_regression_only_metric(self):

 def test_calculate_loss():
     # In a 0-1 ranged scorer, make sure that the loss
-    # has a expected positive value
+    # has an expected positive value
     y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
     y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
     score = sklearn.metrics.accuracy_score(y_true, y_pred)
@@ -546,8 +546,7 @@ def test_calculate_loss():
         task_type=BINARY_CLASSIFICATION,
         metric=autosklearn.metrics.accuracy,
     )
-    loss = 1.0 - score
-    assert pytest.approx(loss) == calculate_loss(
+    assert pytest.approx(1.0 - score) == calculate_loss(
         solution=y_true,
         prediction=y_pred,
         task_type=BINARY_CLASSIFICATION,
@@ -582,14 +581,37 @@ def test_calculate_loss():
     y_true = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
     y_pred = np.array([0.11, 0.22, 0.33, 0.44, 0.55, 0.66])
     score = sklearn.metrics.mean_squared_error(y_true, y_pred)
-    assert pytest.approx(score) == calculate_score(
+    assert pytest.approx(0 - score) == calculate_score(
+        solution=y_true,
+        prediction=y_pred,
+        task_type=REGRESSION,
+        metric=autosklearn.metrics.mean_squared_error,
+    )
+    assert pytest.approx(score) == calculate_loss(
         solution=y_true,
         prediction=y_pred,
         task_type=REGRESSION,
         metric=autosklearn.metrics.mean_squared_error,
     )
-    loss = score
-    assert pytest.approx(loss) == calculate_loss(
+
+
+def test_calculate_metric():
+    # metric to be maximized
+    y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
+    y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
+    score = sklearn.metrics.accuracy_score(y_true, y_pred)
+    assert pytest.approx(score) == calculate_metric(
+        solution=y_true,
+        prediction=y_pred,
+        task_type=BINARY_CLASSIFICATION,
+        metric=autosklearn.metrics.accuracy,
+    )
+
+    # metric to be minimized
+    y_true = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
+    y_pred = np.array([0.11, 0.22, 0.33, 0.44, 0.55, 0.66])
+    score = sklearn.metrics.mean_squared_error(y_true, y_pred)
+    assert pytest.approx(score) == calculate_metric(
         solution=y_true,
         prediction=y_pred,
         task_type=REGRESSION,