1 change: 1 addition & 0 deletions dask_ml/metrics/__init__.py
@@ -6,6 +6,7 @@
)
from .regression import ( # noqa
mean_absolute_error,
mean_absolute_percentage_error,
mean_squared_error,
mean_squared_log_error,
r2_score,
63 changes: 63 additions & 0 deletions dask_ml/metrics/regression.py
@@ -81,6 +81,69 @@ def mean_absolute_error(
return result


def mean_absolute_percentage_error(
y_true: ArrayLike,
y_pred: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
multioutput: Optional[str] = "uniform_average",
compute: bool = True,
) -> ArrayLike:
"""Mean absolute percentage error regression loss.

Note that the output is not represented as a percentage in the range
[0, 100]; instead it lies in the range [0, 1/eps]. Read more at
https://scikit-learn.org/stable/modules/model_evaluation.html#mean-absolute-percentage-error

Parameters
----------
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
Ground truth (correct) target values.
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
Estimated target values.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
multioutput : {'raw_values', 'uniform_average'} or array-like
Defines aggregating of multiple output values.
Array-like value defines weights used to average errors.
If input is list then the shape must be (n_outputs,).
'raw_values' :
Returns a full set of errors in case of multioutput input.
'uniform_average' :
Errors of all outputs are averaged with uniform weight.
compute : bool
Whether to compute this result (default ``True``)

Returns
-------
loss : float or array-like of floats in the range [0, 1/eps]
If multioutput is 'raw_values', then mean absolute percentage error
is returned for each output separately.
If multioutput is 'uniform_average' or ``None``, then the
equally-weighted average of all output errors is returned.
MAPE output is non-negative floating point. The best value is 0.0.
Note that bad predictions can lead to arbitrarily large MAPE values,
especially if some y_true values are very close to zero; a large finite
value is returned instead of ``inf`` when y_true is zero.
"""
_check_sample_weight(sample_weight)
epsilon = np.finfo(np.float64).eps
mape = abs(y_pred - y_true) / da.maximum(abs(y_true), epsilon)
output_errors = mape.mean(axis=0)

if isinstance(multioutput, str) or multioutput is None:
if multioutput == "raw_values":
if compute:
return output_errors.compute()
else:
return output_errors
else:
raise ValueError("Weighted 'multioutput' not supported.")
result = output_errors.mean()
if compute:
result = result.compute()
return result


@derived_from(sklearn.metrics)
def r2_score(
y_true: ArrayLike,
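As a usage note, here is a minimal sketch of calling the new metric on chunked dask arrays and checking it against scikit-learn >= 0.24. It is illustrative only; the data values and chunk sizes are made up, and it assumes a dask-ml build that includes this change.

```python
import numpy as np
import dask.array as da
import sklearn.metrics

import dask_ml.metrics

# Illustrative data; small chunks stand in for larger-than-memory inputs.
y_true = da.from_array(np.array([3.0, 0.5, 2.0, 7.0]), chunks=2)
y_pred = da.from_array(np.array([2.5, 0.0, 2.0, 8.0]), chunks=2)

# compute=True (the default) materializes the result as a concrete float.
score = dask_ml.metrics.mean_absolute_percentage_error(y_true, y_pred)

# compute=False keeps the result lazy so it can be fused into a larger graph.
lazy = dask_ml.metrics.mean_absolute_percentage_error(y_true, y_pred, compute=False)

# Should agree with scikit-learn (>= 0.24) evaluated on the concrete arrays.
expected = sklearn.metrics.mean_absolute_percentage_error(
    y_true.compute(), y_pred.compute()
)
print(score, lazy.compute(), expected)
```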
1 change: 1 addition & 0 deletions docs/source/modules/api.rst
@@ -245,6 +245,7 @@ Regression Metrics
:toctree: generated/

metrics.mean_absolute_error
metrics.mean_absolute_percentage_error
metrics.mean_squared_error
metrics.mean_squared_log_error
metrics.r2_score
14 changes: 13 additions & 1 deletion tests/metrics/test_regression.py
@@ -5,14 +5,26 @@
import sklearn.metrics

import dask_ml.metrics
from dask_ml._compat import SK_024

_METRICS_TO_TEST = [
"mean_squared_error",
"mean_absolute_error",
"r2_score",
]

-@pytest.fixture(params=["mean_squared_error", "mean_absolute_error", "r2_score"])
# mean_absolute_percentage_error() was added in scikit-learn 0.24.0
if SK_024:
_METRICS_TO_TEST.append("mean_absolute_percentage_error")


@pytest.fixture(params=_METRICS_TO_TEST)
def metric_pairs(request):
"""Pairs of (dask-ml, sklearn) regression metrics.

* mean_squared_error
* mean_absolute_error
* mean_absolute_percentage_error (if scikit-learn >= 0.24.0)
* r2_score
"""
return (
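For context, a hedged sketch of how a fixture like `metric_pairs` is typically consumed in these parity tests; the test name and body below are illustrative rather than the exact test in this file.

```python
import dask.array as da
import numpy as np


def test_regression_metric_matches_sklearn(metric_pairs):
    m1, m2 = metric_pairs  # (dask-ml metric, scikit-learn metric)

    rng = np.random.RandomState(0)
    a = rng.uniform(size=(100,))
    b = rng.uniform(size=(100,))

    # dask-ml on chunked dask arrays vs. scikit-learn on the same data as NumPy.
    result = m1(da.from_array(a, chunks=25), da.from_array(b, chunks=25))
    expected = m2(a, b)

    assert abs(result - expected) < 1e-5
```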