
Commit 27d8d37

add MAPE to regression metrics (fixes #691) (#822)
* add MAPE to regression metrics (fixes #691)
* linting
* fix compatibility with older scikit-learn
* linting
Parent: 415c23b

4 files changed (+78 −1 lines)


dask_ml/metrics/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@
 )
 from .regression import (  # noqa
     mean_absolute_error,
+    mean_absolute_percentage_error,
     mean_squared_error,
     mean_squared_log_error,
     r2_score,
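
With this re-export in place, the new metric is importable straight from the package's metrics namespace; a quick sanity check, assuming the layout above:

    from dask_ml.metrics import mean_absolute_percentage_error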

dask_ml/metrics/regression.py

Lines changed: 63 additions & 0 deletions
@@ -81,6 +81,69 @@ def mean_absolute_error(
     return result


+def mean_absolute_percentage_error(
+    y_true: ArrayLike,
+    y_pred: ArrayLike,
+    sample_weight: Optional[ArrayLike] = None,
+    multioutput: Optional[str] = "uniform_average",
+    compute: bool = True,
+) -> ArrayLike:
+    """Mean absolute percentage error regression loss.
+
+    Note here that we do not represent the output as a percentage in range
+    [0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in
+    https://scikit-learn.org/stable/modules/model_evaluation.html#mean-absolute-percentage-error
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Ground truth (correct) target values.
+    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Estimated target values.
+    sample_weight : array-like of shape (n_samples,), default=None
+        Sample weights.
+    multioutput : {'raw_values', 'uniform_average'} or array-like
+        Defines aggregating of multiple output values.
+        Array-like value defines weights used to average errors.
+        If input is list then the shape must be (n_outputs,).
+        'raw_values' :
+            Returns a full set of errors in case of multioutput input.
+        'uniform_average' :
+            Errors of all outputs are averaged with uniform weight.
+    compute : bool
+        Whether to compute this result (default ``True``)
+
+    Returns
+    -------
+    loss : float or array-like of floats in the range [0, 1/eps]
+        If multioutput is 'raw_values', then mean absolute percentage error
+        is returned for each output separately.
+        If multioutput is 'uniform_average' or ``None``, then the
+        equally-weighted average of all output errors is returned.
+        MAPE output is non-negative floating point. The best value is 0.0.
+        But note that bad predictions can lead to arbitrarily large MAPE
+        values, especially if some y_true values are very close to zero.
+        Note that we return a large value instead of ``inf`` when y_true
+        is zero.
+    """
+    _check_sample_weight(sample_weight)
+    epsilon = np.finfo(np.float64).eps
+    mape = abs(y_pred - y_true) / da.maximum(y_true, epsilon)
+    output_errors = mape.mean(axis=0)
+
+    if isinstance(multioutput, str) or multioutput is None:
+        if multioutput == "raw_values":
+            if compute:
+                return output_errors.compute()
+            else:
+                return output_errors
+    else:
+        raise ValueError("Weighted 'multioutput' not supported.")
+    result = output_errors.mean()
+    if compute:
+        result = result.compute()
+    return result
+
+
 @derived_from(sklearn.metrics)
 def r2_score(
     y_true: ArrayLike,
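
For context, a minimal usage sketch of the new function; the input values are illustrative, not from the repository:

    import dask.array as da
    import numpy as np

    import dask_ml.metrics

    y_true = da.from_array(np.array([3.0, 0.5, 2.0, 7.0]), chunks=2)
    y_pred = da.from_array(np.array([2.5, 0.4, 2.0, 8.0]), chunks=2)

    # Mean of |y_pred - y_true| / max(y_true, eps); with the default
    # compute=True this returns a concrete float, not a lazy dask scalar.
    dask_ml.metrics.mean_absolute_percentage_error(y_true, y_pred)
    # ≈ 0.1274

    # Because the denominator is clamped at eps, a zero in y_true produces
    # a very large finite value (here ~2.25e15) rather than inf.
    zeros_true = da.from_array(np.array([0.0, 1.0]), chunks=2)
    ones_pred = da.from_array(np.array([1.0, 1.0]), chunks=2)
    dask_ml.metrics.mean_absolute_percentage_error(zeros_true, ones_pred)

Passing compute=False instead returns the lazy dask scalar, which can be folded into a larger graph and computed later.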

docs/source/modules/api.rst

Lines changed: 1 addition & 0 deletions
@@ -245,6 +245,7 @@ Regression Metrics
    :toctree: generated/

    metrics.mean_absolute_error
+   metrics.mean_absolute_percentage_error
    metrics.mean_squared_error
    metrics.mean_squared_log_error
    metrics.r2_score

tests/metrics/test_regression.py

Lines changed: 13 additions & 1 deletion
@@ -5,14 +5,26 @@
 import sklearn.metrics

 import dask_ml.metrics
+from dask_ml._compat import SK_024

+_METRICS_TO_TEST = [
+    "mean_squared_error",
+    "mean_absolute_error",
+    "r2_score",
+]

-@pytest.fixture(params=["mean_squared_error", "mean_absolute_error", "r2_score"])
+# mean_absolute_percentage_error() was added in scikit-learn 0.24.0
+if SK_024:
+    _METRICS_TO_TEST.append("mean_absolute_percentage_error")
+
+
+@pytest.fixture(params=_METRICS_TO_TEST)
 def metric_pairs(request):
     """Pairs of (dask-ml, sklearn) regression metrics.

     * mean_squared_error
     * mean_absolute_error
+    * mean_absolute_percentage_error (if scikit-learn >= 0.24.0)
     * r2_score
     """
     return (
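
For reference, a sketch of how a parametrized fixture like this is typically consumed in a parity test; the test body, data, and tolerance below are illustrative assumptions, not necessarily the file's exact contents:

    import dask.array as da
    import numpy as np


    def test_regression_metric_parity(metric_pairs):
        m1, m2 = metric_pairs  # (dask-ml metric, scikit-learn metric)
        rng = np.random.RandomState(0)
        a = rng.uniform(size=(100,))
        b = rng.uniform(size=(100,))
        # The dask-ml metric evaluated on chunked dask arrays should agree
        # with the scikit-learn metric evaluated on the same data as NumPy.
        result = m1(da.from_array(a, chunks=25), da.from_array(b, chunks=25))
        expected = m2(a, b)
        assert abs(result - expected) < 1e-5

Guarding the MAPE entry behind SK_024 keeps the suite passing on older scikit-learn versions, where there is no sklearn.metrics.mean_absolute_percentage_error to compare against.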
