diff --git a/dask_ml/metrics/__init__.py b/dask_ml/metrics/__init__.py
index 05349d10f..32b85f721 100644
--- a/dask_ml/metrics/__init__.py
+++ b/dask_ml/metrics/__init__.py
@@ -6,6 +6,7 @@
 )
 from .regression import (  # noqa
     mean_absolute_error,
+    mean_absolute_percentage_error,
     mean_squared_error,
     mean_squared_log_error,
     r2_score,
diff --git a/dask_ml/metrics/regression.py b/dask_ml/metrics/regression.py
index d1849ef88..7fcdde8fe 100644
--- a/dask_ml/metrics/regression.py
+++ b/dask_ml/metrics/regression.py
@@ -81,6 +81,77 @@ def mean_absolute_error(
     return result
 
 
+def mean_absolute_percentage_error(
+    y_true: ArrayLike,
+    y_pred: ArrayLike,
+    sample_weight: Optional[ArrayLike] = None,
+    multioutput: Optional[str] = "uniform_average",
+    compute: bool = True,
+) -> ArrayLike:
+    """Mean absolute percentage error regression loss.
+
+    Note here that we do not represent the output as a percentage in range
+    [0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in
+    https://scikit-learn.org/stable/modules/model_evaluation.html#mean-absolute-percentage-error
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Ground truth (correct) target values.
+    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Estimated target values.
+    sample_weight : array-like of shape (n_samples,), default=None
+        Sample weights. Passed to ``_check_sample_weight`` for validation; they
+        are not otherwise used in the computation.
+    multioutput : {'raw_values', 'uniform_average'} or None
+        Defines aggregating of multiple output values.
+        Array-like weights are not supported and raise ``ValueError``.
+        'raw_values' :
+            Returns a full set of errors in case of multioutput input.
+        'uniform_average' :
+            Errors of all outputs are averaged with uniform weight.
+    compute : bool
+        Whether to compute this result (default ``True``). When ``False`` the
+        result is returned without calling ``.compute()`` on it.
+
+    Returns
+    -------
+    loss : float or array-like of floats in the range [0, 1/eps]
+        If multioutput is 'raw_values', then mean absolute percentage error
+        is returned for each output separately.
+        If multioutput is 'uniform_average' or ``None``, then the
+        equally-weighted average of all output errors is returned.
+        MAPE output is non-negative floating point. The best value is 0.0.
+        But note the fact that bad predictions can lead to arbitrarily large
+        MAPE values, especially if some y_true values are very close to zero.
+        Note that we return a large value instead of `inf` when y_true is zero.
+
+    Raises
+    ------
+    ValueError
+        If ``multioutput`` is neither a string nor ``None``.
+    """
+    _check_sample_weight(sample_weight)
+    epsilon = np.finfo(np.float64).eps
+    # Divide by the magnitude of y_true (as scikit-learn does) so negative
+    # targets are not clamped to epsilon, which would inflate the error.
+    mape = abs(y_pred - y_true) / da.maximum(abs(y_true), epsilon)
+    output_errors = mape.mean(axis=0)
+
+    if isinstance(multioutput, str) or multioutput is None:
+        if multioutput == "raw_values":
+            if compute:
+                return output_errors.compute()
+            else:
+                return output_errors
+    else:
+        raise ValueError("Weighted 'multioutput' not supported.")
+    result = output_errors.mean()
+    if compute:
+        result = result.compute()
+    return result
+
+
 @derived_from(sklearn.metrics)
 def r2_score(
     y_true: ArrayLike,
diff --git a/docs/source/modules/api.rst b/docs/source/modules/api.rst
index 601357ba6..7a2d4d06c 100644
--- a/docs/source/modules/api.rst
+++ b/docs/source/modules/api.rst
@@ -245,6 +245,7 @@ Regression Metrics
    :toctree: generated/
 
    metrics.mean_absolute_error
+   metrics.mean_absolute_percentage_error
    metrics.mean_squared_error
    metrics.mean_squared_log_error
    metrics.r2_score
diff --git a/tests/metrics/test_regression.py b/tests/metrics/test_regression.py
index dfdc5480c..475b6e31c 100644
--- a/tests/metrics/test_regression.py
+++ b/tests/metrics/test_regression.py
@@ -5,14 +5,26 @@
 import sklearn.metrics
 
 import dask_ml.metrics
+from dask_ml._compat import SK_024
 
+_METRICS_TO_TEST = [
+    "mean_squared_error",
+    "mean_absolute_error",
+    "r2_score",
+]
 
-@pytest.fixture(params=["mean_squared_error", "mean_absolute_error", "r2_score"])
+# mean_absolute_percentage_error() was added in scikit-learn 0.24.0
+if SK_024:
+    _METRICS_TO_TEST.append("mean_absolute_percentage_error")
+
+
+@pytest.fixture(params=_METRICS_TO_TEST)
 def metric_pairs(request):
     """Pairs of (dask-ml, sklearn) regression metrics.
 
     * mean_squared_error
     * mean_absolute_error
+    * mean_absolute_percentage_error (if scikit-learn >= 0.24.0)
     * r2_score
     """
     return (