Update regression metrics to handle multiseries problems (#4233)

eccabay · web-flow · commit 695b5ec6a45a · 2023-07-12T15:16:10.000-04:00
* Update RMSLE and MaxError to accommodate multiseries

* Update MAPE to use sktime impl for multiseries support

* Add tests for regression+df metrics
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -2,6 +2,7 @@ Release Notes
 -------------
 **Future Releases**
     * Enhancements
+        * Updated regression metrics to handle multioutput dataframes as well as single output series :pr:`4233`
     * Fixes
     * Changes
         * Unpinned sktime version :pr:`4214`
diff --git a/evalml/objectives/objective_base.py b/evalml/objectives/objective_base.py
@@ -143,13 +143,17 @@ def validate_inputs(self, y_true, y_predicted):
             )
         if len(y_true) == 0:
             raise ValueError("Length of inputs is 0")
+
+        if isinstance(y_true, pd.DataFrame):
+            y_true = y_true.to_numpy().flatten()
         if np.isnan(y_true).any() or np.isinf(y_true).any():
             raise ValueError("y_true contains NaN or infinity")
-        # y_predicted could be a 1d vector (predictions) or a 2d vector (classifier predicted probabilities)
-        y_pred_flat = y_predicted.to_numpy().flatten()
-        if np.isnan(y_pred_flat).any() or np.isinf(y_pred_flat).any():
+
+        if isinstance(y_predicted, pd.DataFrame):
+            y_predicted = y_predicted.to_numpy().flatten()
+        if np.isnan(y_predicted).any() or np.isinf(y_predicted).any():
             raise ValueError("y_predicted contains NaN or infinity")
-        if self.score_needs_proba and np.any([(y_pred_flat < 0) | (y_pred_flat > 1)]):
+        if self.score_needs_proba and np.any([(y_predicted < 0) | (y_predicted > 1)]):
             raise ValueError(
                 "y_predicted contains probability estimates not within [0, 1]",
             )
diff --git a/evalml/objectives/standard_metrics.py b/evalml/objectives/standard_metrics.py
@@ -721,13 +721,27 @@ class RootMeanSquaredLogError(RegressionObjective):
 
     def objective_function(self, y_true, y_predicted, X=None, sample_weight=None):
         """Objective function for root mean squared log error for regression."""
-        return np.sqrt(
-            metrics.mean_squared_log_error(
-                y_true,
-                y_predicted,
-                sample_weight=sample_weight,
-            ),
-        )
+
+        def rmsle(y_true, y_pred):
+            return np.sqrt(
+                metrics.mean_squared_log_error(
+                    y_true,
+                    y_pred,
+                    sample_weight=sample_weight,
+                ),
+            )
+
+        # Multiseries time series regression
+        if isinstance(y_true, pd.DataFrame):
+            raw_rmsles = []
+            for i in range(len(y_true.columns)):
+                y_true_i = y_true.iloc[:, i]
+                y_predicted_i = y_predicted.iloc[:, i]
+                raw_rmsles.append(rmsle(y_true_i, y_predicted_i))
+            return np.mean(raw_rmsles)
+
+        # All univariate regression
+        return rmsle(y_true, y_predicted)
 
     @classproperty
     def positive_only(self):
@@ -833,17 +847,13 @@ class MAPE(TimeSeriesRegressionObjective):
 
     def objective_function(self, y_true, y_predicted, X=None, sample_weight=None):
         """Objective function for mean absolute percentage error for time series regression."""
-        if (y_true == 0).any():
+        if 0 in y_true.values:
             raise ValueError(
                 "Mean Absolute Percentage Error cannot be used when "
                 "targets contain the value 0.",
             )
-        if isinstance(y_true, pd.Series):
-            y_true = y_true.to_numpy()
-        if isinstance(y_predicted, pd.Series):
-            y_predicted = y_predicted.to_numpy()
-        scaled_difference = (y_true - y_predicted) / y_true
-        return np.abs(scaled_difference).mean() * 100
+        mape = MeanAbsolutePercentageError()
+        return mape(y_true, y_predicted) * 100
 
     @classproperty
     def positive_only(self):
@@ -871,15 +881,11 @@ class SMAPE(TimeSeriesRegressionObjective):
 
     def objective_function(self, y_true, y_predicted, X=None, sample_weight=None):
         """Objective function for mean absolute percentage error for time series regression."""
-        if ((abs(y_true) + abs(y_predicted)) == 0).any():
+        if 0 in (abs(y_true) + abs(y_predicted)).values:
             raise ValueError(
                 "Symmetric Mean Absolute Percentage Error cannot be used when "
                 "true and predicted targets both contain the value 0.",
             )
-        if isinstance(y_true, pd.Series):
-            y_true = y_true.to_numpy()
-        if isinstance(y_predicted, pd.Series):
-            y_predicted = y_predicted.to_numpy()
 
         smape = MeanAbsolutePercentageError(symmetric=True)
         return smape(y_true, y_predicted) * 100
@@ -958,6 +964,16 @@ class MaxError(RegressionObjective):
 
     def objective_function(self, y_true, y_predicted, X=None, sample_weight=None):
         """Objective function for maximum residual error for regression."""
+        # Multiseries time series regression
+        if isinstance(y_true, pd.DataFrame):
+            raw_max_errors = []
+            for i in range(len(y_true.columns)):
+                y_true_i = y_true.iloc[:, i]
+                y_predicted_i = y_predicted.iloc[:, i]
+                raw_max_errors.append(metrics.max_error(y_true_i, y_predicted_i))
+            return np.mean(raw_max_errors)
+
+        # All other regression problems
         return metrics.max_error(y_true, y_predicted)
 
 
diff --git a/evalml/tests/objective_tests/test_standard_metrics.py b/evalml/tests/objective_tests/test_standard_metrics.py
@@ -31,6 +31,7 @@
     RecallMacro,
     RecallMicro,
     RecallWeighted,
+    RegressionObjective,
     RootMeanSquaredError,
     RootMeanSquaredLogError,
 )
@@ -158,6 +159,34 @@ def test_negative_with_log():
             objective.score(y_true, y_predicted)
 
 
+@pytest.mark.parametrize("objective_class", _all_objectives_dict().values())
+def test_regression_handles_dataframes(objective_class):  # DataFrame targets must be scorable by regression objectives
+    if not issubclass(objective_class, RegressionObjective):
+        pytest.skip("Skipping non-regression objective")  # parametrized over every objective; others are out of scope
+
+    y_predicted = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    y_true = pd.DataFrame({"a": [1, 2, 3], "b": [4, 6, 6]})  # differs from y_predicted only at b[1]
+
+    objective = objective_class()
+    score = objective.score(y_true, y_predicted)
+    assert isinstance(score, float)  # Output should be a float average
+
+
+@pytest.mark.parametrize("mismatch_dim", ["columns", "rows", "both"])
+def test_dataframe_different_dimensions(mismatch_dim):  # y_true/y_predicted shape mismatches must raise
+    y_predicted = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})  # reference shape: 3 rows x 2 columns
+    if mismatch_dim == "columns":  # NOTE(review): row counts match here; confirm validation rejects column mismatches
+        y_true = pd.DataFrame({"a": [1, 2, 3]})  # 3 rows x 1 column
+    elif mismatch_dim == "rows":  # elif, so the "columns" frame is not overwritten by the else branch
+        y_true = pd.DataFrame({"a": [1, 2], "b": [4, 6]})  # 2 rows x 2 columns
+    else:
+        y_true = pd.DataFrame({"a": [1, 2]})  # "both": 2 rows x 1 column
+
+    objective = MAPE()
+    with pytest.raises(ValueError, match="Inputs have mismatched dimensions"):
+        objective.score(y_true, y_predicted)
+
+
 def test_binary_more_than_two_unique_values():
     y_predicted = np.array([0, 1, 2])
     y_true = np.array([1, 0, 1])