Update ESS nudge logic in EarlyStopping Healthcheck (#4745)

shrutipatel31 · facebook-github-bot · commit 2223d45928af · 2026-01-09T14:34:15.000-08:00
Summary: Pull Request resolved: #4745. Differential Revision: D90035967. Privacy Context Container: L1307644.
diff --git a/ax/analysis/healthcheck/early_stopping_healthcheck.py b/ax/analysis/healthcheck/early_stopping_healthcheck.py
@@ -21,9 +21,10 @@
 from ax.core.map_metric import MapMetric
 from ax.core.optimization_config import MultiObjectiveOptimizationConfig
 from ax.early_stopping.dispatch import get_default_ess_or_none
-from ax.early_stopping.experiment_replay import replay_experiment
+from ax.early_stopping.experiment_replay import (
+    estimate_hypothetical_early_stopping_savings,
+)
 from ax.early_stopping.strategies.base import BaseEarlyStoppingStrategy
-from ax.early_stopping.strategies.percentile import PercentileEarlyStoppingStrategy
 from ax.early_stopping.utils import (
     EARLY_STOPPING_NUDGE_MSG,
     EARLY_STOPPING_NUDGE_TITLE,
@@ -35,7 +36,7 @@
 from ax.service.utils.early_stopping import get_early_stopping_metrics
 from pyre_extensions import none_throws, override
 
-DEFAULT_MIN_SAVINGS_THRESHOLD = 0.01  # 1% threshold
+DEFAULT_MIN_SAVINGS_THRESHOLD = 0.1  # 10% threshold
 MAX_PENDING_TRIALS_DEFAULT = 5
 DEFAULT_EARLY_STOPPING_HEALTHCHECK_TITLE = "Early Stopping Healthcheck"
 
@@ -92,7 +93,7 @@ def __init__(
                 default early stopping strategy will only be used for
                 single-objective unconstrained experiments.
             min_savings_threshold: Minimum savings threshold to suggest early
-                stopping. Default is 0.01 (1% savings).
+                stopping. Default is 0.1 (10% savings).
             max_pending_trials: Maximum number of pending trials for replay
                 orchestrator. Default is 5.
             auto_early_stopping_config: A string for configuring automated early
@@ -396,22 +397,40 @@ def _report_early_stopping_nudge(
         self, experiment: Experiment
     ) -> HealthcheckAnalysisCard:
         """Check if early stopping should be suggested (nudge) by estimating
-        hypothetical savings using replay logic."""
-        # Get map metrics from the experiment
-        # Note: validate_applicable_state already ensures map_metrics is non-empty
-        map_metrics = self._get_map_metrics(experiment)
-
-        # Estimate hypothetical savings for compatible metrics using replay
-        metric_to_savings = self._estimate_hypothetical_savings_with_replay(
-            experiment=experiment, map_metrics=map_metrics
+        hypothetical savings using replay logic.
+
+        Only applicable for single-objective unconstrained experiments where a
+        default early stopping strategy is available.
+        """
+        opt_config = none_throws(experiment.optimization_config)
+        metric = next(iter(opt_config.objective.metrics))
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=experiment,
+            metric=metric,
+            max_pending_trials=self.max_pending_trials,
         )
 
-        if not metric_to_savings:
-            # No significant savings detected
+        if savings is None:
+            # savings is None when estimate_hypothetical_early_stopping_savings
+            # cannot compute savings. This happens for:
+            # - Multi-objective or constrained experiments (no default ESS)
+            # - Experiments without MapMetric data
+            # - Experiment replay failures
+            problem_type = self._get_problem_type(experiment)
             return self._create_card(
                 subtitle=(
-                    "Early stopping is not enabled. While this experiment has "
-                    "data with a progression ('step' column) we did not detect "
+                    f"Early stopping is not enabled. Automatic early stopping "
+                    f"savings estimation is not available for this experiment "
+                    f"({problem_type}). If you want to use early stopping, "
+                    f"please configure an early_stopping_strategy explicitly."
+                ),
+                status=HealthcheckStatus.PASS,
+            )
+
+        if savings < self.min_savings_threshold:
+            return self._create_card(
+                subtitle=(
+                    "Early stopping is not enabled. We did not detect "
                     "significant potential savings at this time.\n\n"
                     "This could be because:\n"
                     "- The experiment hasn't run enough trials yet\n"
@@ -423,38 +442,35 @@ def _report_early_stopping_nudge(
             )
 
         # Found significant potential savings - nudge the user
-        best_metric_name = max(metric_to_savings, key=metric_to_savings.get)
-        best_savings = metric_to_savings[best_metric_name]
+        savings_pct = 100 * savings
 
         subtitle = EARLY_STOPPING_NUDGE_MSG.format(
-            metric_name=best_metric_name, savings=best_savings
+            metric_name=metric.name, savings=savings_pct
         )
 
         # Append additional info if provided
         if self.nudge_additional_info:
             subtitle += f" {self.nudge_additional_info}"
 
         # Create detailed metrics table
-        metric_rows = [
-            {
-                "Metric Name": metric_name,
-                "Estimated Savings": f"{savings:.1f}%",
-            }
-            for metric_name, savings in sorted(
-                metric_to_savings.items(), key=lambda x: x[1], reverse=True
-            )
-        ]
-        df = pd.DataFrame(metric_rows)
+        df = pd.DataFrame(
+            [
+                {
+                    "Metric Name": metric.name,
+                    "Estimated Savings": f"{savings_pct:.1f}%",
+                }
+            ]
+        )
 
-        title = EARLY_STOPPING_NUDGE_TITLE.format(savings=best_savings)
+        title = EARLY_STOPPING_NUDGE_TITLE.format(savings=savings_pct)
 
         return self._create_card(
             title=title,
             subtitle=subtitle,
             df=df,
             status=HealthcheckStatus.WARNING,
-            potential_savings=best_savings,
-            best_metric=best_metric_name,
+            potential_savings=savings_pct,
+            best_metric=metric.name,
         )
 
     def _get_problem_type(self, experiment: Experiment) -> str:
@@ -485,63 +501,3 @@ def _get_map_metrics(self, experiment: Experiment) -> list[MapMetric]:
                 reverse=True,
             )
         return map_metrics
-
-    def _estimate_hypothetical_savings_with_replay(
-        self, experiment: Experiment, map_metrics: list[MapMetric]
-    ) -> dict[str, float]:
-        """
-        Estimate hypothetical early stopping savings for each map metric using
-        replay infrastructure.
-
-        This is the accurate method that replays the experiment with early stopping
-        enabled to calculate actual savings.
-
-        Args:
-            experiment: The experiment to analyze
-            map_metrics: List of MapMetrics to analyze
-
-        Returns:
-            Dictionary mapping metric names to estimated savings percentages
-            (only includes metrics where savings > min_savings_threshold)
-        """
-        metric_to_savings: dict[str, float] = {}
-
-        MAX_REPLAYS = 3
-        MAX_REPLAY_TRIALS = 50
-        REPLAY_NUM_POINTS_PER_CURVE = 20
-        REPLAY_PERCENTILE_THRESHOLD = 65
-        REPLAY_MIN_PROGRESSION_FRAC = 0.4
-        REPLAY_MIN_CURVES = 5
-
-        # Limit to first few metrics to avoid expensive computation
-        for map_metric in map_metrics[:MAX_REPLAYS]:
-            try:
-                # Create replayed experiment with early stopping
-                replayed_experiment = replay_experiment(
-                    historical_experiment=experiment,
-                    num_samples_per_curve=REPLAY_NUM_POINTS_PER_CURVE,
-                    max_replay_trials=MAX_REPLAY_TRIALS,
-                    metric=map_metric,
-                    max_pending_trials=self.max_pending_trials,
-                    early_stopping_strategy=PercentileEarlyStoppingStrategy(
-                        min_curves=REPLAY_MIN_CURVES,
-                        min_progression=REPLAY_MIN_PROGRESSION_FRAC,
-                        percentile_threshold=REPLAY_PERCENTILE_THRESHOLD,
-                        normalize_progressions=True,
-                    ),
-                )
-
-                if replayed_experiment is not None:
-                    savings = estimate_early_stopping_savings(
-                        experiment=replayed_experiment
-                    )
-
-                    # Only include if savings exceed threshold (> 1%)
-                    if savings > self.min_savings_threshold:
-                        metric_to_savings[map_metric.name] = 100 * savings
-
-            except Exception:
-                # Skip metrics that fail replay
-                continue
-
-        return metric_to_savings
diff --git a/ax/analysis/healthcheck/tests/test_early_stopping_healthcheck.py b/ax/analysis/healthcheck/tests/test_early_stopping_healthcheck.py
@@ -139,15 +139,38 @@ def test_early_stopping_not_enabled(self) -> None:
         """Test behavior when early stopping is not enabled."""
         healthcheck = EarlyStoppingAnalysis(early_stopping_strategy=None)
 
-        with self.subTest("no_savings_detected"):
-            card = healthcheck.compute(experiment=self.experiment)
+        with self.subTest("no_savings_available"):
+            # Mock estimate_hypothetical_early_stopping_savings to return None
+            # This happens for MOO/constrained experiments, non-MapMetric data,
+            # or replay failures
+            with patch(
+                "ax.analysis.healthcheck.early_stopping_healthcheck"
+                ".estimate_hypothetical_early_stopping_savings",
+                return_value=None,
+            ):
+                card = healthcheck.compute(experiment=self.experiment)
+            self.assertEqual(card.get_status(), HealthcheckStatus.PASS)
+            self.assertIn("Early stopping is not enabled", card.subtitle)
+            self.assertIn("Automatic early stopping savings estimation", card.subtitle)
+
+        with self.subTest("low_savings_detected"):
+            # Mock low savings below threshold (default 10%)
+            mock_savings = 0.05  # 5% savings
+            with patch(
+                "ax.analysis.healthcheck.early_stopping_healthcheck"
+                ".estimate_hypothetical_early_stopping_savings",
+                return_value=mock_savings,
+            ):
+                card = healthcheck.compute(experiment=self.experiment)
+            self.assertEqual(card.get_status(), HealthcheckStatus.PASS)
             self.assertIn("Early stopping is not enabled", card.subtitle)
+            self.assertIn("did not detect significant potential savings", card.subtitle)
 
         with self.subTest("potential_savings_detected"):
-            mock_savings = {"ax_test_metric": 25.0}
-            with patch.object(
-                healthcheck,
-                "_estimate_hypothetical_savings_with_replay",
+            mock_savings = 0.25  # 25% as a decimal
+            with patch(
+                "ax.analysis.healthcheck.early_stopping_healthcheck"
+                ".estimate_hypothetical_early_stopping_savings",
                 return_value=mock_savings,
             ):
                 card = healthcheck.compute(experiment=self.experiment)
@@ -324,11 +347,8 @@ def test_hypothetical_savings_nudge(self) -> None:
 
         with self.subTest("basic_nudge"):
             with patch(
-                "ax.analysis.healthcheck.early_stopping_healthcheck.replay_experiment",
-                return_value=object(),
-            ), patch(
                 "ax.analysis.healthcheck.early_stopping_healthcheck"
-                ".estimate_early_stopping_savings",
+                ".estimate_hypothetical_early_stopping_savings",
                 return_value=0.25,
             ):
                 card = healthcheck.compute(experiment=self.experiment)
@@ -345,10 +365,10 @@ def test_hypothetical_savings_nudge(self) -> None:
                 early_stopping_strategy=None, nudge_additional_info=nudge_info
             )
 
-            mock_savings = {"ax_test_metric": 25.0}
-            with patch.object(
-                healthcheck_with_info,
-                "_estimate_hypothetical_savings_with_replay",
+            mock_savings = 0.25
+            with patch(
+                "ax.analysis.healthcheck.early_stopping_healthcheck"
+                ".estimate_hypothetical_early_stopping_savings",
                 return_value=mock_savings,
             ):
                 card = healthcheck_with_info.compute(experiment=self.experiment)