Extract early stopping replay utilities to OSS (#4744)

shrutipatel31 · facebook-github-bot · commit 177cfd2b7172 · 2026-01-09T10:54:13.000-08:00
Summary:

Adds the `estimate_hypothetical_early_stopping_savings()` function to the OSS module. This function estimates potential compute savings by replaying an experiment with a default early stopping strategy.

Key changes:

- Added `estimate_hypothetical_early_stopping_savings()` to `experiment_replay.py` which combines `get_default_ess_or_none()`, `replay_experiment()`, and `estimate_early_stopping_savings()` into a single utility
- Added constants `MAX_REPLAY_TRIALS`, `REPLAY_NUM_POINTS_PER_CURVE`, and `MAX_PENDING_TRIALS` to `experiment_replay.py`
- Added optional `minimize` parameter to `replay_experiment()` to explicitly control optimization direction
- Updated `ax_sweep_orchestrator.py` to use the new `estimate_hypothetical_early_stopping_savings()` function
- Added unit tests for the new function in `test_experiment_replay.py`

Differential Revision: D90150341
diff --git a/ax/early_stopping/experiment_replay.py b/ax/early_stopping/experiment_replay.py
@@ -17,7 +17,9 @@
 from ax.core.optimization_config import OptimizationConfig
 from ax.core.parameter import ParameterType, RangeParameter
 from ax.core.search_space import SearchSpace
+from ax.early_stopping.dispatch import get_default_ess_or_none
 from ax.early_stopping.strategies.base import BaseEarlyStoppingStrategy
+from ax.early_stopping.utils import estimate_early_stopping_savings
 from ax.generation_strategy.generation_strategy import (
     GenerationStep,
     GenerationStrategy,
@@ -29,6 +31,11 @@
 
 logger: Logger = get_logger(__name__)
 
+# Constants for experiment replay
+# Passed as `max_replay_trials` to `replay_experiment` when estimating
+# hypothetical early stopping savings.
+MAX_REPLAY_TRIALS: int = 50
+# Passed as `num_samples_per_curve` to `replay_experiment` when estimating
+# hypothetical early stopping savings.
+REPLAY_NUM_POINTS_PER_CURVE: int = 20
+# Default value of `max_pending_trials` in
+# `estimate_hypothetical_early_stopping_savings`.
+MAX_PENDING_TRIALS: int = 5
+
 
 def replay_experiment(
     historical_experiment: Experiment,
@@ -105,3 +112,54 @@ def replay_experiment(
     orchestrator.run_all_trials()
     logger.info(f"Replayed the experiment in {perf_counter() - start_time} seconds.")
     return experiment
+
+
+def estimate_hypothetical_early_stopping_savings(
+    experiment: Experiment,
+    metric: Metric,
+    max_pending_trials: int = MAX_PENDING_TRIALS,
+) -> float | None:
+    """Estimate hypothetical early stopping savings using experiment replay.
+
+    This function replays the experiment with a default early stopping strategy
+    to calculate what savings would have been achieved if early stopping were
+    enabled.
+
+    Note: Returns None for multi-objective, constrained, or non-MapMetric
+    experiments, as `get_default_ess_or_none` does not provide a default
+    early stopping strategy for these experiment types.
+
+    Args:
+        experiment: The experiment to analyze.
+        metric: The metric to use for early stopping replay.
+        max_pending_trials: Maximum number of pending trials for the replay
+            orchestrator. Defaults to 5.
+
+    Returns:
+        Estimated savings as a fraction (0.0 to 1.0), or None if:
+        - No default early stopping strategy is available for this experiment
+          (e.g., multi-objective, constrained, or non-MapMetric experiments)
+        - The experiment replay failed
+    """
+    try:
+        default_ess = get_default_ess_or_none(experiment=experiment)
+        if default_ess is None:
+            return None
+
+        replayed_experiment = replay_experiment(
+            historical_experiment=experiment,
+            num_samples_per_curve=REPLAY_NUM_POINTS_PER_CURVE,
+            max_replay_trials=MAX_REPLAY_TRIALS,
+            metric=metric,
+            max_pending_trials=max_pending_trials,
+            early_stopping_strategy=default_ess,
+        )
+
+        if replayed_experiment is None:
+            return None
+
+        return estimate_early_stopping_savings(experiment=replayed_experiment)
+    except Exception:
+        # Replay can fail due to invalid experiment state (e.g., missing name,
+        # incompatible data format) or internal errors during orchestration.
+        return None
diff --git a/ax/early_stopping/tests/test_experiment_replay.py b/ax/early_stopping/tests/test_experiment_replay.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from unittest.mock import MagicMock, patch
+
+from ax.early_stopping.experiment_replay import (
+    estimate_hypothetical_early_stopping_savings,
+)
+from ax.utils.common.testutils import TestCase
+from ax.utils.testing.core_stubs import (
+    get_branin_experiment,
+    get_branin_experiment_with_timestamp_map_metric,
+)
+from pyre_extensions import none_throws
+
+
+class TestEstimateHypotheticalEarlyStoppingSavings(TestCase):
+    def test_returns_none_for_non_map_metric_experiment(self) -> None:
+        """Test that None is returned when experiment has no MapMetric."""
+        exp = get_branin_experiment(has_optimization_config=True)
+        metric = none_throws(exp.optimization_config).objective.metric
+
+        result = estimate_hypothetical_early_stopping_savings(
+            experiment=exp,
+            metric=metric,
+        )
+
+        self.assertIsNone(result)
+
+    def test_returns_none_for_multi_objective(self) -> None:
+        """Test that None is returned for multi-objective experiments."""
+        exp = get_branin_experiment_with_timestamp_map_metric(multi_objective=True)
+        # Use first metric from optimization config for multi-objective
+        metric = list(none_throws(exp.optimization_config).metrics.values())[0]
+
+        result = estimate_hypothetical_early_stopping_savings(
+            experiment=exp,
+            metric=metric,
+        )
+
+        self.assertIsNone(result)
+
+    def test_returns_none_for_constrained_experiment(self) -> None:
+        """Test that None is returned for experiments with outcome constraints."""
+        exp = get_branin_experiment_with_timestamp_map_metric(
+            with_outcome_constraint=True
+        )
+        metric = none_throws(exp.optimization_config).objective.metric
+
+        result = estimate_hypothetical_early_stopping_savings(
+            experiment=exp,
+            metric=metric,
+        )
+
+        self.assertIsNone(result)
+
+    @patch("ax.early_stopping.experiment_replay.replay_experiment")
+    def test_returns_none_when_replay_fails(
+        self, mock_replay_experiment: MagicMock
+    ) -> None:
+        """Test that None is returned when replay_experiment fails."""
+        exp = get_branin_experiment_with_timestamp_map_metric()
+        metric = none_throws(exp.optimization_config).objective.metric
+        mock_replay_experiment.return_value = None
+
+        result = estimate_hypothetical_early_stopping_savings(
+            experiment=exp,
+            metric=metric,
+        )
+
+        self.assertIsNone(result)
+        mock_replay_experiment.assert_called_once()
+
+    @patch("ax.early_stopping.experiment_replay.estimate_early_stopping_savings")
+    @patch("ax.early_stopping.experiment_replay.replay_experiment")
+    def test_returns_savings_on_successful_replay(
+        self,
+        mock_replay_experiment: MagicMock,
+        mock_estimate_savings: MagicMock,
+    ) -> None:
+        """Test that savings are returned when replay succeeds."""
+        exp = get_branin_experiment_with_timestamp_map_metric()
+        metric = none_throws(exp.optimization_config).objective.metric
+        mock_replayed_exp = MagicMock()
+        mock_replay_experiment.return_value = mock_replayed_exp
+        mock_estimate_savings.return_value = 0.25
+
+        result = estimate_hypothetical_early_stopping_savings(
+            experiment=exp,
+            metric=metric,
+        )
+
+        self.assertEqual(result, 0.25)
+        mock_estimate_savings.assert_called_once_with(experiment=mock_replayed_exp)
+
+    @patch("ax.early_stopping.experiment_replay.replay_experiment")
+    def test_returns_none_when_exception_raised(
+        self, mock_replay_experiment: MagicMock
+    ) -> None:
+        """Test that None is returned when replay fails due to invalid experiment
+        state (e.g., missing name) or internal orchestration errors.
+        """
+        exp = get_branin_experiment_with_timestamp_map_metric()
+        metric = none_throws(exp.optimization_config).objective.metric
+        mock_replay_experiment.side_effect = ValueError("Experiment's name is None.")
+
+        result = estimate_hypothetical_early_stopping_savings(
+            experiment=exp,
+            metric=metric,
+        )
+
+        self.assertIsNone(result)
+        mock_replay_experiment.assert_called_once()
diff --git a/ax/early_stopping/utils.py b/ax/early_stopping/utils.py
@@ -19,6 +19,68 @@
 
 logger: Logger = get_logger(__name__)
 
+# Early stopping message constants for use in analysis and reporting
+EARLY_STOPPING_STATUS_MSG = (
+    "Throughout this experiment, {n_stopped} trials were early stopped, out "
+    "of a total of {n_ran} trials. "
+)
+
+EARLY_STOPPING_SAVINGS_TITLE = "Capacity savings due to early stopping"
+
+EARLY_STOPPING_SAVINGS_MSG = (
+    "The capacity savings (computed using {map_key}) are estimated to be "
+    "{savings:.0f}%."
+)
+
+EARLY_STOPPING_SAVINGS_TBD = (
+    "Capacity savings are not yet available. Either no trials have been early "
+    "stopped, or no trials have completed (which is required to estimate "
+    "savings). Check back once more trials are completed and/or early stopped."
+)
+
+EARLY_STOPPING_NUDGE_MSG = (
+    "This sweep uses metrics that are **compatible with early stopping**! "
+    "Using early stopping could have saved you both capacity and optimization "
+    "wall time. For example, we estimate that using early stopping on the "
+    "'{metric_name}' metric could have provided {savings:.0f}% capacity "
+    "savings, with no regression in optimization performance."
+)
+
+EARLY_STOPPING_NUDGE_TITLE = (
+    "{savings:.0f}% potential capacity savings if you turn on " "early stopping feature"
+)
+
+
+def format_early_stopping_savings_message(
+    n_stopped: int,
+    n_ran: int,
+    savings: float,
+) -> str:
+    """Format a message describing early stopping status and savings.
+
+    This function consolidates the common logic used by both AxSweep and the
+    early stopping healthcheck to format early stopping status messages.
+
+    Args:
+        n_stopped: Number of trials that were early stopped.
+        n_ran: Total number of trials that ran (stopped + completed + failed + running).
+        savings: Resource savings as a fraction (0.0 to 1.0). For example, 0.11
+            indicates 11% savings.
+
+    Returns:
+        A formatted message string describing the early stopping status and
+        either the estimated savings percentage or a note that savings are
+        not yet available.
+    """
+    msg = EARLY_STOPPING_STATUS_MSG.format(n_stopped=n_stopped, n_ran=n_ran)
+
+    if savings > 0:
+        msg += EARLY_STOPPING_SAVINGS_MSG.format(map_key=MAP_KEY, savings=savings * 100)
+    else:
+        msg += EARLY_STOPPING_SAVINGS_TBD
+
+    return msg
+
 
 def _is_worse(a: Any, b: Any, minimize: bool) -> Any:
     """Determine if value `a` is worse than value `b` based on optimization direction.