Extract early stopping replay utilities to OSS (#4744)

shrutipatel31 · facebook-github-bot · commit d1b20e5fad15 · 2026-01-06T12:35:04.000-08:00
Summary:

Adds the `estimate_hypothetical_early_stopping_savings()` function to the OSS module. This function estimates potential compute savings by replaying an experiment with a default early stopping strategy.

Key changes:

- Added `estimate_hypothetical_early_stopping_savings()` to `experiment_replay.py` which combines `get_default_ess_or_none()`, `replay_experiment()`, and `estimate_early_stopping_savings()` into a single utility
- Added constants `MAX_REPLAY_TRIALS`, `REPLAY_NUM_POINTS_PER_CURVE`, and `MAX_PENDING_TRIALS` to `experiment_replay.py`
- Added optional `minimize` parameter to `replay_experiment()` to explicitly control optimization direction
- Updated `ax_sweep_orchestrator.py` to use the new `estimate_hypothetical_early_stopping_savings()` function
- Added unit tests for the new function in `test_experiment_replay.py`

Differential Revision: D90150341
diff --git a/ax/early_stopping/experiment_replay.py b/ax/early_stopping/experiment_replay.py
@@ -17,7 +17,9 @@
 from ax.core.optimization_config import OptimizationConfig
 from ax.core.parameter import ParameterType, RangeParameter
 from ax.core.search_space import SearchSpace
+from ax.early_stopping.dispatch import get_default_ess_or_none
 from ax.early_stopping.strategies.base import BaseEarlyStoppingStrategy
+from ax.early_stopping.utils import estimate_early_stopping_savings
 from ax.generation_strategy.generation_strategy import (
     GenerationStep,
     GenerationStrategy,
@@ -29,6 +31,11 @@
 
 logger: Logger = get_logger(__name__)
 
+# Replay settings used by `estimate_hypothetical_early_stopping_savings`.
+MAX_REPLAY_TRIALS: int = 50  # passed to replay as `max_replay_trials`
+REPLAY_NUM_POINTS_PER_CURVE: int = 20  # passed as `num_samples_per_curve`
+MAX_PENDING_TRIALS: int = 5  # default for its `max_pending_trials` argument
+
 
 def replay_experiment(
     historical_experiment: Experiment,
@@ -38,6 +45,7 @@ def replay_experiment(
     max_pending_trials: int,
     early_stopping_strategy: BaseEarlyStoppingStrategy | None,
     logging_level: int = logging.ERROR,
+    minimize: bool | None = None,
 ) -> Experiment | None:
     """A utility function for replaying a historical experiment's data
     by initializing a Orchestrator that quickly steps through the existing data.
@@ -60,7 +68,7 @@ def replay_experiment(
         lower_is_better=metric.lower_is_better,
     )
     optimization_config = OptimizationConfig(
-        objective=Objective(metric=replay_metric),
+        objective=Objective(metric=replay_metric, minimize=minimize),
     )
     runner = MapDataReplayRunner(replay_metric=replay_metric)
 
@@ -105,3 +113,53 @@ def replay_experiment(
     orchestrator.run_all_trials()
     logger.info(f"Replayed the experiment in {perf_counter() - start_time} seconds.")
     return experiment
+
+
+def estimate_hypothetical_early_stopping_savings(
+    experiment: Experiment,
+    metric: Metric,
+    max_pending_trials: int = MAX_PENDING_TRIALS,
+    minimize: bool | None = None,
+) -> float | None:
+    """Estimate what early stopping would have saved, via experiment replay.
+
+    The default strategy from `get_default_ess_or_none` is looked up for the
+    experiment, the experiment is replayed with that strategy through
+    `replay_experiment`, and `estimate_early_stopping_savings` is evaluated
+    on the replayed experiment.
+
+    Note: multi-objective, constrained, and non-MapMetric experiments yield
+    None, because `get_default_ess_or_none` has no default strategy for them.
+
+    Args:
+        experiment: The historical experiment to replay and analyze.
+        metric: The metric to use for the early stopping replay.
+        max_pending_trials: Cap on pending trials in the replay orchestrator;
+            defaults to `MAX_PENDING_TRIALS`.
+        minimize: Optimization direction, forwarded as-is to
+            `replay_experiment`. When None, the direction is presumably
+            taken from the metric's `lower_is_better` -- confirm in Objective.
+
+    Returns:
+        The estimated savings as a fraction between 0.0 and 1.0, or None when
+        either no default early stopping strategy exists for the experiment
+        or the experiment replay failed.
+    """
+    strategy = get_default_ess_or_none(experiment=experiment)
+    if strategy is None:
+        # Without a default strategy there is nothing to replay against.
+        return None
+
+    replayed = replay_experiment(
+        historical_experiment=experiment,
+        metric=metric,
+        early_stopping_strategy=strategy,
+        minimize=minimize,
+        max_pending_trials=max_pending_trials,
+        max_replay_trials=MAX_REPLAY_TRIALS,
+        num_samples_per_curve=REPLAY_NUM_POINTS_PER_CURVE,
+    )
+    # `replay_experiment` returns None when the replay did not succeed.
+    if replayed is None:
+        return None
+    return estimate_early_stopping_savings(experiment=replayed)
diff --git a/ax/early_stopping/tests/test_experiment_replay.py b/ax/early_stopping/tests/test_experiment_replay.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from unittest.mock import MagicMock, patch
+
+from ax.early_stopping.experiment_replay import (
+    estimate_hypothetical_early_stopping_savings,
+)
+from ax.utils.common.testutils import TestCase
+from ax.utils.testing.core_stubs import (
+    get_branin_experiment,
+    get_branin_experiment_with_timestamp_map_metric,
+)
+from pyre_extensions import none_throws
+
+
+class TestEstimateHypotheticalEarlyStoppingSavings(TestCase):
+    def test_returns_none_for_non_map_metric_experiment(self) -> None:
+        """A plain (non-MapMetric) Branin experiment yields None."""
+        experiment = get_branin_experiment(has_optimization_config=True)
+        opt_config = none_throws(experiment.optimization_config)
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=experiment,
+            metric=opt_config.objective.metric,
+        )
+
+        self.assertIsNone(savings)
+
+    def test_returns_none_for_multi_objective(self) -> None:
+        """A multi-objective map-metric experiment yields None."""
+        branin = get_branin_experiment_with_timestamp_map_metric(multi_objective=True)
+        # Multi-objective: take the first metric of the optimization config.
+        config_metrics = none_throws(branin.optimization_config).metrics
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=branin,
+            metric=list(config_metrics.values())[0],
+        )
+
+        self.assertIsNone(savings)
+
+    def test_returns_none_for_constrained_experiment(self) -> None:
+        """An experiment carrying an outcome constraint yields None."""
+        branin = get_branin_experiment_with_timestamp_map_metric(
+            with_outcome_constraint=True
+        )
+        opt_config = none_throws(branin.optimization_config)
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=branin,
+            metric=opt_config.objective.metric,
+        )
+
+        self.assertIsNone(savings)
+
+    @patch("ax.early_stopping.experiment_replay.replay_experiment", return_value=None)
+    def test_returns_none_when_replay_fails(
+        self, mock_replay_experiment: MagicMock
+    ) -> None:
+        """A replay that returns None makes the estimate None as well."""
+        branin = get_branin_experiment_with_timestamp_map_metric()
+        opt_config = none_throws(branin.optimization_config)
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=branin,
+            metric=opt_config.objective.metric,
+        )
+
+        self.assertIsNone(savings)
+        # The replay itself must still have been attempted exactly once.
+        mock_replay_experiment.assert_called_once()
+
+    @patch("ax.early_stopping.experiment_replay.estimate_early_stopping_savings")
+    @patch("ax.early_stopping.experiment_replay.replay_experiment")
+    def test_returns_savings_on_successful_replay(
+        self,
+        mock_replay_experiment: MagicMock,
+        mock_estimate_savings: MagicMock,
+    ) -> None:
+        """A successful replay returns the savings estimated on its result."""
+        branin = get_branin_experiment_with_timestamp_map_metric()
+        opt_config = none_throws(branin.optimization_config)
+        mock_estimate_savings.return_value = 0.25
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=branin,
+            metric=opt_config.objective.metric,
+        )
+
+        self.assertEqual(savings, 0.25)
+        # The savings must be computed on exactly what the replay returned.
+        replayed = mock_replay_experiment.return_value
+        mock_estimate_savings.assert_called_once_with(experiment=replayed)