Extract early stopping replay utilities to OSS (facebook#4744)

shrutipatel31 · facebook-github-bot · commit 43bff93fbac2 · 2026-01-09T14:33:47.000-08:00
Summary:

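Adds the `estimate_hypothetical_early_stopping_savings()` function to the OSS module `ax/early_stopping/experiment_replay.py`, together with unit tests. This function estimates potential compute savings by replaying an experiment with a default early stopping strategy; it returns `None` when no default strategy is available for the experiment or when the replay fails.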

Differential Revision: D90150341
diff --git a/ax/early_stopping/experiment_replay.py b/ax/early_stopping/experiment_replay.py
@@ -17,7 +17,9 @@
 from ax.core.optimization_config import OptimizationConfig
 from ax.core.parameter import ParameterType, RangeParameter
 from ax.core.search_space import SearchSpace
+from ax.early_stopping.dispatch import get_default_ess_or_none
 from ax.early_stopping.strategies.base import BaseEarlyStoppingStrategy
+from ax.early_stopping.utils import estimate_early_stopping_savings
 from ax.generation_strategy.generation_strategy import (
     GenerationStep,
     GenerationStrategy,
@@ -29,6 +31,11 @@
 
 logger: Logger = get_logger(__name__)
 
+# Replay settings used by `estimate_hypothetical_early_stopping_savings`.
+MAX_REPLAY_TRIALS: int = 50  # passed to `replay_experiment` as `max_replay_trials`
+REPLAY_NUM_POINTS_PER_CURVE: int = 20  # passed as `num_samples_per_curve`
+MAX_PENDING_TRIALS: int = 5  # default for the `max_pending_trials` argument
+
 
 def replay_experiment(
     historical_experiment: Experiment,
@@ -105,3 +112,54 @@ def replay_experiment(
     orchestrator.run_all_trials()
     logger.info(f"Replayed the experiment in {perf_counter() - start_time} seconds.")
     return experiment
+
+
+def estimate_hypothetical_early_stopping_savings(
+    experiment: Experiment,
+    metric: Metric,
+    max_pending_trials: int = MAX_PENDING_TRIALS,
+) -> float | None:
+    """Estimate hypothetical early stopping savings using experiment replay.
+
+    This function replays the experiment with a default early stopping strategy
+    to calculate what savings would have been achieved if early stopping were
+    enabled. It is best-effort: any failure is logged and reported as None.
+
+    Note: Returns None for multi-objective, constrained, or non-MapMetric
+    experiments, as `get_default_ess_or_none` does not provide a default
+    early stopping strategy for these experiment types.
+
+    Args:
+        experiment: The experiment to analyze.
+        metric: The metric to use for early stopping replay.
+        max_pending_trials: Maximum number of pending trials for the replay
+            orchestrator. Defaults to `MAX_PENDING_TRIALS`.
+
+    Returns:
+        Estimated savings as a fraction (0.0 to 1.0), or None if:
+        - No default early stopping strategy is available for this experiment
+          (e.g., multi-objective, constrained, or non-MapMetric experiments)
+        - The experiment replay failed (the failure is logged as a warning)
+    """
+    try:
+        default_ess = get_default_ess_or_none(experiment=experiment)
+        if default_ess is None:
+            return None
+
+        replayed_experiment = replay_experiment(
+            historical_experiment=experiment,
+            num_samples_per_curve=REPLAY_NUM_POINTS_PER_CURVE,
+            max_replay_trials=MAX_REPLAY_TRIALS,
+            metric=metric,
+            max_pending_trials=max_pending_trials,
+            early_stopping_strategy=default_ess,
+        )
+        if replayed_experiment is None:
+            return None
+
+        return estimate_early_stopping_savings(experiment=replayed_experiment)
+    except Exception:
+        # Best-effort: invalid experiment state (e.g., missing name, incompatible
+        # data format) or orchestration errors must not propagate to the caller.
+        logger.warning("Hypothetical early stopping replay failed.", exc_info=True)
+        return None
diff --git a/ax/early_stopping/tests/test_experiment_replay.py b/ax/early_stopping/tests/test_experiment_replay.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from unittest.mock import MagicMock, patch
+
+from ax.early_stopping.experiment_replay import (
+    estimate_hypothetical_early_stopping_savings,
+)
+from ax.utils.common.testutils import TestCase
+from ax.utils.testing.core_stubs import (
+    get_branin_experiment,
+    get_branin_experiment_with_timestamp_map_metric,
+)
+from pyre_extensions import none_throws
+
+
+class TestEstimateHypotheticalEarlyStoppingSavings(TestCase):
+    def test_returns_none_for_non_map_metric_experiment(self) -> None:
+        """An experiment without a MapMetric should produce no estimate."""
+        experiment = get_branin_experiment(has_optimization_config=True)
+        target_metric = none_throws(experiment.optimization_config).objective.metric
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=experiment,
+            metric=target_metric,
+        )
+
+        self.assertIsNone(savings)
+
+    def test_returns_none_for_multi_objective(self) -> None:
+        """A multi-objective experiment should produce no estimate."""
+        moo_exp = get_branin_experiment_with_timestamp_map_metric(multi_objective=True)
+        opt_config = none_throws(moo_exp.optimization_config)
+        target_metric = list(opt_config.metrics.values())[0]  # first MOO metric
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=moo_exp,
+            metric=target_metric,
+        )
+
+        self.assertIsNone(savings)
+
+    def test_returns_none_for_constrained_experiment(self) -> None:
+        """An experiment with outcome constraints should produce no estimate."""
+        constrained_exp = get_branin_experiment_with_timestamp_map_metric(
+            with_outcome_constraint=True
+        )
+        target_metric = none_throws(constrained_exp.optimization_config).objective.metric
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=constrained_exp,
+            metric=target_metric,
+        )
+
+        self.assertIsNone(savings)
+
+    @patch("ax.early_stopping.experiment_replay.replay_experiment")
+    def test_returns_none_when_replay_fails(
+        self, replay_experiment_mock: MagicMock
+    ) -> None:
+        """A replay that yields no experiment should produce no estimate."""
+        replay_experiment_mock.return_value = None
+        experiment = get_branin_experiment_with_timestamp_map_metric()
+        target_metric = none_throws(experiment.optimization_config).objective.metric
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=experiment,
+            metric=target_metric,
+        )
+
+        self.assertIsNone(savings)
+        replay_experiment_mock.assert_called_once()
+
+    @patch("ax.early_stopping.experiment_replay.estimate_early_stopping_savings")
+    @patch("ax.early_stopping.experiment_replay.replay_experiment")
+    def test_returns_savings_on_successful_replay(
+        self,
+        replay_experiment_mock: MagicMock,
+        estimate_savings_mock: MagicMock,
+    ) -> None:
+        """A successful replay should forward the estimated savings unchanged."""
+        replayed_experiment = MagicMock()
+        replay_experiment_mock.return_value = replayed_experiment
+        estimate_savings_mock.return_value = 0.25
+        experiment = get_branin_experiment_with_timestamp_map_metric()
+        target_metric = none_throws(experiment.optimization_config).objective.metric
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=experiment,
+            metric=target_metric,
+        )
+
+        self.assertEqual(savings, 0.25)
+        estimate_savings_mock.assert_called_once_with(experiment=replayed_experiment)
+
+    @patch("ax.early_stopping.experiment_replay.replay_experiment")
+    def test_returns_none_when_exception_raised(
+        self, replay_experiment_mock: MagicMock
+    ) -> None:
+        """An exception raised during replay (for instance because the experiment
+        has no name, or orchestration breaks internally) should be absorbed and
+        reported as None rather than propagated."""
+        replay_experiment_mock.side_effect = ValueError("Experiment's name is None.")
+        experiment = get_branin_experiment_with_timestamp_map_metric()
+        target_metric = none_throws(experiment.optimization_config).objective.metric
+
+        savings = estimate_hypothetical_early_stopping_savings(
+            experiment=experiment,
+            metric=target_metric,
+        )
+
+        self.assertIsNone(savings)
+        replay_experiment_mock.assert_called_once()