Update target trial selection logic to only consider trials with data for optimization config metrics (facebook#4742)

mgarrard · facebook-github-bot · commit be89b7a0fdb6 · 2026-01-06T19:57:48.000-08:00
Summary:

This updates target trial selection logic in the following ways:
1. We only consider trials that either (a) have data for *all* optimization config metrics or (b) have data for *all* metrics. Previously, a trial passed the check if it had data for only some of the optimization config metrics, but such partial data causes issues downstream.
2. If there is no long-run trial, we fall back to a short-run trial instead of failing to identify a target trial.
3. We filter out "stale" trials, i.e., trials that were completed more than 10 days ago. Hitting this point would be pretty far down our priority list, but it was an idea ItsMrLin initially had that I thought was really interesting.
4. Lastly, if no trials remain after the stale ones are filtered out, we use the stale ones anyway (necessary for benchmarking).
5. If we still can't find anything, we return `None`.

Reviewed By: ItsMrLin

Differential Revision:
D90089411

Privacy Context Container: L1307644
diff --git a/ax/adapter/transforms/tests/test_transform_to_new_sq.py b/ax/adapter/transforms/tests/test_transform_to_new_sq.py
@@ -182,18 +182,6 @@ def test_target_trial_index(self) -> None:
             )
 
         self.assertEqual(t.default_trial_idx, 0)
-        # test falling back to latest trial with SQ data
-        with mock.patch(
-            "ax.adapter.transforms.transform_to_new_sq.get_target_trial_index",
-            return_value=10,
-        ):
-            t = TransformToNewSQ(
-                search_space=self.exp.search_space,
-                experiment_data=experiment_data,
-                adapter=self.adapter,
-            )
-
-        self.assertEqual(t.default_trial_idx, 1)
 
     def test_transform_experiment_data(self) -> None:
         # Create two more trials with different SQ observations.
diff --git a/ax/adapter/transforms/tests/test_winsorize_transform.py b/ax/adapter/transforms/tests/test_winsorize_transform.py
@@ -535,7 +535,13 @@ def test_relative_constraints(self) -> None:
                         "trial_index": t.index,
                         "metric_signature": metric_name,
                     }
-                    for metric_name, mean, sem in (("a", 1.0, 2.0), ("b", 2.0, 4.0))
+                    # Needs data for all metrics in opt config to identify target
+                    # trial for transforms
+                    for metric_name, mean, sem in (
+                        ("a", 1.0, 2.0),
+                        ("b", 2.0, 4.0),
+                        ("c", 3.0, 1.0),
+                    )
                 ]
             )
         )
@@ -553,7 +559,7 @@ def test_relative_constraints(self) -> None:
                 adapter=adapter,
             )
         self.assertDictEqual(
-            t.cutoffs, {"a": (-INF, INF), "b": (-INF, INF), "c": (0.5, INF)}
+            t.cutoffs, {"a": (-INF, INF), "b": (-INF, INF), "c": (-3.25, INF)}
         )
         # Winsorizes with `derelativize_with_raw_status_quo`.
         t = Winsorize(
@@ -563,7 +569,7 @@ def test_relative_constraints(self) -> None:
             config={"derelativize_with_raw_status_quo": True},
         )
         self.assertDictEqual(
-            t.cutoffs, {"a": (-INF, 4.25), "b": (-INF, 4.25), "c": (0.5, INF)}
+            t.cutoffs, {"a": (-INF, 4.25), "b": (-INF, 4.25), "c": (-3.25, INF)}
         )
 
     def test_transform_experiment_data(self) -> None:
diff --git a/ax/adapter/transforms/transform_to_new_sq.py b/ax/adapter/transforms/transform_to_new_sq.py
@@ -77,13 +77,6 @@ def __init__(
             target_trial_index = get_target_trial_index(
                 experiment=none_throws(adapter)._experiment
             )
-            trials_indices_with_sq_data = self.status_quo_data_by_trial.keys()
-            if target_trial_index not in trials_indices_with_sq_data:
-                target_trial_index = max(trials_indices_with_sq_data)
-                logger.warning(
-                    "No status quo data for target trial. Failing back to "
-                    f"{target_trial_index}."
-                )
 
         if target_trial_index is not None:
             self.default_trial_idx: int = assert_is_instance(
diff --git a/ax/analysis/plotly/surface/contour.py b/ax/analysis/plotly/surface/contour.py
@@ -325,7 +325,12 @@ def _prepare_data(
     )
 
     if relativize:
-        target_trial_index = none_throws(get_target_trial_index(experiment=experiment))
+        target_trial_index = none_throws(
+            get_target_trial_index(
+                experiment=experiment,
+                require_data_for_all_metrics=True,
+            )
+        )
         df = relativize_data(
             experiment=experiment,
             df=df,
diff --git a/ax/analysis/plotly/surface/slice.py b/ax/analysis/plotly/surface/slice.py
@@ -286,7 +286,12 @@ def _prepare_data(
     ).sort_values(by=parameter_name)
 
     if relativize:
-        target_trial_index = none_throws(get_target_trial_index(experiment=experiment))
+        target_trial_index = none_throws(
+            get_target_trial_index(
+                experiment=experiment,
+                require_data_for_all_metrics=True,
+            )
+        )
         df = relativize_data(
             experiment=experiment,
             df=df,
diff --git a/ax/analysis/utils.py b/ax/analysis/utils.py
@@ -171,7 +171,10 @@ def prepare_arm_data(
     # Compute the trial index of the target trial both to pass as a fixed feature
     # during prediction if using model predictions, and to relativize against the
     # status quo arm from the target trial if relativizing.
-    target_trial_index = get_target_trial_index(experiment=experiment)
+    target_trial_index = get_target_trial_index(
+        experiment=experiment,
+        require_data_for_all_metrics=True,
+    )
     if use_model_predictions:
         if adapter is None:
             raise UserInputError(
diff --git a/ax/core/tests/test_utils.py b/ax/core/tests/test_utils.py
@@ -7,6 +7,7 @@
 # pyre-strict
 
 from copy import deepcopy
+from datetime import datetime, timedelta
 from unittest.mock import patch
 
 import numpy as np
@@ -43,6 +44,7 @@
 from ax.utils.common.testutils import TestCase
 from ax.utils.testing.core_stubs import (
     get_branin_data,
+    get_branin_data_batch,
     get_branin_experiment,
     get_experiment,
     get_hierarchical_search_space_experiment,
@@ -178,6 +180,10 @@ def setUp(self) -> None:
                 )
             ],
         )
+        self.batch_experiment = get_branin_experiment(with_completed_trial=False)
+        self.batch_experiment.status_quo = Arm(
+            name="status_quo", parameters={"x1": 0.0, "x2": 0.0}
+        )
 
     def test_get_missing_metrics_by_name(self) -> None:
         expected = {"a": {("0_1", 1)}, "b": {("0_2", 1)}}
@@ -772,6 +778,90 @@ def test_get_target_trial_index_non_batch(self) -> None:
         experiment.attach_data(get_branin_data(trials=[trial]))
         self.assertEqual(get_target_trial_index(experiment=experiment), trial.index)
 
+    def test_get_target_trial_index_stale_trial_filtering(self) -> None:
+        trials = []
+        for days_ago in [15, 5]:  # old trial (stale), new trial (recent)
+            trial = self.batch_experiment.new_batch_trial().add_arm(
+                self.batch_experiment.status_quo
+            )
+            trial.mark_completed(unsafe=True)
+            trial._time_completed = datetime.now() - timedelta(days=days_ago)
+            self.batch_experiment.attach_data(get_branin_data_batch(batch=trial))
+            trials.append(trial)
+
+        self.assertEqual(
+            get_target_trial_index(experiment=self.batch_experiment),
+            trials[1].index,  # newer trial
+        )
+
+    def test_get_target_trial_index_all_stale_fallback(self) -> None:
+        trial = self.batch_experiment.new_batch_trial().add_arm(
+            self.batch_experiment.status_quo
+        )
+        trial.mark_completed(unsafe=True)
+        trial._time_completed = datetime.now() - timedelta(days=15)  # stale
+        self.batch_experiment.attach_data(get_branin_data_batch(batch=trial))
+
+        # fallback to stale trial over none
+        self.assertEqual(
+            get_target_trial_index(experiment=self.batch_experiment), trial.index
+        )
+
+    def test_get_target_trial_index_longrun_to_shortrun_fallback(self) -> None:
+        # long run without data
+        long_run_trial = self.batch_experiment.new_batch_trial(
+            trial_type=Keys.LONG_RUN
+        ).add_arm(self.batch_experiment.status_quo)
+        long_run_trial.mark_running(no_runner_required=True)
+
+        # short run with data
+        short_run_trial = self.batch_experiment.new_batch_trial().add_arm(
+            self.batch_experiment.status_quo
+        )
+        short_run_trial.mark_running(no_runner_required=True)
+        self.batch_experiment.attach_data(get_branin_data_batch(batch=short_run_trial))
+
+        # should fall back to short-run trial since long-run has no SQ data
+        self.assertEqual(
+            get_target_trial_index(experiment=self.batch_experiment),
+            short_run_trial.index,
+        )
+
+        # once long-run trial has data, should return long-run trial
+        self.batch_experiment.attach_data(get_branin_data_batch(batch=long_run_trial))
+        self.assertEqual(
+            get_target_trial_index(experiment=self.batch_experiment),
+            long_run_trial.index,
+        )
+
+    def test_get_target_trial_index_opt_config_metric_filtering(self) -> None:
+        # add tracking metric, opt config is already branin
+        self.batch_experiment.add_tracking_metric(Metric(name="test_metric"))
+
+        # trial with opt config data only
+        trial = (
+            self.batch_experiment.new_batch_trial()
+            .add_arm(self.batch_experiment.status_quo)
+            .mark_running(no_runner_required=True)
+        )
+        self.batch_experiment.attach_data(get_branin_data_batch(batch=trial))
+
+        # default should pass because we'll have opt config data
+        self.assertEqual(
+            get_target_trial_index(
+                experiment=self.batch_experiment, require_data_for_all_metrics=False
+            ),
+            trial.index,
+        )
+
+        # when require_data_for_all_metrics=True, should return None
+        # because there are no trials with data for all metrics
+        self.assertIsNone(
+            get_target_trial_index(
+                experiment=self.batch_experiment, require_data_for_all_metrics=True
+            )
+        )
+
     def test_batch_trial_only_decorator(self) -> None:
         # Create a mock function to decorate
         def mock_func(trial: BatchTrial) -> None:
diff --git a/ax/core/utils.py b/ax/core/utils.py

Original file line number	Diff line number	Diff line change
`@@ -535,7 +535,13 @@ def test_relative_constraints(self) -> None:`
`535`	`535`	`"trial_index": t.index,`
`536`	`536`	`"metric_signature": metric_name,`
`537`	`537`	`}`
`538`		`- for metric_name, mean, sem in (("a", 1.0, 2.0), ("b", 2.0, 4.0))`
	`538`	`+ # Needs data for all metrics in opt config to identify target`
	`539`	`+ # trial for transforms`
	`540`	`+ for metric_name, mean, sem in (`
	`541`	`+ ("a", 1.0, 2.0),`
	`542`	`+ ("b", 2.0, 4.0),`
	`543`	`+ ("c", 3.0, 1.0),`
	`544`	`+ )`
`539`	`545`	`]`
`540`	`546`	`)`
`541`	`547`	`)`
`@@ -553,7 +559,7 @@ def test_relative_constraints(self) -> None:`
`553`	`559`	`adapter=adapter,`
`554`	`560`	`)`
`555`	`561`	`self.assertDictEqual(`
`556`		`- t.cutoffs, {"a": (-INF, INF), "b": (-INF, INF), "c": (0.5, INF)}`
	`562`	`+ t.cutoffs, {"a": (-INF, INF), "b": (-INF, INF), "c": (-3.25, INF)}`
`557`	`563`	`)`
`558`	`564`	# Winsorizes with `derelativize_with_raw_status_quo`.
`559`	`565`	`t = Winsorize(`
`@@ -563,7 +569,7 @@ def test_relative_constraints(self) -> None:`
`563`	`569`	`config={"derelativize_with_raw_status_quo": True},`
`564`	`570`	`)`
`565`	`571`	`self.assertDictEqual(`
`566`		`- t.cutoffs, {"a": (-INF, 4.25), "b": (-INF, 4.25), "c": (0.5, INF)}`
	`572`	`+ t.cutoffs, {"a": (-INF, 4.25), "b": (-INF, 4.25), "c": (-3.25, INF)}`
`567`	`573`	`)`
`568`	`574`
`569`	`575`	`def test_transform_experiment_data(self) -> None:`