Sort experiment data by trial_index, arm_name, and metrics (facebook#3885)

andycylmeta · facebook-github-bot · commit dbf7beffa233 · 2025-06-06T20:13:18.000-07:00
Summary: Pull Request resolved: facebook#3885 The arm order in metric results was displayed in whatever order the data happened to be in, which led to a seemingly random ordering of arms in the UI {F1977891605}. To fix this, we sort the data by trial_index, then by arm_name. Arm names are ordered as 'custom_name' < '0_1' < '0_2' < '0_11' < '0_100'. Reviewed By: lena-kashtelyan, saitcakmak Differential Revision: D74409276 fbshipit-source-id: 51d4de4e50f20ac009ed29ec58d4a417be2857a1
diff --git a/ax/core/base_trial.py b/ax/core/base_trial.py
@@ -14,6 +14,8 @@
 from datetime import datetime, timedelta
 from typing import Any, TYPE_CHECKING
 
+import pandas as pd
+
 from ax.core.arm import Arm
 from ax.core.data import Data
 from ax.core.formatting_utils import data_and_evaluations_from_raw_data
@@ -386,9 +388,13 @@ def fetch_data(self, metrics: list[Metric] | None = None, **kwargs: Any) -> Data
             MapMetric if self.experiment.default_data_constructor == MapData else Metric
         )
 
-        return base_metric_cls._unwrap_trial_data_multi(
+        data = base_metric_cls._unwrap_trial_data_multi(
             results=self.fetch_data_results(metrics=metrics, **kwargs)
         )
+        if not isinstance(data, MapData):
+            data._df = sort_by_trial_index_and_arm_name(data._df)
+
+        return data
 
     def lookup_data(self) -> Data:
         """Lookup cached data on experiment for this trial.
@@ -831,3 +837,58 @@ def _update_trial_attrs_on_clone(
             new_trial.mark_failed(reason=self.failed_reason)
             return
         new_trial.mark_as(self.status, unsafe=True)
+
+
+def sort_by_trial_index_and_arm_name(df: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of ``df`` sorted by trial index and then by arm name.
+
+    Within each ``trial_index``, rows are ordered as follows:
+
+    * Arm names that do not match the default ``<int>_<int>`` pattern (e.g.
+      ``status_quo``, ``control``, ``capped_param_1``) come first, sorted
+      alphabetically.
+    * Arm names that match the default pattern (e.g. ``0_1``, ``3_11``) follow,
+      sorted numerically by the part before the underscore and then by the
+      part after it, so that ``0_2`` < ``0_11`` < ``0_100``.
+
+    The input is never modified: the sort keys live in a separate frame, so no
+    helper columns leak into ``df`` and existing columns that happen to be named
+    ``is_default``, ``trial_index_part`` or ``arm_name_part`` are preserved.
+
+    Args:
+        df: The DataFrame to sort. Unless it is empty, it must contain the
+            ``trial_index`` and ``arm_name`` columns.
+
+    Returns:
+        A new DataFrame with the same columns, sorted as described above and
+        with a fresh ``RangeIndex``. An empty ``df`` is returned as is.
+    """
+    if df.empty:
+        # Nothing to sort; also avoids requiring the key columns to exist.
+        return df
+
+    # Build the sort keys in a separate, positionally indexed frame so that the
+    # caller's DataFrame is left untouched and duplicate index labels in ``df``
+    # cannot break alignment.
+    arm_names = df["arm_name"].reset_index(drop=True)
+    # Both groups are NaN for names that do not match the default pattern.
+    keys = arm_names.str.extract(
+        r"^(?P<trial_index_part>\d+)_(?P<arm_name_part>\d+)$"
+    ).astype(float)
+    keys["arm_name"] = arm_names
+    # ``False`` sorts before ``True``, which puts non-default arm names on top.
+    keys.insert(0, "is_default", keys["arm_name_part"].notna())
+    keys.insert(0, "trial_index", df["trial_index"].to_numpy())
+
+    # Sort the keys only, then reorder the original rows by position.
+    order = keys.sort_values(
+        by=[
+            "trial_index",
+            "is_default",
+            "trial_index_part",
+            "arm_name_part",
+            "arm_name",
+        ],
+    ).index.to_numpy()
+    # ``iloc`` returns new data, so the caller's frame is never mutated.
+    return df.iloc[order].reset_index(drop=True)
diff --git a/ax/core/experiment.py b/ax/core/experiment.py
@@ -21,7 +21,7 @@
 import pandas as pd
 from ax.core.arm import Arm
 from ax.core.auxiliary import AuxiliaryExperiment, AuxiliaryExperimentPurpose
-from ax.core.base_trial import BaseTrial
+from ax.core.base_trial import BaseTrial, sort_by_trial_index_and_arm_name
 from ax.core.batch_trial import BatchTrial, LifecycleStage
 from ax.core.data import Data
 from ax.core.formatting_utils import DATA_TYPE_LOOKUP, DataType
@@ -713,6 +713,7 @@ def _lookup_or_fetch_trials_results(
                 trials=trials,
                 **kwargs,
             )
+
             contains_new_data = contains_new_data or new_results_contains_new_data
 
             # Merge in results
@@ -820,6 +821,8 @@ def attach_data(
             )
         cur_time_millis = current_timestamp_in_millis()
         for trial_index, trial_df in data.true_df.groupby(data.true_df["trial_index"]):
+            if not isinstance(data, MapData):
+                trial_df = sort_by_trial_index_and_arm_name(df=trial_df)
             # Overwrite `df` so that `data` only has current trial data.
             data_init_args["df"] = trial_df
             current_trial_data = (
diff --git a/ax/core/tests/test_experiment.py b/ax/core/tests/test_experiment.py
@@ -16,6 +16,7 @@
 from ax.core.auxiliary import AuxiliaryExperiment, AuxiliaryExperimentPurpose
 from ax.core.base_trial import BaseTrial, TrialStatus
 from ax.core.data import Data
+from ax.core.experiment import sort_by_trial_index_and_arm_name
 from ax.core.map_data import MapData
 from ax.core.map_metric import MapMetric
 from ax.core.metric import Metric
@@ -65,6 +66,7 @@
     get_test_map_data_experiment,
 )
 from ax.utils.testing.mock import mock_botorch_optimize
+from pandas.testing import assert_frame_equal
 from pyre_extensions import assert_is_instance
 
 DUMMY_RUN_METADATA_KEY = "test_run_metadata_key"
@@ -697,6 +699,81 @@ def test_FetchTrialsData(self) -> None:
             set(batch_0_data.df["arm_name"].values), {a.name for a in batch_0.arms}
         )
 
+    def test_attach_and_sort_data(self) -> None:
+        n = 4
+        exp = self._setupBraninExperiment(n)
+        batch = exp.trials[0]
+        batch.mark_completed()
+        self.assertEqual(exp.completed_trials, [batch])
+
+        # Rows are deliberately out of order; attach_data should store them sorted.
+        unsorted_df = pd.DataFrame(
+            {
+                "arm_name": [
+                    "0_0",
+                    "0_2",
+                    "0_11",
+                    "0_1",
+                    "status_quo",
+                    "1_0",
+                    "1_1",
+                    "1_2",
+                    "1_13",
+                ],
+                "metric_name": ["b"] * 9,
+                "mean": list(range(1, 10)),
+                "sem": [0.1 + i * 0.05 for i in range(9)],
+                "trial_index": [0, 0, 0, 0, 0, 1, 1, 1, 1],
+            }
+        )
+
+        sorted_dfs = []
+        sorted_dfs.append(
+            pd.DataFrame(
+                {
+                    "trial_index": [0] * 5,
+                    "arm_name": [
+                        "status_quo",
+                        "0_0",
+                        "0_1",
+                        "0_2",
+                        "0_11",
+                    ],
+                    "metric_name": ["b"] * 5,
+                    "mean": [5.0, 1.0, 4.0, 2.0, 3.0],
+                    "sem": [0.3, 0.1, 0.25, 0.15, 0.2],
+                }
+            )
+        )
+
+        sorted_dfs.append(
+            pd.DataFrame(
+                {
+                    "trial_index": [1] * 4,
+                    "arm_name": [
+                        "1_0",
+                        "1_1",
+                        "1_2",
+                        "1_13",
+                    ],
+                    "metric_name": ["b"] * 4,
+                    "mean": [6.0, 7.0, 8.0, 9.0],
+                    "sem": [0.35, 0.4, 0.45, 0.5],
+                }
+            )
+        )
+
+        exp.attach_data(
+            Data(
+                df=unsorted_df,
+            )
+        )
+        for trial_index in [0, 1]:
+            assert_frame_equal(
+                list(exp.data_by_trial[trial_index].values())[0].df,
+                sorted_dfs[trial_index],
+            )
+
     def test_immutable_search_space_and_opt_config(self) -> None:
         mutable_exp = self._setupBraninExperiment(n=5)
         self.assertFalse(mutable_exp.immutable_search_space_and_opt_config)
@@ -1750,3 +1827,73 @@ def test_name_and_store_arm_if_not_exists_same_proposed_name_different_signature
             experiment._name_and_store_arm_if_not_exists(
                 arm=arm_2, proposed_name="different proposed name"
             )
+
+    def test_sorting_data_by_trial_index_and_arm_name(self) -> None:
+        # Mixed default (`<int>_<int>`) and custom arm names, out of order per trial.
+        unsorted_df = pd.DataFrame(
+            {
+                "trial_index": [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
+                "arm_name": [
+                    "0_0",
+                    "0_2",
+                    "custom_arm_1",
+                    "0_11",
+                    "status_quo",
+                    "0_1",
+                    "1_0",
+                    "custom_arm_2",
+                    "1_1",
+                    "status_quo",
+                    "1_2",
+                    "1_3",
+                ],
+                "metric_name": ["b"] * 12,
+                "mean": [float(x) for x in range(1, 13)],
+                "sem": [0.1 + i * 0.05 for i in range(12)],
+            }
+        )
+
+        expected_sorted_df = pd.DataFrame(
+            {
+                "trial_index": [0] * 6 + [1] * 6,
+                "arm_name": [
+                    "custom_arm_1",
+                    "status_quo",
+                    "0_0",
+                    "0_1",
+                    "0_2",
+                    "0_11",
+                    "custom_arm_2",
+                    "status_quo",
+                    "1_0",
+                    "1_1",
+                    "1_2",
+                    "1_3",
+                ],
+                "metric_name": ["b"] * 12,
+                "mean": [3.0, 5.0, 1.0, 6.0, 2.0, 4.0, 8.0, 10.0, 7.0, 9.0, 11.0, 12.0],
+                "sem": [
+                    0.2,
+                    0.3,
+                    0.1,
+                    0.35,
+                    0.15,
+                    0.25,
+                    0.45,
+                    0.55,
+                    0.4,
+                    0.5,
+                    0.6,
+                    0.65,
+                ],
+            }
+        )
+
+        sorted_df = sort_by_trial_index_and_arm_name(
+            df=unsorted_df,
+        )
+
+        assert_frame_equal(
+            sorted_df,
+            expected_sorted_df,
+        )