Skip to content

Commit 89d4ff0

Browse files
Carl Hvarfner authored and facebook-github-bot committed
optimization_trace to return noiseless function_values instead of noisy
Summary: Change the optimization trace to return the underlying noiseless values instead of the noisy ones. Differential Revision: D89407340 Privacy Context Container: L1307644
1 parent 3a707f6 commit 89d4ff0

File tree

6 files changed

+26
-18
lines changed

6 files changed

+26
-18
lines changed

ax/benchmark/benchmark.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,7 @@ def get_opt_trace_by_steps(experiment: Experiment) -> npt.NDArray:
806806
"Cumulative epochs not supported for problems with outcome constraints."
807807
)
808808

809-
objective_name = optimization_config.objective.metric.name
809+
objective_name: str = optimization_config.objective.metric.name
810810
data = assert_is_instance(experiment.lookup_data(), MapData)
811811
map_df = data.map_df
812812

@@ -815,39 +815,40 @@ def get_opt_trace_by_steps(experiment: Experiment) -> npt.NDArray:
815815
# to know which actually ran
816816
def _get_df(trial: Trial) -> pd.DataFrame:
817817
"""
818-
Get the (virtual) time each epoch finished at.
818+
Get the (virtual) time each epoch finished at, along with the ground
819+
truth values (Y_true).
819820
"""
820821
metadata = trial.run_metadata["benchmark_metadata"]
821822
backend_simulator = none_throws(metadata.backend_simulator)
822-
# Data for the first metric, which is the only metric
823-
df = next(iter(metadata.dfs.values()))
823+
# Get the DataFrame for the objective metric
824+
df = metadata.dfs[objective_name].copy()
824825
start_time = backend_simulator.get_sim_trial_by_index(
825826
trial.index
826827
).sim_start_time
827828
df["time"] = df["virtual runtime"] + start_time
828829
return df
829830

830-
with_timestamps = pd.concat(
831+
with_timestamps_and_y_true = pd.concat(
831832
(
832833
_get_df(trial=assert_is_instance(trial, Trial))
833834
for trial in experiment.trials.values()
834835
),
835836
axis=0,
836837
ignore_index=True,
837-
)[["trial_index", MAP_KEY, "time"]]
838+
)[["trial_index", MAP_KEY, "time", "Y_true"]]
838839

839840
df = (
840841
map_df.loc[
841842
map_df["metric_name"] == objective_name,
842-
["trial_index", "arm_name", "mean", MAP_KEY],
843+
["trial_index", "arm_name", MAP_KEY],
843844
]
844-
.merge(with_timestamps, how="left")
845+
.merge(with_timestamps_and_y_true, how="left")
845846
.sort_values("time", ignore_index=True)
846847
)
847848
return (
848-
df["mean"].cummin()
849+
df["Y_true"].cummin()
849850
if optimization_config.objective.minimize
850-
else df["mean"].cummax()
851+
else df["Y_true"].cummax()
851852
).to_numpy()
852853

853854

ax/benchmark/benchmark_runner.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,6 @@ def run(self, trial: BaseTrial) -> dict[str, BenchmarkTrialMetadata]:
303303
df=df, noise_stds=self.get_noise_stds(), arm_weights=arm_weights
304304
)
305305
df["trial_index"] = trial.index
306-
df.drop(columns=["Y_true"], inplace=True)
307306
df["metric_signature"] = df["metric_name"]
308307

309308
if self.simulated_backend_runner is not None:

ax/benchmark/benchmark_trial_metadata.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@ class BenchmarkTrialMetadata:
2020
2121
Args:
2222
dfs: A dict mapping each metric name to a Pandas DataFrame with columns
23-
["metric_name", "arm_name", "mean", "sem", and "step"]. The "sem" is
24-
always present in this df even if noise levels are unobserved;
25-
``BenchmarkMetric`` and ``BenchmarkMapMetric`` hide that data if it
26-
should not be observed, and ``BenchmarkMapMetric``s drop data from
27-
time periods that that are not observed based on the (simulated)
28-
trial progression.
23+
["metric_name", "arm_name", "mean", "sem", "Y_true", and "step"]. The
24+
"sem" is always present in this df even if noise levels are
25+
unobserved; ``BenchmarkMetric`` and ``BenchmarkMapMetric`` hide that
26+
data if it should not be observed, and ``BenchmarkMapMetric``s drop
27+
data from time periods that are not observed based on the
28+
(simulated) trial progression. The "Y_true" column contains the
29+
ground-truth (noiseless) values, which are used for computing the
30+
optimization trace.
2931
backend_simulator: Optionally, the backend simulator that is tracking
3032
the trial's status.
3133
"""

ax/benchmark/tests/test_benchmark_metric.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def _get_one_step_df(
4747
"metric_name": metric_name,
4848
"mean": [1.0, 2.5] if batch else [1.0],
4949
"sem": sem,
50+
"Y_true": [0.9, 2.4] if batch else [0.9],
5051
"trial_index": 0,
5152
"step": step,
5253
"virtual runtime": step,
@@ -59,6 +60,7 @@ def _get_one_step_df(
5960
"metric_name": metric_name,
6061
"mean": [0.5, 1.5] if batch else [0.5],
6162
"sem": sem,
63+
"Y_true": [0.4, 1.4] if batch else [0.4],
6264
"trial_index": 0,
6365
"step": step,
6466
"virtual runtime": step,

ax/benchmark/tests/test_benchmark_runner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ def test_heterogeneous_noise(self) -> None:
367367
"metric_signature",
368368
"mean",
369369
"sem",
370+
"Y_true",
370371
"trial_index",
371372
"step",
372373
"virtual runtime",

ax/service/utils/best_point.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -844,9 +844,12 @@ def _prepare_data_for_trace(
844844

845845
# Transform to a DataFrame with columns ["trial_index", "arm_name"] +
846846
# relevant metric names, and values being means.
847+
# Use Y_true (ground truth) if available (benchmarking context),
848+
# otherwise fall back to mean (production context)
849+
value_col = "Y_true" if "Y_true" in df.columns else "mean"
847850
df_wide = (
848851
df[df["metric_name"].isin(metrics)]
849-
.set_index(["trial_index", "arm_name", "metric_name"])["mean"]
852+
.set_index(["trial_index", "arm_name", "metric_name"])[value_col]
850853
.unstack(level="metric_name")
851854
)
852855
missing_metrics = [

0 commit comments

Comments
 (0)