Skip to content

Commit 697bf4a

Browse files
authored
add auto sota selector llm log in trace (#1169)
1 parent 383e5ed commit 697bf4a

File tree

2 files changed: 35 additions & 15 deletions

2 files changed: 35 additions & 15 deletions

rdagent/log/ui/ds_summary.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ def shorten_folder_name(folder: str) -> str:
139139
if bt1.toggle("Select Best", key="select_best"):
140140

141141
def apply_func(cdf: pd.DataFrame):
142-
cp = cdf["Competition"].values[0]
142+
cp = base_df.loc[cdf.index[0], "Competition"]
143143
md = get_metric_direction(cp)
144144
# If SOTA Exp Score (valid, to_submit) column is empty, return the first index
145145
if cdf["SOTA Exp Score (valid, to_submit)"].dropna().empty:
@@ -150,7 +150,7 @@ def apply_func(cdf: pd.DataFrame):
150150
best_idx = cdf["SOTA Exp Score (valid, to_submit)"].idxmin()
151151
return best_idx
152152

153-
best_idxs = base_df.groupby("Competition").apply(apply_func)
153+
best_idxs = base_df.groupby("Competition").apply(apply_func, include_groups=False)
154154
base_df["Select"] = base_df.index.isin(best_idxs.values)
155155

156156
base_df = st.data_editor(

rdagent/log/ui/ds_trace.py

Lines changed: 33 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -524,7 +524,7 @@ def feedback_win(fb_data, llm_data=None):
524524

525525

526526
def sota_win(sota_exp, trace):
527-
st.header("SOTA Experiment", divider="rainbow", anchor="sota-exp")
527+
st.subheader("SOTA Experiment", divider="rainbow", anchor="sota-exp")
528528
if hasattr(trace, "sota_exp_to_submit") and trace.sota_exp_to_submit is not None:
529529
st.markdown(":orange[trace.**sota_exp_to_submit**]")
530530
sota_exp = trace.sota_exp_to_submit
@@ -572,6 +572,9 @@ def main_win(loop_id, llm_data=None):
572572
if "feedback" in loop_data:
573573
feedback_win(loop_data["feedback"], llm_data.get("feedback", None) if llm_data else None)
574574
if "record" in loop_data and "SOTA experiment" in loop_data["record"]:
575+
st.header("Record", divider="violet", anchor="record")
576+
if state.show_llm_log and llm_data is not None and "record" in llm_data:
577+
llm_log_win(llm_data["record"]["no_tag"])
575578
sota_win(loop_data["record"]["SOTA experiment"], loop_data["record"]["trace"])
576579

577580

@@ -707,11 +710,12 @@ def summarize_win():
707710
parents = final_trace.get_parents(node)
708711
root_nodes[node] = parents[0]
709712
parent_nodes[node] = parents[-2] if len(parents) > 1 else None
710-
root_nodes = {final_trace.idx2loop_id[n]: final_trace.idx2loop_id[r] for n, r in root_nodes.items()}
711-
parent_nodes = {
712-
final_trace.idx2loop_id[n]: final_trace.idx2loop_id[r] if r is not None else r
713-
for n, r in parent_nodes.items()
714-
}
713+
if hasattr(final_trace, "idx2loop_id"):
714+
root_nodes = {final_trace.idx2loop_id[n]: final_trace.idx2loop_id[r] for n, r in root_nodes.items()}
715+
parent_nodes = {
716+
final_trace.idx2loop_id[n]: final_trace.idx2loop_id[r] if r is not None else r
717+
for n, r in parent_nodes.items()
718+
}
715719

716720
# Generate Summary Table
717721
df = pd.DataFrame(
@@ -748,7 +752,7 @@ def summarize_win():
748752
df.at[loop, "Others"] = {
749753
k: v
750754
for k, v in loop_data["direct_exp_gen"]["no_tag"].hypothesis.__dict__.items()
751-
if k not in ["component", "hypothesis", "reason"]
755+
if k not in ["component", "hypothesis", "reason"] and v is not None
752756
}
753757
df.loc[loop, "COST($)"] = sum(tc.content["cost"] for tc in state.token_costs[loop])
754758

@@ -872,7 +876,7 @@ def summarize_win():
872876
df.loc[loop, "Feedback"] = "N/A"
873877

874878
if only_success:
875-
df = df[df["Feedback"] == "✅"]
879+
df = df[df["Feedback"].str.contains("✅", na=False)]
876880

877881
# Add color styling based on root_nodes
878882
def style_dataframe_by_root(df, root_nodes):
@@ -935,13 +939,28 @@ def apply_color(row):
935939
st.plotly_chart(curve_figure(vscores))
936940

937941
st.markdown("### Hypotheses Table")
942+
hypotheses_df = df.iloc[:, :8].copy()
943+
others_expanded = pd.json_normalize(hypotheses_df["Others"].fillna({}))
944+
945+
hypotheses_df = hypotheses_df.drop("Others", axis=1)
946+
hypotheses_df = hypotheses_df.drop("Parent N", axis=1)
947+
hypotheses_df = pd.concat([hypotheses_df.iloc[:, :4], others_expanded, hypotheses_df.iloc[:, 4:]], axis=1)
948+
949+
styled_hypotheses_table = style_dataframe_by_root(hypotheses_df, root_nodes)
938950
st.dataframe(
939-
df.iloc[:, :8],
951+
styled_hypotheses_table,
940952
row_height=100,
941953
column_config={
942-
"Others": st.column_config.JsonColumn(width="medium"),
943-
"Reason": st.column_config.TextColumn(width="medium"),
944-
"Hypothesis": st.column_config.TextColumn(width="large"),
954+
k: st.column_config.TextColumn(
955+
k,
956+
width=(
957+
"small"
958+
if k
959+
in ["Component", "Root N", "Parent N", "Run Score (valid)", "Run Score (test)", "problem_label"]
960+
else "medium"
961+
),
962+
)
963+
for k in hypotheses_df.columns
945964
},
946965
)
947966

@@ -1119,7 +1138,8 @@ def get_folders_sorted(log_path, sort_by_time=False):
11191138
- [Coding](#coding)
11201139
- [Running](#running)
11211140
- [Feedback](#feedback)
1122-
- [SOTA Experiment](#sota-exp)
1141+
- [Record](#record)
1142+
- [SOTA Experiment](#sota-exp)
11231143
"""
11241144
)
11251145

0 commit comments

Comments
 (0)