Skip to content

Commit 0fcbedf

Browse files
committed
chore: merge on each trace
1 parent 14d543b commit 0fcbedf

File tree

3 files changed

+62
-38
lines changed

3 files changed

+62
-38
lines changed

rdagent/scenarios/data_science/proposal/exp_gen/merge.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -176,34 +176,34 @@ def gen(
176176
)
177177
)
178178

179-
success_fb_list = list(set(trace_fbs))
180-
logger.info(
181-
f"Merge Hypothesis: select {len(success_fb_list)} from {len(trace_fbs)} SOTA experiments found in {len(leaves)} traces"
182-
)
179+
trace_leaf_summaries = []
180+
for leaf_id, leaf in enumerate(leaves):
181+
if leaf == trace.current_selection[0]:
182+
continue
183183

184-
if len(success_fb_list) > 0:
185-
exp_to_merge_fb_desc = T("scenarios.data_science.proposal.exp_gen.merge:trace").r(
186-
exp_and_feedback_list=success_fb_list,
187-
type="success",
188-
heading="Successful iterations:",
189-
success_trial_desc="These trials are the steps or changes that led to the success of the solution to be merged",
190-
pipeline=DS_RD_SETTING.coder_on_whole_pipeline,
184+
exp_fbs = trace.experiment_and_feedback_list_after_init(
185+
return_type="sota",
186+
search_type="ancestors",
187+
selection=(leaf,),
188+
max_retrieve_num=max_sota_retrieved_num_per_trace,
191189
)
192-
else:
193-
exp_index = self.get_exp_index(trace)
194-
exp_to_merge_fb = trace.sota_experiment_fb(selection=(exp_index,))
195-
if exp_to_merge_fb is None:
196-
exp_to_merge_fb = trace.hist[exp_index]
197-
198-
exp_to_merge_fb_desc = T("scenarios.data_science.share:describe.feedback").r(
199-
exp_and_feedback=exp_to_merge_fb,
200-
heading="The feedback for the solution to be merged",
190+
trace_leaf_summaries.append(
191+
{
192+
"leaf_id": leaf_id,
193+
"experiments": exp_fbs,
194+
"best_exp": exp_fbs[-1][0],
195+
}
201196
)
202197

198+
merge_fb_desc = T("scenarios.data_science.proposal.exp_gen.merge:merge_trace").r(
199+
trace_leaf_summaries=trace_leaf_summaries,
200+
success_trial_desc="These trials are the steps or changes that led to the success of the solution to be merged",
201+
)
202+
203203
component_desc = T("scenarios.data_science.share:component_description_in_pipeline").r()
204204
hypothesis_dict = self.hypothesis_gen(
205205
component_desc=component_desc,
206-
exp_feedback_list_desc=exp_to_merge_fb_desc,
206+
exp_feedback_list_desc=merge_fb_desc,
207207
sota_exp_desc=sota_exp_desc,
208208
enable_idea_pool=DS_RD_SETTING.enable_knowledge_base,
209209
pipeline=DS_RD_SETTING.coder_on_whole_pipeline,

rdagent/scenarios/data_science/proposal/exp_gen/merge.yaml

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,39 @@ trace: |-
4242
{% endif %}
4343
{% endfor %}
4444
{% endif %}
45+
merge_trace: |-
46+
{% if not trace_leaf_summaries or trace_leaf_summaries|length == 0 %}
47+
No trace leaves {% if type == "success" %}with SOTA{% elif type == "failure" %}containing failed{% endif %} experiments available.
48+
{% else %}
49+
{% for leaf in trace_leaf_summaries %}
50+
# Trace Leaf: {{ leaf.leaf_id }}
51+
### Recent Experiments (max 4)
52+
{% if not leaf.experiments or leaf.experiments|length == 0 %}
53+
No eligible experiments found on this leaf.
54+
{% else %}
55+
{% for exp_and_feedback in leaf.experiments %}
56+
## Experiment {{ loop.index }}
57+
Target Problem: {{ exp_and_feedback[0].hypothesis.problem_desc }}
58+
Proposed Hypothesis: {{ exp_and_feedback[0].hypothesis.hypothesis }}
59+
Surpass Previous SOTA: {{ exp_and_feedback[1].decision }}
60+
{% if exp_and_feedback[0].result is none %}
61+
Experiment Score: Running buggy
62+
Experiment Error: {{ exp_and_feedback[1].reason }}
63+
{% else %}
64+
Experiment Score: {{ exp_and_feedback[0].result.loc["ensemble"].iloc[0] }}
65+
Experiment Feedback: {{ exp_and_feedback[1].reason }}
66+
{% if exp_and_feedback[1].code_change_summary %}Code Change Summary: {{ exp_and_feedback[1].code_change_summary }}{% endif %}
67+
{% endif %}
68+
{% endfor %}
69+
{% if leaf.best_exp %}
70+
### Best Code (last experiment)
71+
{{ leaf.best_exp.experiment_workspace.all_codes }}
72+
{% else %}
73+
No final code available for this leaf.
74+
{% endif %}
75+
{% endif %}
76+
{% endfor %}
77+
{% endif %}
4578
hypothesis_gen:
4679
system: |-
4780
{% include "scenarios.data_science.share:scen.role" %}
@@ -66,7 +99,11 @@ hypothesis_gen:
6699
2. Note on Time/Memory Constraints
67100
- If prior experiments failed due to time/memory limitations, assume your new hypothesis will face the same constraints. In this case, prioritize efficiency and **ONLY** respond to the problems related to time/memory constraints in your response dictionary.
68101
- Besides, do not compromise performance merely for efficiency since the current SOTA implementation does not encounter the constraints. You should think about how to balance the efficiency and performance so that your new hypothesis can be executed successfully and achieve satisfactory performance.
69-
102+
3. Multi-Branch Learning
103+
- When multiple branches (e.g., tabular, image, ensemble, workflow) have been explored, extract stable wins (changes that improved results repeatedly) and fragile wins (improvements with high variance or heavy cost).
104+
- Prefer single-variable changes; if a proposal touches multiple components, split it into multiple hypotheses.
105+
- When porting ideas from other branches, state the minimal integration and the specific acceptance/rollback condition.
106+
70107
# Task 2: Hypothesis Evaluation
71108
## Evaluation Instruction
72109
Firstly, you should tag the hypothesis with one of the following components. If the hypothesis is related to multiple components, you should choose the most relevant one.

rdagent/scenarios/data_science/proposal/exp_gen/router/__init__.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -77,23 +77,10 @@ async def async_gen(self, trace: DSTrace, loop: LoopBase) -> DSExperiment:
7777
if loop.get_unfinished_loop_cnt(loop.loop_idx) < RD_AGENT_SETTINGS.get_max_parallel():
7878
# set trace current selection
7979
leaves: list[int] = trace.get_leaves()
80-
if not timer.started or timer.remain_time() >= timedelta(hours=DS_RD_SETTING.merge_hours):
81-
local_selection = await self.trace_scheduler.next(trace)
80+
local_selection = await self.trace_scheduler.next(trace)
8281

83-
# set the local selection as the global current selection for the trace
84-
trace.set_current_selection(local_selection)
85-
else:
86-
if len(leaves) < 2:
87-
local_selection = (-1,)
88-
trace.set_current_selection(selection=local_selection)
89-
else:
90-
local_selection = (leaves[0],)
91-
if trace.sota_exp_to_submit is not None:
92-
for i in range(1, len(leaves)):
93-
if trace.is_parent(trace.exp2idx(trace.sota_exp_to_submit), leaves[i]):
94-
local_selection = (leaves[i],)
95-
break
96-
trace.set_current_selection(local_selection)
82+
# set the local selection as the global current selection for the trace
83+
trace.set_current_selection(local_selection)
9784

9885
ds_plan = self.planner.plan(trace) if DS_RD_SETTING.enable_planner else DSExperimentPlan()
9986

0 commit comments

Comments
 (0)