Skip to content

Commit 6ce5d51

Browse files
committed
debugv2
1 parent f9dd514 commit 6ce5d51

File tree

3 files changed

+29
-30
lines changed

3 files changed

+29
-30
lines changed

rdagent/scenarios/data_science/dev/runner/eval.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,18 @@ def evaluate(
5050
queried_knowledge: QueriedKnowledge = None,
5151
**kwargs,
5252
) -> DSCoSTEEREvalFeedback:
53-
53+
if "Ensemble" in target_task.name:
54+
running_timeout_period = DS_RD_SETTING.ensemble_timeout
55+
else:
56+
running_timeout_period = DS_RD_SETTING.full_timeout
5457
env = get_ds_env(
55-
extra_volumes={
56-
f"{DS_RD_SETTING.local_data_path}/{self.scen.competition}": T(
57-
"scenarios.data_science.share:scen.input_path"
58-
).r()
59-
},
60-
running_timeout_period=DS_RD_SETTING.full_timeout,
61-
)
58+
extra_volumes={
59+
f"{DS_RD_SETTING.local_data_path}/{self.scen.competition}": T(
60+
"scenarios.data_science.share:scen.input_path"
61+
).r()
62+
},
63+
running_timeout_period=running_timeout_period,
64+
)
6265

6366
stdout = implementation.execute(
6467
env=env, entry=get_clear_ws_cmd()

rdagent/scenarios/data_science/proposal/exp_gen/prompts_v2.yaml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,9 @@ scenario_description: |-
145145
146146
{% if time_limit %}
147147
====== Time Limit ======
148-
Your code's execution is limited to **{{ time_limit }}**. After this time limit, your code will be terminated. But remember your main target is to achieve the best performance and you have several times to modify your code. So please be bold to make the best use of all the time limit and don't be too conservative.
149-
During this time limit, you have all the resources available to you. Please fully leverage all the computational resources(CPUs and GPUs) to achieve the best performance like choose a powerful model, use a large batch size, enable data sampler with big parallel.
148+
Your code's execution is limited to {{ time_limit }}. After this time limit, your code will be terminated. However, remember that your primary objective is to achieve the best possible performance, and you're allowed to revise your code multiple times. So be bold — make full use of the entire time limit, and don’t be too conservative.
149+
During this period, you have full access to computational resources (CPUs and GPUs). Please take advantage of them: choose powerful models, use large batch sizes, and enable parallelism (e.g., large-scale data sampling or multi-GPU training) where applicable.
150+
If your code involves ensemble training, note that the total time allowed for ensemble runs is {{ ensemble_limit }}. Make sure to plan your ensemble strategy wisely within this limit.
150151
{% endif %}
151152
152153
hypothesis_gen:
@@ -269,10 +270,13 @@ hypothesis_select:
269270
If multiple hypotheses seem reasonable, select the one that is most robust or consistent with Previous Experiments and Feedbacks, and pay attention to the runtime of each loop.
270271
271272
If you believe that previous methods have reached their limits and the current setting only involves a single model, feel free to propose an ensemble solution. However, you **must** carefully allocate the training and runtime budget to ensure the **ensemble logic is well-executed and evaluated**, without compromising the performance of the previous models.
272-
273+
273274
### 1. Ensemble Core Principle
274275
Your goal is not just to tune individual models, but to build an **effective ensemble**. Make design decisions that lead to **strong overall ensemble performance**, not just strong base models.
275276
Please note: your remaining time budget for ensemble training is {{res_time}} seconds, and the maximum allowed time is {{ensemble_timeout}} seconds.
277+
{{use_ratio}}% of the total ensemble time has been used. As this surpasses the 70% threshold, you are advised to shift focus toward optimizing the ensemble component rather than continuing with model, data, feature, or workflow exploration.
278+
Please take the remaining {{res_time}} seconds to carefully consider and design the most reasonable and optimal ensemble hypothesis based on your current progress.
279+
276280
Assume training a single model takes about 1 hour. For example, if you have roughly twice that time left, you can try training multiple models with different random seeds or data splits to reuse time effectively.
277281
If you have more time, you might consider training a multi-fold ensemble. Use your judgment to decide how many folds or seeds fit within your remaining time budget.
278282

rdagent/scenarios/data_science/proposal/exp_gen/proposal.py

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import pandas as pd
66
from pydantic import BaseModel, Field
7-
7+
from rdagent.oai.backend.base import RD_Agent_TIMER_wrapper
88
from rdagent.log.timer import RDAgentTimer
99
from rdagent.core.conf import RD_AGENT_SETTINGS
1010
import asyncio
@@ -466,16 +466,6 @@ def __init__(self, *args, **kwargs):
466466
super().__init__(*args, **kwargs)
467467
self.supports_response_schema = APIBackend().supports_response_schema()
468468

469-
async def async_gen(self, trace, loop):
470-
"""
471-
generate the experiment and decide whether to stop yield generation and give up control to other routines.
472-
"""
473-
# we give a default implementation here.
474-
# The proposal is set to try best to generate the experiment in max-parallel level.
475-
while True:
476-
if loop.get_unfinished_loop_cnt(loop.loop_idx) < RD_AGENT_SETTINGS.get_max_parallel():
477-
return self.gen(trace, loop.timer)
478-
await asyncio.sleep(1)
479469

480470
def identify_scenario_problem(self, scenario_desc: str, sota_exp_desc: str) -> Dict:
481471
sys_prompt = T(".prompts_v2:scenario_problem.system").r(
@@ -733,24 +723,26 @@ def hypothesis_select_with_llm(self,
733723
scenario_desc: str,
734724
exp_feedback_list_desc: str,
735725
sota_exp_desc: str,
736-
hypothesis_candidates:dict,
737-
timer: RDAgentTimer
738-
):
726+
hypothesis_candidates:dict):
739727

740728
# time_use_current = 0
741729
# for exp, feedback in trace.hist:
742730
# if exp.running_info.running_time is not None:
743731
# time_use_current += exp.running_info.running_time
744732
# res_time = 12*3600 - time_use_current
745-
res_time = timer.remain_time()
733+
res_time = RD_Agent_TIMER_wrapper.timer.remain_time()
734+
total_time = RD_Agent_TIMER_wrapper.timer.all_duration
735+
use_time = total_time.seconds - res_time.seconds
736+
use_ratio = 100* use_time // total_time.seconds
746737

747738
ensemble_timeout = DS_RD_SETTING.ensemble_timeout
748739
hypothesis_candidates = str(json.dumps(hypothesis_candidates, indent=2))
749740

750741
sys_prompt = T(".prompts_v2:hypothesis_select.system").r(
751742
hypothesis_candidates = hypothesis_candidates,
752-
res_time = res_time,
743+
res_time = res_time.seconds,
753744
ensemble_timeout = ensemble_timeout,
745+
use_ratio = use_ratio,
754746
hypothesis_output_format = T(".prompts_v2:output_format.hypothesis_select_format").r(hypothesis_candidates = hypothesis_candidates)
755747
)
756748

@@ -854,6 +846,7 @@ def get_scenario_all_desc(self, trace: DSTrace, eda_output=None) -> str:
854846
raw_description=trace.scen.raw_description,
855847
use_raw_description=DS_RD_SETTING.use_raw_description,
856848
time_limit=f"{DS_RD_SETTING.full_timeout / 60 / 60 : .2f} hours",
849+
ensemble_limit = f"{DS_RD_SETTING.ensemble_timeout / 60 / 60 : .2f} hours",
857850
eda_output=eda_output,
858851
)
859852

@@ -876,7 +869,6 @@ def get_all_hypotheses(self, problem_dict: dict, hypothesis_dict: dict) -> list[
876869
def gen(
877870
self,
878871
trace: DSTrace,
879-
timer: RDAgentTimer
880872
) -> DSExperiment:
881873
pipeline = DS_RD_SETTING.coder_on_whole_pipeline
882874
if not pipeline and (draft_exp := draft_exp_in_decomposition(self.scen, trace)):
@@ -984,8 +976,8 @@ def gen(
984976
response_dict= self.hypothesis_select_with_llm(scenario_desc=scenario_desc,
985977
exp_feedback_list_desc=exp_feedback_list_desc,
986978
sota_exp_desc=sota_exp_desc,
987-
hypothesis_candidates =hypothesis_dict ,
988-
timer=timer)
979+
hypothesis_candidates =hypothesis_dict
980+
)
989981
component_map = {
990982
"Model": HypothesisComponent.Model,
991983
"Ensemble": HypothesisComponent.Ensemble,

0 commit comments

Comments
 (0)