debugv2

jingyuanlm · jingyuanlm · commit 6ce5d5101fcc · 2025-07-18T09:15:10.000Z
diff --git a/rdagent/scenarios/data_science/dev/runner/eval.py b/rdagent/scenarios/data_science/dev/runner/eval.py
@@ -50,15 +50,18 @@ def evaluate(
         queried_knowledge: QueriedKnowledge = None,
         **kwargs,
     ) -> DSCoSTEEREvalFeedback:
-
+        if "Ensemble" in target_task.name:
+            running_timeout_period = DS_RD_SETTING.ensemble_timeout
+        else:
+            running_timeout_period = DS_RD_SETTING.full_timeout
         env = get_ds_env(
-            extra_volumes={
-                f"{DS_RD_SETTING.local_data_path}/{self.scen.competition}": T(
-                    "scenarios.data_science.share:scen.input_path"
-                ).r()
-            },
-            running_timeout_period=DS_RD_SETTING.full_timeout,
-        )
+                extra_volumes={
+                    f"{DS_RD_SETTING.local_data_path}/{self.scen.competition}": T(
+                        "scenarios.data_science.share:scen.input_path"
+                    ).r()
+                },
+                running_timeout_period=running_timeout_period,
+            )
 
         stdout = implementation.execute(
             env=env, entry=get_clear_ws_cmd()
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/prompts_v2.yaml b/rdagent/scenarios/data_science/proposal/exp_gen/prompts_v2.yaml
@@ -145,8 +145,9 @@ scenario_description: |-
 
   {% if time_limit %}
   ====== Time Limit ======
-  Your code's execution is limited to **{{ time_limit }}**. After this time limit, your code will be terminated. But remember your main target is to achieve the best performance and you have several times to modify your code. So please be bold to make the best use of all the time limit and don't be too conservative.
-  During this time limit, you have all the resources available to you. Please fully leverage all the computational resources(CPUs and GPUs) to achieve the best performance like choose a powerful model, use a large batch size, enable data sampler with big parallel.
+  Your code's execution is limited to {{ time_limit }}. After this time limit, your code will be terminated. However, remember that your primary objective is to achieve the best possible performance, and you're allowed to revise your code multiple times. So be bold — make full use of the entire time limit, and don’t be too conservative.
+  During this period, you have full access to computational resources (CPUs and GPUs). Please take advantage of them: choose powerful models, use large batch sizes, and enable parallelism (e.g., large-scale data sampling or multi-GPU training) where applicable.
+  If your code involves ensemble training, note that the total time allowed for ensemble runs is {{ ensemble_limit }}. Make sure to plan your ensemble strategy wisely within this limit.
   {% endif %}
 
 hypothesis_gen:
@@ -269,10 +270,13 @@ hypothesis_select:
     If multiple hypotheses seem reasonable, select the one that is most robust or consistent with Previous Experiments and Feedbacks, pay attention to the runtime of each loop.
 
     If you believe that previous methods have reached their limits and the current setting only involves a single model, feel free to propose an ensemble solution. However, you **must** carefully allocate the training and runtime budget to ensure the **ensemble logic is well-executed and evaluated**, without compromising the performance of the previous models.
-
+    
     ### 1. Ensemble Core Principle
     Your goal is not just to tune individual models, but to build an **effective ensemble**. Make design decisions that lead to **strong overall ensemble performance**, not just strong base models.
     Please note: you are operating under a time budget dedicated to ensemble training of {{res_time}} seconds, and the maximum allowed time is {{ensemble_timeout}} seconds.
+    {{use_ratio}}% of the total time budget has been used. If this exceeds the 70% threshold, you are advised to shift focus toward optimizing the ensemble component rather than continuing with model, data, feature, or workflow exploration.
+    Given the remaining {{res_time}} seconds, please carefully consider and design the most reasonable and optimal ensemble hypothesis based on your current progress.
+
     Assume training a single model takes about 1 hour. For example, if you have roughly twice that time left, you can try training multiple models with different random seeds or data splits to reuse time effectively.
     If you have more time, you might consider training a multi-fold ensemble. Use your judgment to decide how many folds or seeds fit within your remaining time budget.
 
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/proposal.py b/rdagent/scenarios/data_science/proposal/exp_gen/proposal.py
@@ -4,7 +4,7 @@
 
 import pandas as pd
 from pydantic import BaseModel, Field
-
+from rdagent.oai.backend.base import RD_Agent_TIMER_wrapper
 from rdagent.log.timer import RDAgentTimer
 from rdagent.core.conf import RD_AGENT_SETTINGS
 import asyncio
@@ -466,16 +466,6 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.supports_response_schema = APIBackend().supports_response_schema()
 
-    async def async_gen(self, trace, loop):
-        """
-        generate the experiment and decide whether to stop yield generation and give up control to other routines.
-        """
-        # we give a default implementation here.
-        # The proposal is set to try best to generate the experiment in max-parallel level.
-        while True:
-            if loop.get_unfinished_loop_cnt(loop.loop_idx) < RD_AGENT_SETTINGS.get_max_parallel():
-                return self.gen(trace, loop.timer)
-            await asyncio.sleep(1)
 
     def identify_scenario_problem(self, scenario_desc: str, sota_exp_desc: str) -> Dict:
         sys_prompt = T(".prompts_v2:scenario_problem.system").r(
@@ -733,24 +723,26 @@ def hypothesis_select_with_llm(self,
                                    scenario_desc: str,
                                    exp_feedback_list_desc: str,
                                    sota_exp_desc: str,
-                                   hypothesis_candidates:dict,
-                                   timer: RDAgentTimer
-                                  ):
+                                   hypothesis_candidates:dict):
         
         # time_use_current = 0
         # for exp, feedback in trace.hist:
         #     if exp.running_info.running_time is not None:
         #         time_use_current += exp.running_info.running_time
         # res_time = 12*3600 - time_use_current
-        res_time = timer.remain_time()
+        res_time = RD_Agent_TIMER_wrapper.timer.remain_time()
+        total_time = RD_Agent_TIMER_wrapper.timer.all_duration
+        use_time = total_time.seconds - res_time.seconds
+        use_ratio = 100* use_time // total_time.seconds 
 
         ensemble_timeout = DS_RD_SETTING.ensemble_timeout
         hypothesis_candidates =  str(json.dumps(hypothesis_candidates, indent=2))
 
         sys_prompt = T(".prompts_v2:hypothesis_select.system").r(
                 hypothesis_candidates = hypothesis_candidates,
-                res_time = res_time,
+                res_time = res_time.seconds,
                 ensemble_timeout = ensemble_timeout,
+                use_ratio = use_ratio,
                 hypothesis_output_format = T(".prompts_v2:output_format.hypothesis_select_format").r(hypothesis_candidates = hypothesis_candidates)
         )
 
@@ -854,6 +846,7 @@ def get_scenario_all_desc(self, trace: DSTrace, eda_output=None) -> str:
             raw_description=trace.scen.raw_description,
             use_raw_description=DS_RD_SETTING.use_raw_description,
             time_limit=f"{DS_RD_SETTING.full_timeout / 60 / 60 : .2f} hours",
+            ensemble_limit = f"{DS_RD_SETTING.ensemble_timeout / 60 / 60 : .2f} hours",
             eda_output=eda_output,
         )
 
@@ -876,7 +869,6 @@ def get_all_hypotheses(self, problem_dict: dict, hypothesis_dict: dict) -> list[
     def gen(
         self,
         trace: DSTrace, 
-        timer: RDAgentTimer
     ) -> DSExperiment:
         pipeline = DS_RD_SETTING.coder_on_whole_pipeline
         if not pipeline and (draft_exp := draft_exp_in_decomposition(self.scen, trace)):
@@ -984,8 +976,8 @@ def gen(
         response_dict= self.hypothesis_select_with_llm(scenario_desc=scenario_desc,
                                     exp_feedback_list_desc=exp_feedback_list_desc,
                                     sota_exp_desc=sota_exp_desc,
-                                    hypothesis_candidates =hypothesis_dict ,
-                                    timer=timer)
+                                    hypothesis_candidates =hypothesis_dict
+                                    )
         component_map = {
             "Model": HypothesisComponent.Model,
             "Ensemble": HypothesisComponent.Ensemble,