Skip to content

Commit 6d01e3e

Browse files
authored
feat: refactor CoSTEER classes to use DSCoSTEER and update max seconds handling (#1156)
* feat: refactor CoSTEER classes to use DSCoSTEER and update max seconds handling
* remove useless line
* enable time_ratio_limit_to_enable_hyperparameter_tuning
1 parent 2fa1790 commit 6d01e3e

File tree

13 files changed

+53
-43
lines changed

13 files changed

+53
-43
lines changed

rdagent/app/kaggle/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ class KaggleBasePropSetting(ExtendedBaseSettings):
7575
mini_case: bool = False
7676
"""Enable mini-case study for experiments"""
7777

78-
time_ratio_limit_to_enable_hyperparameter_tuning: int | None = None
79-
"""Time ratio limit to enable hyperparameter tuning, if None, hyperparameter tuning is always enabled in the first evolution."""
78+
time_ratio_limit_to_enable_hyperparameter_tuning: float = 1
79+
"""Time ratio limit to enable hyperparameter tuning; if left unchanged, hyperparameter tuning is always enabled in the first evolution."""
8080

8181

8282
KAGGLE_IMPLEMENT_SETTING = KaggleBasePropSetting()

rdagent/components/coder/CoSTEER/__init__.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import pickle
21
from copy import deepcopy
32
from datetime import datetime
43
from pathlib import Path
@@ -26,7 +25,6 @@ def __init__(
2625
es: EvolvingStrategy,
2726
*args,
2827
evolving_version: int = 2,
29-
max_seconds: int | None = None,
3028
with_knowledge: bool = True,
3129
knowledge_self_gen: bool = True,
3230
max_loop: int | None = None,
@@ -36,7 +34,6 @@ def __init__(
3634
self.settings = settings
3735

3836
self.max_loop = settings.max_loop if max_loop is None else max_loop
39-
self.max_seconds = max_seconds
4037
self.knowledge_base_path = (
4138
Path(settings.knowledge_base_path) if settings.knowledge_base_path is not None else None
4239
)
@@ -67,6 +64,13 @@ def __init__(
6764
)
6865
)
6966

67+
def get_develop_max_seconds(self) -> int | None:
68+
"""
69+
Get the maximum seconds for the develop task.
70+
Subclasses may override this method to provide a different value.
71+
"""
72+
return None
73+
7074
def _get_last_fb(self) -> CoSTEERMultiFeedback:
7175
fb = self.evolve_agent.evolving_trace[-1].feedback
7276
assert fb is not None, "feedback is None"
@@ -76,6 +80,7 @@ def _get_last_fb(self) -> CoSTEERMultiFeedback:
7680
def develop(self, exp: Experiment) -> Experiment:
7781

7882
# init intermediate items
83+
max_seconds = self.get_develop_max_seconds()
7984
evo_exp = EvolvingItem.from_experiment(exp)
8085

8186
self.evolve_agent = RAGEvoAgent[EvolvingItem](
@@ -102,8 +107,8 @@ def develop(self, exp: Experiment) -> Experiment:
102107
logger.log_object(evo_exp.sub_workspace_list, tag="evolving code")
103108
for sw in evo_exp.sub_workspace_list:
104109
logger.info(f"evolving workspace: {sw}")
105-
if self.max_seconds is not None and (datetime.now() - start_datetime).seconds > self.max_seconds:
106-
logger.info(f"Reached max time limit {self.max_seconds} seconds, stop evolving")
110+
if max_seconds is not None and (datetime.now() - start_datetime).seconds > max_seconds:
111+
logger.info(f"Reached max time limit {max_seconds} seconds, stop evolving")
107112
reached_max_seconds = True
108113
break
109114
if RD_Agent_TIMER_wrapper.timer.started and RD_Agent_TIMER_wrapper.timer.is_timeout():
@@ -140,7 +145,7 @@ def _exp_postprocess_by_feedback(self, evo: Experiment, feedback: CoSTEERMultiFe
140145
failed_feedbacks = [
141146
f"- feedback{index + 1:02d}:\n - execution: {f.execution}\n - return_checking: {f.return_checking}\n - code: {f.code}"
142147
for index, f in enumerate(feedback)
143-
if f is not None and not f.final_decision
148+
if f is not None and not f.is_acceptable()
144149
]
145150

146151
if len(failed_feedbacks) == len(feedback):

rdagent/components/coder/data_science/ensemble/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from jinja2 import Environment, StrictUndefined
1717

1818
from rdagent.app.data_science.conf import DS_RD_SETTING
19-
from rdagent.components.coder.CoSTEER import CoSTEER
2019
from rdagent.components.coder.CoSTEER.evaluators import (
2120
CoSTEERMultiEvaluator,
2221
CoSTEERSingleFeedback,
@@ -30,6 +29,7 @@
3029
from rdagent.components.coder.data_science.conf import DSCoderCoSTEERSettings
3130
from rdagent.components.coder.data_science.ensemble.eval import EnsembleCoSTEEREvaluator
3231
from rdagent.components.coder.data_science.ensemble.exp import EnsembleTask
32+
from rdagent.components.coder.data_science.share.ds_costeer import DSCoSTEER
3333
from rdagent.core.exception import CoderError
3434
from rdagent.core.experiment import FBWorkspace
3535
from rdagent.core.scenario import Scenario
@@ -141,7 +141,7 @@ def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
141141
return evo
142142

143143

144-
class EnsembleCoSTEER(CoSTEER):
144+
class EnsembleCoSTEER(DSCoSTEER):
145145
def __init__(
146146
self,
147147
scen: Scenario,
@@ -160,6 +160,5 @@ def __init__(
160160
evolving_version=2,
161161
scen=scen,
162162
max_loop=DS_RD_SETTING.coder_max_loop,
163-
max_seconds=scen.real_debug_timeout() * settings.max_seconds_multiplier,
164163
**kwargs,
165164
)

rdagent/components/coder/data_science/feature/__init__.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
import json
21
from pathlib import Path
3-
from typing import Dict
42

53
from rdagent.app.data_science.conf import DS_RD_SETTING
6-
from rdagent.components.coder.CoSTEER import CoSTEER
74
from rdagent.components.coder.CoSTEER.evaluators import (
85
CoSTEERMultiEvaluator,
96
CoSTEERSingleFeedback,
@@ -17,6 +14,7 @@
1714
from rdagent.components.coder.data_science.conf import DSCoderCoSTEERSettings
1815
from rdagent.components.coder.data_science.feature.eval import FeatureCoSTEEREvaluator
1916
from rdagent.components.coder.data_science.feature.exp import FeatureTask
17+
from rdagent.components.coder.data_science.share.ds_costeer import DSCoSTEER
2018
from rdagent.core.exception import CoderError
2119
from rdagent.core.experiment import FBWorkspace
2220
from rdagent.core.scenario import Scenario
@@ -117,7 +115,7 @@ def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
117115
return evo
118116

119117

120-
class FeatureCoSTEER(CoSTEER):
118+
class FeatureCoSTEER(DSCoSTEER):
121119
def __init__(
122120
self,
123121
scen: Scenario,
@@ -138,6 +136,5 @@ def __init__(
138136
evolving_version=2,
139137
scen=scen,
140138
max_loop=DS_RD_SETTING.coder_max_loop,
141-
max_seconds=scen.real_debug_timeout() * settings.max_seconds_multiplier,
142139
**kwargs,
143140
)

rdagent/components/coder/data_science/model/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from pathlib import Path
2-
from typing import Dict
32

43
from rdagent.app.data_science.conf import DS_RD_SETTING
5-
from rdagent.components.coder.CoSTEER import CoSTEER
64
from rdagent.components.coder.CoSTEER.evaluators import (
75
CoSTEERMultiEvaluator,
86
CoSTEERSingleFeedback,
@@ -18,6 +16,7 @@
1816
ModelGeneralCaseSpecEvaluator,
1917
)
2018
from rdagent.components.coder.data_science.model.exp import ModelTask
19+
from rdagent.components.coder.data_science.share.ds_costeer import DSCoSTEER
2120
from rdagent.core.exception import CoderError
2221
from rdagent.core.experiment import FBWorkspace
2322
from rdagent.core.scenario import Scenario
@@ -148,7 +147,7 @@ def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
148147
return evo
149148

150149

151-
class ModelCoSTEER(CoSTEER):
150+
class ModelCoSTEER(DSCoSTEER):
152151
def __init__(
153152
self,
154153
scen: Scenario,
@@ -170,6 +169,5 @@ def __init__(
170169
evolving_version=2,
171170
scen=scen,
172171
max_loop=DS_RD_SETTING.coder_max_loop,
173-
max_seconds=scen.real_debug_timeout() * settings.max_seconds_multiplier,
174172
**kwargs,
175173
)

rdagent/components/coder/data_science/pipeline/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from pathlib import Path
2626

2727
from rdagent.app.data_science.conf import DS_RD_SETTING
28-
from rdagent.components.coder.CoSTEER import CoSTEER
2928
from rdagent.components.coder.CoSTEER.evaluators import (
3029
CoSTEERMultiEvaluator,
3130
CoSTEERSingleFeedback,
@@ -39,6 +38,7 @@
3938
from rdagent.components.coder.data_science.conf import DSCoderCoSTEERSettings
4039
from rdagent.components.coder.data_science.pipeline.eval import PipelineCoSTEEREvaluator
4140
from rdagent.components.coder.data_science.pipeline.exp import PipelineTask
41+
from rdagent.components.coder.data_science.share.ds_costeer import DSCoSTEER
4242
from rdagent.components.coder.data_science.share.eval import ModelDumpEvaluator
4343
from rdagent.core.exception import CoderError
4444
from rdagent.core.experiment import FBWorkspace
@@ -130,7 +130,7 @@ def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
130130
return evo
131131

132132

133-
class PipelineCoSTEER(CoSTEER):
133+
class PipelineCoSTEER(DSCoSTEER):
134134
def __init__(
135135
self,
136136
scen: Scenario,
@@ -159,6 +159,5 @@ def __init__(
159159
evolving_version=2,
160160
scen=scen,
161161
max_loop=DS_RD_SETTING.coder_max_loop,
162-
max_seconds=scen.real_debug_timeout() * settings.max_seconds_multiplier,
163162
**kwargs,
164163
)

rdagent/components/coder/data_science/raw_data_loader/__init__.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,10 @@
2222
- Each coder could be tested.
2323
"""
2424

25-
import json
2625
import re
2726
from pathlib import Path
28-
from typing import Dict
2927

3028
from rdagent.app.data_science.conf import DS_RD_SETTING
31-
from rdagent.components.coder.CoSTEER import CoSTEER
3229
from rdagent.components.coder.CoSTEER.evaluators import (
3330
CoSTEERMultiEvaluator,
3431
CoSTEERSingleFeedback,
@@ -47,6 +44,7 @@
4744
DataLoaderCoSTEEREvaluator,
4845
)
4946
from rdagent.components.coder.data_science.raw_data_loader.exp import DataLoaderTask
47+
from rdagent.components.coder.data_science.share.ds_costeer import DSCoSTEER
5048
from rdagent.core.exception import CoderError
5149
from rdagent.core.experiment import FBWorkspace
5250
from rdagent.core.scenario import Scenario
@@ -197,7 +195,7 @@ def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
197195
return evo
198196

199197

200-
class DataLoaderCoSTEER(CoSTEER):
198+
class DataLoaderCoSTEER(DSCoSTEER):
201199
def __init__(
202200
self,
203201
scen: Scenario,
@@ -218,7 +216,6 @@ def __init__(
218216
evolving_version=2,
219217
scen=scen,
220218
max_loop=DS_RD_SETTING.coder_max_loop,
221-
max_seconds=scen.real_debug_timeout() * settings.max_seconds_multiplier,
222219
**kwargs,
223220
)
224221

rdagent/components/coder/data_science/share/ds_costeer.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from rdagent.components.coder.CoSTEER import CoSTEER
2+
3+
4+
class DSCoSTEER(CoSTEER):
5+
def get_develop_max_seconds(self) -> int | None:
6+
"""
7+
The coder uses the scenario's real debug timeout, scaled by `settings.max_seconds_multiplier`, as the maximum seconds for development.
8+
"""
9+
return int(self.scen.real_debug_timeout() * self.settings.max_seconds_multiplier)

rdagent/components/coder/data_science/workflow/__init__.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
import json
2-
from typing import Dict
3-
41
from rdagent.app.data_science.conf import DS_RD_SETTING
5-
from rdagent.components.coder.CoSTEER import CoSTEER
62
from rdagent.components.coder.CoSTEER.evaluators import (
73
CoSTEERMultiEvaluator,
84
CoSTEERSingleFeedback,
@@ -14,6 +10,7 @@
1410
CoSTEERQueriedKnowledge,
1511
)
1612
from rdagent.components.coder.data_science.conf import DSCoderCoSTEERSettings
13+
from rdagent.components.coder.data_science.share.ds_costeer import DSCoSTEER
1714
from rdagent.components.coder.data_science.workflow.eval import (
1815
WorkflowGeneralCaseSpecEvaluator,
1916
)
@@ -111,7 +108,7 @@ def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
111108
return evo
112109

113110

114-
class WorkflowCoSTEER(CoSTEER):
111+
class WorkflowCoSTEER(DSCoSTEER):
115112
def __init__(
116113
self,
117114
scen: Scenario,
@@ -131,6 +128,5 @@ def __init__(
131128
evolving_version=2,
132129
scen=scen,
133130
max_loop=DS_RD_SETTING.coder_max_loop,
134-
max_seconds=scen.real_debug_timeout() * settings.max_seconds_multiplier,
135131
**kwargs,
136132
)

rdagent/core/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ class RDAgentSettings(ExtendedBaseSettings):
5656
# workspace conf
5757
workspace_path: Path = Path.cwd() / "git_ignore_folder" / "RD-Agent_workspace"
5858
workspace_ckp_size_limit: int = 0
59+
workspace_ckp_white_list_names: list[str] | None = None
5960
"""
6061
the checkpoint for the workspace is a zip file.
6162
0 (or any value <=0) means *no* size limit for files in workspace checkpoints

0 commit comments

Comments
 (0)