fix: Add framework handling for task coding failure. (#176)

WinstonLiyt · web-flow · commit 5e14fa54a9dd · 2024-08-07T19:01:46.000+08:00
* Add framework handling for task coding failure.

* Fix a CI bug.
diff --git a/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py b/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py
@@ -671,6 +671,10 @@ def evaluate(
         ]
         logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}")
 
+        for index in range(len(evo.sub_tasks)):
+            if final_decision[index]:
+                evo.sub_tasks[index].factor_implementation = True
+
         return multi_implementation_feedback
 
 
diff --git a/rdagent/components/coder/factor_coder/factor.py b/rdagent/components/coder/factor_coder/factor.py
@@ -26,19 +26,30 @@ def __init__(
         factor_formulation,
         variables: dict = {},
         resource: str = None,
+        factor_implementation: bool = False,
     ) -> None:
         self.factor_name = factor_name
         self.factor_description = factor_description
         self.factor_formulation = factor_formulation
         self.variables = variables
         self.factor_resources = resource
+        self.factor_implementation = factor_implementation
 
     def get_task_information(self):
         return f"""factor_name: {self.factor_name}
 factor_description: {self.factor_description}
 factor_formulation: {self.factor_formulation}
 variables: {str(self.variables)}"""
 
+    def get_task_information_and_implementation_result(self):
+        return {
+            "factor_name": self.factor_name,
+            "factor_description": self.factor_description,
+            "factor_formulation": self.factor_formulation,
+            "variables": str(self.variables),
+            "factor_implementation": str(self.factor_implementation),
+        }
+
     @staticmethod
     def from_dict(dict):
         return FactorTask(**dict)
diff --git a/rdagent/scenarios/qlib/developer/feedback.py b/rdagent/scenarios/qlib/developer/feedback.py
@@ -72,7 +72,7 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac
         logger.info("Generating feedback...")
         hypothesis_text = hypothesis.hypothesis
         current_result = exp.result
-        tasks_factors = [task.get_task_information() for task in exp.sub_tasks]
+        tasks_factors = [task.get_task_information_and_implementation_result() for task in exp.sub_tasks]
         sota_result = exp.based_experiments[-1].result
 
         # Process the results to filter important metrics
diff --git a/rdagent/scenarios/qlib/prompts.yaml b/rdagent/scenarios/qlib/prompts.yaml
@@ -172,9 +172,18 @@ factor_feedback_generation:
     Target hypothesis: 
     {{ hypothesis_text }}
     Tasks and Factors:
-    {{ task_details }}
+    {% for task in task_details %}
+      - {{ task.factor_name }}: {{ task.factor_description }}
+        - Factor Formulation: {{ task.factor_formulation }}
+        - Variables: {{ task.variables }}
+        - Factor Implementation: {{ task.factor_implementation }}
+        {% if task.factor_implementation == "False" %}
+        **Note: This factor was not implemented in the current experiment. Only the hypothesis for implemented factors can be verified.**
+        {% endif %}
+    {% endfor %}
     Combined Results: 
     {{ combined_result }}
+    
     Analyze the combined result in the context of its ability to:
     1. Support or refute the hypothesis.
     2. Show improvement or deterioration compared to the SOTA experiment.
@@ -197,6 +206,8 @@ factor_feedback_generation:
       - If the new results significantly differ from the SOTA, consider exploring a new direction.
       - Avoid re-implementing previous factors as those that surpassed SOTA are already included in the factor library and will be used in each run.
 
+    Note: Only factors with 'Factor Implementation' as True are implemented and tested in this experiment. If 'Factor Implementation' is False, the hypothesis for that factor cannot be verified in this run.
+
 model_feedback_generation:
   system: |-
     You are a professional result analysis assistant. You will receive a result and a hypothesis.

Original file line number	Diff line number	Diff line change
`@@ -671,6 +671,10 @@ def evaluate(`
`671`	`671`	`]`
`672`	`672`	`logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}")`
`673`	`673`
	`674`	`+ for index in range(len(evo.sub_tasks)):`
	`675`	`+ if final_decision[index]:`
	`676`	`+ evo.sub_tasks[index].factor_implementation = True`
	`677`	`+`
`674`	`678`	`return multi_implementation_feedback`
`675`	`679`
`676`	`680`