Skip to content

Commit b82e597

Browse files
authored
add inf evaluator to factor costeer and some minor improvements (#435)
1 parent 657f6b5 commit b82e597

File tree

3 files changed

+40
-3
lines changed

3 files changed

+40
-3
lines changed

rdagent/components/coder/factor_coder/CoSTEER/evaluators.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,28 @@ def evaluate(
133133
return critic_response, None
134134

135135

136+
class FactorInfEvaluator(FactorEvaluator):
    """Evaluator that rejects a factor result containing infinite values."""

    def evaluate(
        self,
        implementation: Workspace,
        gt_implementation: Workspace,
    ) -> Tuple[str, object]:
        """Count the +inf / -inf entries in the dataframe obtained from ``_get_df``.

        Args:
            implementation: Workspace forwarded to ``self._get_df`` as its
                second argument.
            gt_implementation: Workspace forwarded to ``self._get_df`` as its
                first argument.

        Returns:
            A ``(feedback, passed)`` pair. ``passed`` is ``True`` only when the
            dataframe exists and holds no infinite values; otherwise it is
            ``False`` and ``feedback`` explains why.
        """
        # Only the second value returned by `_get_df` is inspected here
        # (presumably the dataframe produced by `implementation` -- confirm
        # against `_get_df`).
        _, generated = self._get_df(gt_implementation, implementation)
        if generated is None:
            return (
                "The source dataframe is None. Please check the implementation.",
                False,
            )

        # `isin` yields a boolean mask; summing it twice collapses the mask
        # into a single total count of infinite cells.
        infinite_mask = generated.isin([float("inf"), -float("inf")])
        n_infinite = infinite_mask.sum().sum()
        if n_infinite != 0:
            return (
                f"The source dataframe has {n_infinite} infinite values. Please check the implementation.",
                False,
            )
        return "The source dataframe does not have any infinite values.", True
156+
157+
136158
class FactorSingleColumnEvaluator(FactorEvaluator):
137159
def evaluate(
138160
self,
@@ -417,6 +439,9 @@ def evaluate(
417439
"Output dataframe has more columns than input feature which is not acceptable in feature processing tasks. Please check the implementation to avoid generating too many columns. Consider this implementation as a failure."
418440
)
419441

442+
feedback_str, inf_evaluate_res = FactorInfEvaluator(self.scen).evaluate(implementation, gt_implementation)
443+
conclusions.append(feedback_str)
444+
420445
# Check if the index of the dataframe is ("datetime", "instrument")
421446
feedback_str, _ = FactorOutputFormatEvaluator(self.scen).evaluate(implementation, gt_implementation)
422447
conclusions.append(feedback_str)
@@ -465,6 +490,7 @@ def evaluate(
465490
and row_result <= 0.99
466491
or output_format_result is False
467492
or daily_check_result is False
493+
or inf_evaluate_res is False
468494
):
469495
decision_from_value_check = False
470496
else:

rdagent/scenarios/qlib/experiment/prompts.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ qlib_factor_output_format: |-
6565
0 your factor name 40914 non-null float64
6666
dtypes: float64(1)
6767
memory usage: <ignore>
68-
None
68+
Notice: The non-null count is OK to be different to the total number of entries since some instruments may not have the factor value on some days.
6969
One possible format of `result.h5` may be like following:
7070
datetime instrument
7171
2020-01-02 SZ000001 -0.001796

rdagent/scenarios/qlib/experiment/utils.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,21 @@ def get_file_desc(p: Path) -> str:
8585

8686
buffer = io.StringIO()
8787
df.info(verbose=True, buf=buffer, show_counts=False)
88+
89+
df_info = buffer.getvalue()
90+
if isinstance(df.index, pd.MultiIndex):
91+
df_info += f"\nMultiIndex names:, {df.index.names})"
92+
if "REPORT_PERIOD" in df.columns:
93+
one_instrument = df.index.get_level_values("instrument")[0]
94+
df_on_one_instrument = df.loc[pd.IndexSlice[:, one_instrument], ["REPORT_PERIOD"]]
95+
df_info += f"""
96+
A snapshot of one instrument, from which you can tell the distribution of the data:
97+
{df_on_one_instrument.head(10)}
98+
"""
8899
return JJ_TPL.render(
89100
file_name=p.name,
90-
type_desc="generated by `df.info(verbose=True, show_counts=False)`",
91-
content=buffer.getvalue(),
101+
type_desc="h5, generated by `df.info(verbose=True, show_counts=False)` and appendix info",
102+
content=df_info,
92103
)
93104
elif p.name.endswith(".md"):
94105
with open(p) as f:

0 commit comments

Comments
 (0)