refactor: Improve data handling and representation in eval tests and prompts (#740)

you-n-g · web-flow · commit 270ff7c46a46 · 2025-04-03T13:53:25.000+08:00
diff --git a/rdagent/components/coder/data_science/feature/eval_tests/feature_test.txt b/rdagent/components/coder/data_science/feature/eval_tests/feature_test.txt
@@ -2,28 +2,33 @@
 Tests for `feat_eng` in feature.py
 """
 
-import pickle
-from copy import deepcopy
 
+from copy import deepcopy
+import sys
 import numpy as np
 import pandas as pd
 from feature import feat_eng
 from load_data import load_data
+import reprlib
+aRepr = reprlib.Repr()
+aRepr.maxother=300
 
 X, y, X_test, test_ids = load_data()
-print(f"X.shape: {X.shape}")
-print(f"y.shape: {y.shape}" if not isinstance(y, list) else f"y(list)'s length: {len(y)}")
-print(f"X_test.shape: {X_test.shape}")
+print("X:", aRepr.repr(X))
+print("y:", aRepr.repr(y))
+print("X_test:", aRepr.repr(X_test))
+print("test_ids", aRepr.repr(test_ids))
+
+print(f"X.shape: {X.shape}" if hasattr(X, 'shape') else f"X length: {len(X)}")
+print(f"y.shape: {y.shape}" if hasattr(y, 'shape') else f"y length: {len(y)}")
+print(f"X_test.shape: {X_test.shape}" if hasattr(X_test, 'shape') else f"X_test length: {len(X_test)}")
 print(f"test_ids length: {len(test_ids)}")
+
 X_loaded = deepcopy(X)
 y_loaded = deepcopy(y)
 X_test_loaded = deepcopy(X_test)
 
-import sys
-import reprlib
 def debug_info_print(func):
-    aRepr = reprlib.Repr()
-    aRepr.maxother=300
     def wrapper(*args, **kwargs):
         def local_trace(frame, event, arg):
             if event == "return" and frame.f_code == func.__code__:
@@ -44,7 +49,7 @@ X, y, X_test = debug_info_print(feat_eng)(X, y, X_test)
 
 
 def get_length(data):
-    return len(data) if isinstance(data, list) else data.shape[0]
+    return data.shape[0] if hasattr(data, 'shape') else len(data)
 
 
 def get_width(data):
diff --git a/rdagent/components/coder/data_science/model/eval_tests/model_test.txt b/rdagent/components/coder/data_science/model/eval_tests/model_test.txt
@@ -1,6 +1,7 @@
 """
 Tests for `model_workflow` in model01.py
 """
+import sys
 import time
 
 from feature import feat_eng
@@ -19,20 +20,33 @@ def log_execution_results(start_time, val_pred, test_pred, hypers, execution_lab
     print(feedback_str)
 
 
+import reprlib
+aRepr = reprlib.Repr()
+aRepr.maxother=300
+
 # Load and preprocess data
 X, y, test_X, test_ids = load_data()
 X, y, test_X = feat_eng(X, y, test_X)
+
+print(f"X.shape: {X.shape}" if hasattr(X, 'shape') else f"X length: {len(X)}")
+print(f"y.shape: {y.shape}" if hasattr(y, 'shape') else f"y length: {len(y)}")
+print(f"test_X.shape: {test_X.shape}" if hasattr(test_X, 'shape') else f"test_X length: {len(test_X)}")
+print(f"test_ids length: {len(test_ids)}")
+
 train_X, val_X, train_y, val_y = train_test_split(X, y, test_size=0.8, random_state=42)
-print(f"train_X.shape: {train_X.shape}")
-print(f"train_y.shape: {train_y.shape}" if not isinstance(train_y, list) else f"train_y(list)'s length: {len(train_y)}")
-print(f"val_X.shape: {val_X.shape}")
-print(f"val_y.shape: {val_y.shape}" if not isinstance(val_y, list) else f"val_y(list)'s length: {len(val_y)}")
 
-import sys
-import reprlib
+print("train_X:", aRepr.repr(train_X))
+print("train_y:", aRepr.repr(train_y))
+print("val_X:", aRepr.repr(val_X))
+print("val_y:", aRepr.repr(val_y))
+
+print(f"train_X.shape: {train_X.shape}" if hasattr(train_X, 'shape') else f"train_X length: {len(train_X)}")
+print(f"train_y.shape: {train_y.shape}" if hasattr(train_y, 'shape') else f"train_y length: {len(train_y)}")
+print(f"val_X.shape: {val_X.shape}" if hasattr(val_X, 'shape') else f"val_X length: {len(val_X)}")
+print(f"val_y.shape: {val_y.shape}" if hasattr(val_y, 'shape') else f"val_y length: {len(val_y)}")
+
+
 def debug_info_print(func):
-    aRepr = reprlib.Repr()
-    aRepr.maxother=300
     def wrapper(*args, **kwargs):
         def local_trace(frame, event, arg):
             if event == "return" and frame.f_code == func.__code__:
diff --git a/rdagent/components/coder/data_science/raw_data_loader/eval_tests/data_loader_test.txt b/rdagent/components/coder/data_science/raw_data_loader/eval_tests/data_loader_test.txt
@@ -33,11 +33,11 @@ X, y, X_test, test_ids = debug_info_print(load_data)()
 
 
 def get_length(data):
-    return len(data) if isinstance(data, list) else data.shape[0]
+    return data.shape[0] if hasattr(data, 'shape') else len(data)
 
 
 def get_width(data):
-    return 1 if isinstance(data, list) else data.shape[1:]
+    return data.shape[1:] if hasattr(data, 'shape') else 1
 
 
 def get_column_list(data):
diff --git a/rdagent/components/coder/data_science/raw_data_loader/prompts.yaml b/rdagent/components/coder/data_science/raw_data_loader/prompts.yaml
@@ -410,9 +410,7 @@ data_loader_eval:
     The data loader is part of the whole workflow. The user has executed the entire pipeline and provided additional stdout.
 
     **Workflow Code:**
-    ```python
     {{ workflow_code }}
-    ```
 
     You should evaluate both the data loader test results and the overall workflow execution. **Approve the code only if both tests pass.**
     {% endif %}
diff --git a/rdagent/scenarios/data_science/share.yaml b/rdagent/scenarios/data_science/share.yaml
@@ -101,6 +101,7 @@ component_spec:
       - Optimize memory usage for large datasets using techniques like downcasting or reading data in chunks if necessary.
       - Domain-Specific Handling: 
         - Apply competition-specific preprocessing steps as needed (e.g., text tokenization, image resizing).
+        - Do not return raw binary bytes directly; convert/decode them into a more useful format such as a `numpy.ndarray`.
 
     3. Code Standards:
       - DO NOT use progress bars (e.g., `tqdm`).