fix: Decoupled Downstream Scripts from Training Modes

Tonny@Home · Tonny@Home · commit d2d5146c9d75 · 2026-03-16T22:22:33.000+08:00
diff --git a/quantpits/scripts/ensemble_fusion.py b/quantpits/scripts/ensemble_fusion.py
@@ -491,31 +491,35 @@ def generate_ensemble_signal(norm_df, final_weights, static_weights, is_dynamic)
 # ============================================================================
 def save_predictions(final_score, anchor_date, experiment_name, method,
                      model_names, model_metrics, static_weights, is_dynamic,
-                     output_dir, combo_name=None, is_default=False):
+                     output_dir, combo_name=None, is_default=False,
+                     prediction_dir=None):
     """
     保存融合预测和配置。
 
     Args:
+        output_dir: 配置/报告输出目录
         combo_name: 组合名称（多组合模式下使用）
         is_default: 是否为 default combo（额外保存不带 combo_name 的兼容文件）
+        prediction_dir: 预测 CSV 输出目录 (默认 output/predictions)
     """
     # 保存预测
-    os.makedirs("output/predictions", exist_ok=True)
+    pred_dir = prediction_dir or os.path.join("output", "predictions")
+    os.makedirs(pred_dir, exist_ok=True)
     ensemble_df = final_score.to_frame('score')
 
     # 文件命名：带 combo_name 或不带
     if combo_name:
-        pred_file = f"output/predictions/ensemble_{combo_name}_{anchor_date}.csv"
+        pred_file = os.path.join(pred_dir, f"ensemble_{combo_name}_{anchor_date}.csv")
     else:
-        pred_file = f"output/predictions/ensemble_{anchor_date}.csv"
+        pred_file = os.path.join(pred_dir, f"ensemble_{anchor_date}.csv")
 
     ensemble_df.to_csv(pred_file)
     print(f"\nEnsemble 预测已保存: {pred_file}")
     print(f"Total: {len(ensemble_df)} records")
 
     # default combo 额外保存一份兼容文件
     if combo_name and is_default:
-        compat_file = f"output/predictions/ensemble_{anchor_date}.csv"
+        compat_file = os.path.join(pred_dir, f"ensemble_{anchor_date}.csv")
         ensemble_df.to_csv(compat_file)
         print(f"Default 兼容文件: {compat_file}")
 
@@ -1322,10 +1326,12 @@ def run_single_combo(combo_name, selected_models, method, manual_weights_str,
     )
 
     # ---- Stage 5: 保存预测 ----
+    prediction_dir = getattr(args, 'prediction_dir', None)
     pred_file = save_predictions(
         final_score, anchor_date, experiment_name, method,
         combo_models, combo_metrics, static_weights, is_dynamic,
-        combo_output_dir, combo_name=combo_name, is_default=is_default
+        combo_output_dir, combo_name=combo_name, is_default=is_default,
+        prediction_dir=prediction_dir
     )
 
     # ---- Stage 6: 回测 ----
@@ -1418,6 +1424,8 @@ def main():
                         help='训练记录文件 (默认 latest_train_records.json)')
     parser.add_argument('--output-dir', type=str, default='output/ensemble',
                         help='输出目录 (默认 output/ensemble)')
+    parser.add_argument('--prediction-dir', type=str, default=None,
+                        help='预测 CSV 输出目录 (默认 output/predictions)')
     parser.add_argument('--no-backtest', action='store_true',
                         help='跳过回测')
     parser.add_argument('--no-charts', action='store_true',
diff --git a/quantpits/scripts/order_gen.py b/quantpits/scripts/order_gen.py
@@ -96,7 +96,8 @@ def get_cashflow_today(cashflow_config, anchor_date):
 # ============================================================================
 # Stage 1: 加载预测数据
 # ============================================================================
-def load_predictions(prediction_file=None, model_name=None, anchor_date=None):
+def load_predictions(prediction_file=None, model_name=None, anchor_date=None,
+                     prediction_dir=None):
     """
     加载预测数据。
 
@@ -120,7 +121,8 @@ def load_predictions(prediction_file=None, model_name=None, anchor_date=None):
 
     if model_name:
         # 按模型名搜索
-        pattern = os.path.join(PREDICTION_DIR, f"{model_name}_*.csv")
+        _pred_dir = prediction_dir or PREDICTION_DIR
+        pattern = os.path.join(_pred_dir, f"{model_name}_*.csv")
         files = sorted(glob.glob(pattern))
         if not files:
             raise FileNotFoundError(
@@ -136,24 +138,25 @@ def load_predictions(prediction_file=None, model_name=None, anchor_date=None):
     # 优先级: ensemble_YYYY-MM-DD.csv (default combo 的向后兼容副本)
     #       > ensemble_default_YYYY-MM-DD.csv (显式 default combo)
     #       > ensemble_*.csv (任意 combo)
+    _pred_dir = prediction_dir or PREDICTION_DIR
     pred_file = None
 
     # 1) 向后兼容格式: ensemble_YYYY-MM-DD.csv (无 combo 名)
-    compat_pattern = os.path.join(PREDICTION_DIR, "ensemble_[0-9]*.csv")
+    compat_pattern = os.path.join(_pred_dir, "ensemble_[0-9]*.csv")
     compat_files = sorted(glob.glob(compat_pattern))
     if compat_files:
         pred_file = compat_files[-1]
 
     # 2) 若无，尝试 ensemble_default_YYYY-MM-DD.csv
     if not pred_file:
-        default_pattern = os.path.join(PREDICTION_DIR, "ensemble_default_*.csv")
+        default_pattern = os.path.join(_pred_dir, "ensemble_default_*.csv")
         default_files = sorted(glob.glob(default_pattern))
         if default_files:
             pred_file = default_files[-1]
 
     # 3) 若仍无，回退到任意 ensemble_*.csv（按日期排序）
     if not pred_file:
-        pattern = os.path.join(PREDICTION_DIR, "ensemble_*.csv")
+        pattern = os.path.join(_pred_dir, "ensemble_*.csv")
         files = sorted(glob.glob(pattern))
         if not files:
             raise FileNotFoundError(
@@ -295,7 +298,8 @@ def _load_pred_latest_day(pred_source, source_type, valid_instruments=None):
 def generate_model_opinions(focus_instruments, current_holding_instruments,
                             top_k, drop_n, buy_suggestion_factor,
                             sorted_df, output_dir, next_trade_date_string,
-                            dry_run=False):
+                            dry_run=False, record_file=None,
+                            prediction_dir=None):
     """
     加载所有 combo 和单一模型的预测，对每个标的生成判断。
 
@@ -343,13 +347,13 @@ def generate_model_opinions(focus_instruments, current_holding_instruments,
     # 1) Combo 预测
     for combo_name, cfg in combos.items():
         combo_info[combo_name] = cfg.get('models', [])
-        pattern = os.path.join(PREDICTION_DIR, f"ensemble_{combo_name}_*.csv")
+        pattern = os.path.join(prediction_dir or PREDICTION_DIR, f"ensemble_{combo_name}_*.csv")
         files = sorted(glob.glob(pattern))
         if files:
             sources.append((f"combo_{combo_name}", files[-1], 'combo', combo_name))
             continue
         if cfg.get('default', False):
-            pattern2 = os.path.join(PREDICTION_DIR, "ensemble_*.csv")
+            pattern2 = os.path.join(prediction_dir or PREDICTION_DIR, "ensemble_*.csv")
             generic_files = []
             for f_path in sorted(glob.glob(pattern2)):
                 basename = os.path.basename(f_path)
@@ -364,13 +368,13 @@ def generate_model_opinions(focus_instruments, current_holding_instruments,
     for cfg in combos.values():
         all_single_models.update(cfg.get('models', []))
     for model_name in sorted(all_single_models):
-        pattern = os.path.join(PREDICTION_DIR, f"{model_name}_*.csv")
+        pattern = os.path.join(prediction_dir or PREDICTION_DIR, f"{model_name}_*.csv")
         files = sorted(glob.glob(pattern))
         if files:
             sources.append((f"model_{model_name}", files[-1], 'model', model_name))
         else:
             try:
-                train_records_file = os.path.join(ROOT_DIR, 'config', 'latest_train_records.json')
+                train_records_file = record_file or os.path.join(ROOT_DIR, 'config', 'latest_train_records.json')
                 if os.path.exists(train_records_file):
                     with open(train_records_file, 'r') as f:
                         train_records = json.load(f)
@@ -602,6 +606,11 @@ def main():
                         help='直接指定预测文件路径')
     parser.add_argument('--output-dir', type=str, default='output',
                         help='输出目录 (默认 output)')
+    parser.add_argument('--prediction-dir', type=str, default=None,
+                        help='预测文件搜索目录 (默认 output/predictions)')
+    parser.add_argument('--record-file', type=str, default=None,
+                        help='训练记录文件，用于加载单模型 PKL 预测 '
+                             '(默认 config/latest_train_records.json)')
     parser.add_argument('--dry-run', action='store_true',
                         help='仅打印订单计划，不写入文件')
     parser.add_argument('--verbose', action='store_true',
@@ -659,7 +668,8 @@ def main():
     pred_df, source_desc = load_predictions(
         prediction_file=args.prediction_file,
         model_name=args.model,
-        anchor_date=anchor_date
+        anchor_date=anchor_date,
+        prediction_dir=args.prediction_dir
     )
 
     print(f"预测来源   : {source_desc}")
@@ -740,7 +750,8 @@ def main():
     opinions_df, combo_info = generate_model_opinions(
         focus_instruments, current_holding_instruments,
         top_k, drop_n, buy_suggestion_factor,
-        sorted_df, args.output_dir, next_trade_date_string, dry_run=args.dry_run
+        sorted_df, args.output_dir, next_trade_date_string, dry_run=args.dry_run,
+        record_file=args.record_file, prediction_dir=args.prediction_dir
     )
 
     if opinions_df is not None and not opinions_df.empty and args.verbose:
diff --git a/quantpits/scripts/signal_ranking.py b/quantpits/scripts/signal_ranking.py
@@ -59,17 +59,21 @@
 # ============================================================================
 # 配置解析 (复用 ensemble_fusion.py 的逻辑)
 # ============================================================================
-def parse_ensemble_config():
+def parse_ensemble_config(config_file=None):
     """
     解析 ensemble_config.json，兼容新旧格式。
 
+    Args:
+        config_file: 配置文件路径 (默认 ENSEMBLE_CONFIG_FILE)
+
     Returns:
         combos: dict, combo_name -> {"models": [], "method": str, "default": bool}
     """
-    if not os.path.exists(ENSEMBLE_CONFIG_FILE):
+    _config_file = config_file or ENSEMBLE_CONFIG_FILE
+    if not os.path.exists(_config_file):
         return {}
 
-    with open(ENSEMBLE_CONFIG_FILE, 'r') as f:
+    with open(_config_file, 'r') as f:
         config = json.load(f)
 
     if 'combos' in config:
@@ -144,22 +148,24 @@ def generate_signal_scores(pred_df, top_n=300):
     return output_df, latest_date
 
 
-def find_prediction_file(combo_name=None, anchor_date=None):
+def find_prediction_file(combo_name=None, anchor_date=None, prediction_dir=None):
     """
     查找预测文件。
 
     Args:
         combo_name: combo 名称，None 表示查找 default ensemble
         anchor_date: 日期限制
+        prediction_dir: 预测文件搜索目录 (默认 PREDICTION_DIR)
 
     Returns:
         pred_file: 文件路径
     """
+    _pred_dir = prediction_dir or PREDICTION_DIR
     if combo_name:
-        pattern = os.path.join(PREDICTION_DIR, f"ensemble_{combo_name}_*.csv")
+        pattern = os.path.join(_pred_dir, f"ensemble_{combo_name}_*.csv")
     else:
         # 查找不带 combo name 的通用 ensemble 文件
-        pattern = os.path.join(PREDICTION_DIR, "ensemble_*.csv")
+        pattern = os.path.join(_pred_dir, "ensemble_*.csv")
 
     files = sorted(glob.glob(pattern))
 
@@ -228,6 +234,8 @@ def main():
                         help='输出 Top N 个标的 (默认 300)')
     parser.add_argument('--output-dir', type=str, default='output/ranking',
                         help='输出目录 (默认 output/ranking)')
+    parser.add_argument('--prediction-dir', type=str, default=None,
+                        help='预测文件搜索目录 (默认 output/predictions)')
     parser.add_argument('--dry-run', action='store_true',
                         help='仅打印，不写入文件')
     args = parser.parse_args()
@@ -259,7 +267,8 @@ def main():
             sys.exit(1)
         for name, cfg in combos.items():
             try:
-                pred_file = find_prediction_file(combo_name=name)
+                pred_file = find_prediction_file(combo_name=name,
+                                                 prediction_dir=args.prediction_dir)
                 tasks.append((name, pred_file))
             except FileNotFoundError as e:
                 print(f"Warning: {e}")
@@ -270,12 +279,13 @@ def main():
         print(f"\n多组合模式: 共 {len(tasks)} 个 combo")
 
     elif args.combo:
-        pred_file = find_prediction_file(combo_name=args.combo)
+        pred_file = find_prediction_file(combo_name=args.combo,
+                                         prediction_dir=args.prediction_dir)
         tasks.append((args.combo, pred_file))
 
     else:
         # Default: 使用最新 ensemble 预测
-        pred_file = find_prediction_file()
+        pred_file = find_prediction_file(prediction_dir=args.prediction_dir)
         tasks.append(('default', pred_file))
 
     # ---- 逐任务处理 ----
diff --git a/tests/quantpits/scripts/test_order_gen.py b/tests/quantpits/scripts/test_order_gen.py
@@ -384,7 +384,8 @@ def test_generate_model_opinions_from_qlib_recorder(mock_R, mock_env, tmp_path):
     with patch('quantpits.scripts.order_gen.ROOT_DIR', str(workspace)):
         opinions_df, combo_info = order_gen.generate_model_opinions(
             ["A"], [], top_k=1, drop_n=0, buy_suggestion_factor=1,
-            sorted_df=sorted_df, output_dir=str(tmp_path), next_trade_date_string="2020-01-01"
+            sorted_df=sorted_df, output_dir=str(tmp_path), next_trade_date_string="2020-01-01",
+            record_file=str(train_records_file)
         )
     
     assert "model_gru" in opinions_df.columns
@@ -432,7 +433,8 @@ def test_main_dry_run_full(mock_D, mock_safeguard, mock_price, mock_pred, mock_c
     mock_D.calendar.return_value = [pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]
     
     import sys
-    with patch.object(sys, 'argv', ['script.py', '--dry-run', '--verbose']):
+    with patch.object(sys, 'argv', ['script.py', '--dry-run', '--verbose',
+                                    '--prediction-dir', str(workspace / "output" / "predictions")]):
         order_gen.main()
     
     # Check if a few key print messages were hit (via capsys if we had it, but mostly we want coverage)
diff --git a/tests/quantpits/scripts/test_signal_ranking.py b/tests/quantpits/scripts/test_signal_ranking.py