Skip to content

Commit d110000

Browse files
authored
Merge pull request #66 from AKKI0511/analyze-quanttradeai-for-end-to-end-feature
feat: add test-window coverage reporting
2 parents 2852274 + dd3c66a commit d110000

File tree

5 files changed: +189 additions, −13 deletions

quanttradeai/cli.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,20 @@ def cmd_train(
5353
),
5454
):
5555
"""Run full training pipeline."""
56+
pipeline_result = run_pipeline(
57+
config, skip_validation=skip_validation, include_coverage=True
58+
)
59+
coverage_info = None
60+
if isinstance(pipeline_result, tuple) and len(pipeline_result) == 2:
61+
_, coverage_info = pipeline_result
5662

57-
run_pipeline(config, skip_validation=skip_validation)
63+
if coverage_info:
64+
fallback = coverage_info.get("fallback_symbols") or []
65+
path = coverage_info.get("path")
66+
summary = f"Test-window coverage report saved to {path}."
67+
if fallback:
68+
summary += " Fallback chronological split used for: " + ", ".join(fallback)
69+
typer.echo(summary, err=True)
5870

5971

6072
@app.command("evaluate")
@@ -164,6 +176,16 @@ def cmd_backtest_model(
164176
liquidity_max_participation=liquidity_max_participation,
165177
skip_validation=skip_validation,
166178
)
179+
coverage_info = (
180+
summary.get("coverage_report") if isinstance(summary, dict) else None
181+
)
182+
if coverage_info:
183+
fallback = coverage_info.get("fallback_symbols") or []
184+
path = coverage_info.get("path")
185+
message = f"Test-window coverage report saved to {path}."
186+
if fallback:
187+
message += " Fallback chronological split used for: " + ", ".join(fallback)
188+
typer.echo(message, err=True)
167189
typer.echo(json.dumps(summary, indent=2))
168190

169191

quanttradeai/main.py

Lines changed: 97 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,23 @@ def _write_validation_report(report_path: Path, report: dict) -> None:
7575
logger.warning("Failed to write CSV validation report: %s", exc)
7676

7777

78+
def _write_coverage_report(report_path: Path, coverage: dict) -> None:
79+
"""Persist test-window coverage results to JSON and CSV formats."""
80+
81+
report_path.parent.mkdir(parents=True, exist_ok=True)
82+
with open(report_path, "w") as fh:
83+
json.dump(coverage, fh, indent=2)
84+
85+
try:
86+
rows = []
87+
for symbol, details in coverage.items():
88+
row = {"symbol": symbol, **details}
89+
rows.append(row)
90+
pd.DataFrame(rows).to_csv(report_path.with_suffix(".csv"), index=False)
91+
except Exception as exc: # pragma: no cover - defensive
92+
logger.warning("Failed to write CSV coverage report: %s", exc)
93+
94+
7895
def _ensure_datetime_index(df: pd.DataFrame) -> pd.DataFrame:
7996
"""Ensure the DataFrame index is a DatetimeIndex.
8097
@@ -103,7 +120,7 @@ def _ensure_datetime_index(df: pd.DataFrame) -> pd.DataFrame:
103120
def time_aware_split(
104121
df_labeled: pd.DataFrame,
105122
cfg: dict,
106-
) -> Tuple[pd.DataFrame, pd.DataFrame]:
123+
) -> Tuple[pd.DataFrame, pd.DataFrame, dict]:
107124
"""Return chronological train/test splits using config windows.
108125
109126
Rules
@@ -150,6 +167,10 @@ def _window_has_full_coverage(
150167
return True
151168

152169
fallback_used = False
170+
coverage_ok: bool | None = True if test_start else None
171+
data_start = df.index.min()
172+
data_end = df.index.max()
173+
split_strategy = "window" if test_start else "fraction"
153174
if test_start:
154175
start_dt = pd.to_datetime(test_start)
155176
end_dt = pd.to_datetime(test_end) if test_end else None
@@ -184,7 +205,22 @@ def _window_has_full_coverage(
184205
raise ValueError(
185206
f"Invalid train/test window produced empty split when using {window_msg}. Adjust data.test_* or training.test_size."
186207
)
187-
return train_df, test_df
208+
coverage: dict = {
209+
"data_start": data_start.isoformat(),
210+
"data_end": data_end.isoformat(),
211+
"test_start": pd.to_datetime(test_start).isoformat() if test_start else None,
212+
"test_end": pd.to_datetime(test_end).isoformat() if test_end else None,
213+
"train_start": train_df.index.min().isoformat(),
214+
"train_end": train_df.index.max().isoformat(),
215+
"test_start_actual": test_df.index.min().isoformat(),
216+
"test_end_actual": test_df.index.max().isoformat(),
217+
"train_size": len(train_df),
218+
"test_size": len(test_df),
219+
"coverage_ok": coverage_ok,
220+
"fallback_used": fallback_used,
221+
"split_strategy": split_strategy if not fallback_used else "fraction_fallback",
222+
}
223+
return train_df, test_df, coverage
188224

189225

190226
def _validate_or_raise(
@@ -225,7 +261,10 @@ def _validate_or_raise(
225261

226262

227263
def run_pipeline(
228-
config_path: str = "config/model_config.yaml", *, skip_validation: bool = False
264+
config_path: str = "config/model_config.yaml",
265+
*,
266+
skip_validation: bool = False,
267+
include_coverage: bool = False,
229268
):
230269
"""Run the end-to-end training pipeline.
231270
@@ -238,6 +277,12 @@ def run_pipeline(
238277
>>> results = run_pipeline("config/model_config.yaml")
239278
>>> sorted(results.keys()) # doctest: +ELLIPSIS
240279
...
280+
281+
Set ``include_coverage=True`` to also receive coverage metadata:
282+
283+
>>> results, coverage = run_pipeline("config/model_config.yaml", include_coverage=True)
284+
>>> sorted(coverage.keys())
285+
['fallback_symbols', 'path']
241286
"""
242287

243288
# Load configuration
@@ -274,6 +319,7 @@ def run_pipeline(
274319

275320
# Process each stock
276321
results = {}
322+
coverage_report: dict[str, dict] = {}
277323
for symbol, df in data_dict.items():
278324
logger.info(f"\nProcessing {symbol}...")
279325

@@ -284,7 +330,8 @@ def run_pipeline(
284330
df_labeled = data_processor.generate_labels(df_processed)
285331

286332
# 4. Time-aware Split
287-
train_df, test_df = time_aware_split(df_labeled, config)
333+
train_df, test_df, coverage = time_aware_split(df_labeled, config)
334+
coverage_report[symbol] = coverage
288335
X_train, y_train = model.prepare_data(train_df)
289336
X_test, y_test = model.prepare_data(test_df)
290337
# Log split summary
@@ -327,11 +374,35 @@ def run_pipeline(
327374
logger.info(f"Train Metrics: {train_metrics}")
328375
logger.info(f"Test Metrics: {test_metrics}")
329376

377+
coverage_path = Path(experiment_dir) / "test_window_coverage.json"
378+
_write_coverage_report(coverage_path, coverage_report)
379+
fallback_symbols = [
380+
symbol
381+
for symbol, details in coverage_report.items()
382+
if details.get("fallback_used")
383+
]
384+
if fallback_symbols:
385+
logger.warning(
386+
"Fallback chronological split applied for symbols: %s. Coverage report: %s",
387+
", ".join(fallback_symbols),
388+
coverage_path,
389+
)
390+
logger.info("Coverage report saved to %s", coverage_path)
391+
330392
# Save experiment results
331393
with open(f"{experiment_dir}/results.json", "w") as f:
332394
json.dump(results, f, indent=4)
333395

334396
logger.info("\nPipeline completed successfully!")
397+
if include_coverage:
398+
return (
399+
results,
400+
{
401+
"path": coverage_path.as_posix(),
402+
"fallback_symbols": fallback_symbols,
403+
},
404+
)
405+
335406
return results
336407

337408
except Exception as e:
@@ -539,6 +610,7 @@ def run_model_backtest(
539610
summary: dict = {}
540611
prepared_data: dict[str, pd.DataFrame] = {}
541612
artifact_dirs: dict[str, Path] = {}
613+
coverage_report: dict[str, dict] = {}
542614

543615
trading_cfg = (cfg or {}).get("trading", {})
544616
stop_loss = trading_cfg.get("stop_loss")
@@ -595,7 +667,8 @@ def _execution_for(symbol: str) -> dict:
595667
try:
596668
df_proc = processor.process_data(df)
597669
df_lbl = processor.generate_labels(df_proc)
598-
train_df, test_df = time_aware_split(df_lbl, cfg)
670+
train_df, test_df, coverage = time_aware_split(df_lbl, cfg)
671+
coverage_report[symbol] = coverage
599672
# Build features from saved order
600673
missing = [
601674
c for c in (clf.feature_columns or []) if c not in test_df.columns
@@ -676,6 +749,25 @@ def _execution_for(symbol: str) -> dict:
676749
"output_dir": out_dir.as_posix(),
677750
}
678751

752+
coverage_path = base_dir / "test_window_coverage.json"
753+
_write_coverage_report(coverage_path, coverage_report)
754+
fallback_symbols = [
755+
symbol
756+
for symbol, details in coverage_report.items()
757+
if details.get("fallback_used")
758+
]
759+
if fallback_symbols:
760+
logger.warning(
761+
"Fallback chronological split applied for symbols: %s. Coverage report: %s",
762+
", ".join(fallback_symbols),
763+
coverage_path,
764+
)
765+
logger.info("Coverage report saved to %s", coverage_path)
766+
summary["coverage_report"] = {
767+
"path": coverage_path.as_posix(),
768+
"fallback_symbols": fallback_symbols,
769+
}
770+
679771
return summary
680772

681773

tests/data/test_validation_gate.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,11 @@ def test_run_pipeline_skip_validation_allows_progression(
8181
)
8282
monkeypatch.setattr(MomentumClassifier, "save_model", lambda self, path: None)
8383

84-
results = run_pipeline(sample_config_path, skip_validation=True)
84+
results, coverage_info = run_pipeline(
85+
sample_config_path, skip_validation=True, include_coverage=True
86+
)
8587
assert "AAPL" in results
88+
assert coverage_info["path"].endswith("test_window_coverage.json")
8689

8790

8891
def test_run_model_backtest_validates_before_execution(

tests/integration/test_pipeline_time_splits.py

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,30 +14,36 @@ def test_time_aware_split_with_window():
1414
idx = pd.date_range("2024-01-01", periods=10, freq="D")
1515
df = pd.DataFrame({"Close": range(10)}, index=idx)
1616
cfg = {"data": {"test_start": "2024-01-06", "test_end": "2024-01-08"}}
17-
train, test = time_aware_split(df, cfg)
17+
train, test, coverage = time_aware_split(df, cfg)
1818
assert train.index.max() < pd.to_datetime("2024-01-06")
1919
assert test.index.min() == pd.to_datetime("2024-01-06")
2020
assert test.index.max() == pd.to_datetime("2024-01-08")
2121
assert len(train) == 5 and len(test) == 3
22+
assert coverage["coverage_ok"] is True
23+
assert coverage["fallback_used"] is False
2224

2325

2426
def test_time_aware_split_with_start_only():
2527
idx = pd.date_range("2024-01-01", periods=10, freq="D")
2628
df = pd.DataFrame({"Close": range(10)}, index=idx)
2729
cfg = {"data": {"test_start": "2024-01-06"}}
28-
train, test = time_aware_split(df, cfg)
30+
train, test, coverage = time_aware_split(df, cfg)
2931
assert train.index.max() < pd.to_datetime("2024-01-06")
3032
assert test.index.min() == pd.to_datetime("2024-01-06")
3133
assert len(train) == 5 and len(test) == 5
34+
assert coverage["coverage_ok"] is True
35+
assert coverage["fallback_used"] is False
3236

3337

3438
def test_time_aware_split_fallback_fraction():
3539
idx = pd.date_range("2024-01-01", periods=10, freq="D")
3640
df = pd.DataFrame({"Close": range(10)}, index=idx)
3741
cfg = {"training": {"test_size": 0.2}}
38-
train, test = time_aware_split(df, cfg)
42+
train, test, coverage = time_aware_split(df, cfg)
3943
assert len(train) == 8 and len(test) == 2
4044
assert train.index.max() < test.index.min()
45+
assert coverage["test_start"] is None
46+
assert coverage["coverage_ok"] is None
4147

4248

4349
def test_time_aware_split_warns_and_falls_back(caplog):
@@ -49,10 +55,12 @@ def test_time_aware_split_warns_and_falls_back(caplog):
4955
}
5056

5157
with caplog.at_level(logging.WARNING):
52-
train, test = time_aware_split(df, cfg)
58+
train, test, coverage = time_aware_split(df, cfg)
5359

5460
assert len(train) == 3 and len(test) == 2
5561
assert "falling back to chronological split" in caplog.text
62+
assert coverage["coverage_ok"] is False
63+
assert coverage["fallback_used"] is True
5664

5765

5866
def test_time_aware_split_warns_on_partial_window(caplog):
@@ -64,11 +72,32 @@ def test_time_aware_split_warns_on_partial_window(caplog):
6472
}
6573

6674
with caplog.at_level(logging.WARNING):
67-
train, test = time_aware_split(df, cfg)
75+
train, test, coverage = time_aware_split(df, cfg)
6876

6977
assert len(train) == 6 and len(test) == 2
7078
assert train.index.max() < test.index.min()
7179
assert "not fully present in data; falling back" in caplog.text
80+
assert coverage["coverage_ok"] is False
81+
assert coverage["fallback_used"] is True
82+
83+
84+
def test_time_aware_split_reports_coverage_fields():
85+
idx = pd.date_range("2024-01-01", periods=6, freq="D")
86+
df = pd.DataFrame({"Close": range(6)}, index=idx)
87+
cfg = {
88+
"data": {"test_start": "2024-01-05", "test_end": "2024-01-10"},
89+
"training": {"test_size": 0.5},
90+
}
91+
92+
train, test, coverage = time_aware_split(df, cfg)
93+
94+
assert coverage["data_start"].startswith("2024-01-01")
95+
assert coverage["data_end"].startswith("2024-01-06")
96+
assert coverage["test_start"].startswith("2024-01-05")
97+
assert coverage["test_end"].startswith("2024-01-10")
98+
assert coverage["train_size"] == len(train)
99+
assert coverage["test_size"] == len(test)
100+
assert coverage["split_strategy"] == "fraction_fallback"
72101

73102

74103
def test_model_config_rejects_out_of_range_test_window():
@@ -159,9 +188,12 @@ def prepare_data(df):
159188
model_instance.evaluate.return_value = {"accuracy": 1.0}
160189
model_instance.save_model.return_value = None
161190

162-
results = run_pipeline(str(config_path))
191+
results, coverage_info = run_pipeline(
192+
str(config_path), include_coverage=True
193+
)
163194

164195
mock_loader.assert_called_once_with(str(config_path))
165196
assert "AAA" in results
166197
assert "hyperparameters" in results["AAA"]
198+
assert coverage_info["path"].endswith("test_window_coverage.json")
167199

tests/streaming/test_gateway.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import json
33
import tempfile
44
import time
5+
import warnings
56
from typing import Awaitable, Callable, Dict, List, Optional
67
from unittest.mock import AsyncMock, patch
78

@@ -12,6 +13,32 @@
1213
import pytest
1314
from quanttradeai.streaming import StreamingGateway
1415

16+
pytestmark = pytest.mark.filterwarnings(
17+
"ignore:.*websockets.*:DeprecationWarning",
18+
"ignore:.*WebSocketServerProtocol is deprecated:DeprecationWarning",
19+
)
20+
21+
warnings.filterwarnings(
22+
"ignore",
23+
message=r"websockets\.legacy is deprecated",
24+
category=DeprecationWarning,
25+
)
26+
warnings.filterwarnings(
27+
"ignore",
28+
message=r"websockets\.server\.WebSocketServerProtocol is deprecated",
29+
category=DeprecationWarning,
30+
)
31+
warnings.filterwarnings(
32+
"ignore",
33+
category=DeprecationWarning,
34+
module=r"websockets.*",
35+
)
36+
warnings.filterwarnings(
37+
"ignore",
38+
category=DeprecationWarning,
39+
module=r"uvicorn\.protocols\.websockets.*",
40+
)
41+
1542

1643
class StubProviderMonitor:
1744
def __init__(

0 commit comments

Comments (0)