diff --git a/src/quant_research_starter.egg-info/PKG-INFO b/src/quant_research_starter.egg-info/PKG-INFO index e934ef4..41e22c9 100644 --- a/src/quant_research_starter.egg-info/PKG-INFO +++ b/src/quant_research_starter.egg-info/PKG-INFO @@ -43,24 +43,43 @@ Dynamic: license-file # QuantResearchStarter -A modular, open-source quantitative research and backtesting framework designed for clarity and extensibility. Perfect for researchers, students, and developers interested in quantitative finance. - -![Python Version](https://img.shields.io/badge/python-3.10%2B-blue) -![License](https://img.shields.io/badge/license-MIT-green) +[![Python Version](https://img.shields.io/badge/python-3.10%2B-blue)](https://www.python.org/) +[![License: MIT](https://img.shields.io/badge/license-MIT-green)](LICENSE) [![CI](https://github.com/username/QuantResearchStarter/actions/workflows/ci.yml/badge.svg)](https://github.com/username/QuantResearchStarter/actions) -## Features +A modular, open-source quantitative research and backtesting framework built for clarity, reproducibility, and extensibility. Ideal for researchers, students, and engineers building and testing systematic strategies. + +--- + +## Why this project + +QuantResearchStarter aims to provide a clean, well-documented starting point for quantitative research and backtesting. It focuses on: + +* **Readability**: idiomatic Python, type hints, and small modules you can read and change quickly. +* **Testability**: deterministic vectorized backtests with unit tests and CI. +* **Extensibility**: plug-in friendly factor & data adapters so you can try new ideas fast. 
+ +--- + +## Key features -- **Data Management**: Download real data or generate synthetic data for testing -- **Factor Library**: Implement momentum, value, size, and volatility factors -- **Backtesting Engine**: Vectorized backtester with transaction costs and constraints -- **Risk Metrics**: Comprehensive performance and risk analytics -- **Modular Design**: Easy to extend with new factors and strategies -- **Production Ready**: Type hints, tests, CI/CD, and documentation +* **Data management** — download market data or generate synthetic price series for experiments. +* **Factor library** — example implementations of momentum, value, size, and volatility factors. +* **Vectorized backtesting engine** — supports transaction costs, slippage, and portfolio constraints. +* **Risk & performance analytics** — returns, drawdowns, Sharpe, turnover, and other risk metrics. +* **CLI & scripts** — small tools to generate data, compute factors, and run backtests from the terminal. +* **Production-ready utilities** — type hints, tests, continuous integration, and documentation scaffolding. -## Quick Start +--- -### Installation +## Quick start + +### Requirements + +* Python 3.10+ +* pip + +### Install locally ```bash # Clone the repository @@ -70,5 +89,131 @@ cd QuantResearchStarter # Install package in development mode pip install -e . 
-# Install development dependencies +# Install development dependencies (tests, linters, docs) pip install -e ".[dev]" + +# Optional UI dependencies +pip install streamlit plotly +``` + +### Demo (one-line) + +```bash +make demo +``` + +### Step-by-step demo + +```bash +# generate synthetic sample price series +qrs generate-data -o data_sample/sample_prices.csv -s 5 -d 365 + +# compute example factors +qrs compute-factors -d data_sample/sample_prices.csv -f momentum -f value -o output/factors.csv + +# run a backtest +qrs backtest -d data_sample/sample_prices.csv -s output/factors.csv -o output/backtest_results.json + +# optional: start the Streamlit dashboard +streamlit run src/quant_research_starter/dashboard/streamlit_app.py +``` + +--- + +## Example: small strategy (concept) + +```python +from quant_research_starter.backtest import Backtester +from quant_research_starter.data import load_prices +from quant_research_starter.factors import MomentumFactor + +prices = load_prices("data_sample/sample_prices.csv") +factor = MomentumFactor(lookback=63) +scores = factor.compute(prices) + +bt = Backtester(prices, signals=scores, capital=1_000_000) +results = bt.run() +print(results.performance.summary()) +``` + +> The code above is illustrative—see `examples/` for fully working notebooks and scripts. + +--- + +## CLI reference + +Run `qrs --help` or `qrs <command> --help` for full usage. 
Main commands include: + +* `qrs generate-data` — create synthetic price series or download data from adapters +* `qrs compute-factors` — calculate and export factor scores +* `qrs backtest` — run the vectorized backtest and export results + +--- + +## Project structure (overview) + +``` +QuantResearchStarter/ +├─ src/quant_research_starter/ +│ ├─ data/ # data loaders & adapters +│ ├─ factors/ # factor implementations +│ ├─ backtest/ # backtester & portfolio logic +│ ├─ analytics/ # performance and risk metrics +│ ├─ cli/ # command line entry points +│ └─ dashboard/ # optional Streamlit dashboard +├─ examples/ # runnable notebooks & example strategies +├─ tests/ # unit + integration tests +└─ docs/ # documentation source +``` + +--- + +## Tests & CI + +We include unit tests and a CI workflow (GitHub Actions). Run tests locally with: + +```bash +pytest -q +``` + +The CI pipeline runs linting and unit tests and builds the docs on every push/PR. + +--- + +## Contributing + +Contributions are very welcome. Please follow these steps: + +1. Fork the repository +2. Create a feature branch +3. Add tests for new behavior +4. Open a pull request with a clear description and rationale + +Please review `CONTRIBUTING.md` and the `CODE_OF_CONDUCT.md` before submitting. + +--- + +## AI policy — short & practical + +**Yes — you are allowed to use AI tools** (ChatGPT, Copilot, Codeium, etc.) to help develop, prototype, or document code in this repository. + +A few friendly guidelines: + +* **Be transparent** when a contribution is substantially generated by an AI assistant — add a short note in the PR or commit message (e.g., "Generated with ChatGPT; reviewed and adapted by <your name>"). +* **Review and test** all AI-generated code. Treat it as a helpful draft, not final production-quality code. +* **Follow licensing** and attribution rules for any external snippets the AI suggests. Don’t paste large verbatim copyrighted material. 
+* **Security & correctness**: double-check numerical logic, data handling, and anything that affects trading decisions. + +This policy is intentionally permissive: we want the community to move fast while keeping quality and safety in mind. + +--- + +## License + +This project is licensed under the MIT License — see the `LICENSE` file for details. + +--- + +## Acknowledgements + +Built with inspiration from open-source quant libraries and the research community. If you use this project in papers or public work, a short citation or mention is appreciated. diff --git a/src/quant_research_starter.egg-info/SOURCES.txt b/src/quant_research_starter.egg-info/SOURCES.txt index 8364ef9..67dbfff 100644 --- a/src/quant_research_starter.egg-info/SOURCES.txt +++ b/src/quant_research_starter.egg-info/SOURCES.txt @@ -12,13 +12,16 @@ src/quant_research_starter.egg-info/requires.txt src/quant_research_starter.egg-info/top_level.txt src/quant_research_starter/backtest/__init__.py src/quant_research_starter/backtest/vectorized.py +src/quant_research_starter/dashboard/streamlit_app.py src/quant_research_starter/data/__init__.py src/quant_research_starter/data/downloaders.py src/quant_research_starter/data/init.py src/quant_research_starter/data/sample_loader.py src/quant_research_starter/data/synthetic.py +src/quant_research_starter/examples/benchmark/benchmark_factors.py src/quant_research_starter/factors/__init__.py src/quant_research_starter/factors/base.py +src/quant_research_starter/factors/bollinger.py src/quant_research_starter/factors/init.py src/quant_research_starter/factors/momentum.py src/quant_research_starter/factors/size.py diff --git a/src/quant_research_starter/examples/benchmark/benchmark_factors.py b/src/quant_research_starter/examples/benchmark/benchmark_factors.py new file mode 100644 index 0000000..f1d31b1 --- /dev/null +++ b/src/quant_research_starter/examples/benchmark/benchmark_factors.py @@ -0,0 +1,67 @@ +""" +Benchmark script to compare performance of 
# Usage (path as added by this patch):
#     python src/quant_research_starter/examples/benchmark/benchmark_factors.py


def generate_synthetic_prices(
    n_assets: int = 500, n_days: int = 252 * 3, seed: int = 42
) -> pd.DataFrame:
    """Generate synthetic random-walk price data for benchmarking.

    Parameters
    ----------
    n_assets : int
        Number of columns (tickers ``Stock_000``, ``Stock_001``, ...).
    n_days : int
        Number of rows; the index is a business-day range ending today.
    seed : int
        Seed for the local random generator. Same seed gives the same prices.

    Returns
    -------
    pd.DataFrame
        Strictly positive prices of shape ``(n_days, n_assets)``.
    """
    # A local Generator keeps the draw reproducible without reseeding NumPy's
    # process-wide RNG, which would silently affect any other code using it.
    rng = np.random.default_rng(seed)
    log_returns = rng.normal(0, 0.01, size=(n_days, n_assets))
    prices = 100 * np.exp(np.cumsum(log_returns, axis=0))

    # normalize() drops the time-of-day so the index holds plain dates.
    last_day = pd.Timestamp.today().normalize()
    dates = pd.date_range(end=last_day, periods=n_days, freq="B")
    tickers = [f"Stock_{i:03d}" for i in range(n_assets)]
    return pd.DataFrame(prices, index=dates, columns=tickers)


def benchmark_factor(factor, prices: pd.DataFrame) -> float:
    """Time one ``factor.compute(prices)`` call, print and return the result.

    Parameters
    ----------
    factor
        Object exposing ``name``, ``lookback`` and ``compute(prices)``.
    prices : pd.DataFrame
        Price data handed to ``factor.compute``.

    Returns
    -------
    float
        Elapsed time in seconds.
    """
    # perf_counter is monotonic and high-resolution; time.time() can jump when
    # the system clock is adjusted.
    start = time.perf_counter()
    factor.compute(prices)
    elapsed = time.perf_counter() - start
    print(
        f"{factor.name:<25} | Lookback: {factor.lookback:<5} | Time: {elapsed:.3f} sec"
    )
    return elapsed


def main():
    """Generate a synthetic price panel and benchmark each factor on it."""
    print("Generating synthetic data...")
    prices = generate_synthetic_prices(n_assets=500, n_days=252 * 3)
    print(f"Data shape: {prices.shape}")

    print("\nRunning factor benchmarks...\n")

    factors = [
        MomentumFactor(lookback=63),
        ValueFactor(),
        SizeFactor(),
        VolatilityFactor(lookback=21),
        IdiosyncraticVolatility(lookback=63),
        BollingerBandsFactor(lookback=20),
    ]

    for factor in factors:
        benchmark_factor(factor, prices)


if __name__ == "__main__":
    main()
# Resolve the package's Factor base class. Only ImportError triggers the
# fallback, so a genuine bug inside ``base`` surfaces instead of being hidden
# behind the stub.
try:
    from .base import Factor  # type: ignore
except ImportError:
    try:
        from quant_research_starter.factors.base import Factor  # type: ignore
    except ImportError:

        class Factor:
            """Minimal stand-in so this module can be used in isolation."""

            def __init__(
                self, name: Optional[str] = None, lookback: Optional[int] = None
            ):
                self.name = name or "factor"
                self.lookback = lookback or 0
                self._values: Optional[pd.DataFrame] = None

            def _validate_data(self, prices: pd.DataFrame) -> None:
                """Raise ``TypeError`` unless ``prices`` is a DataFrame."""
                if not isinstance(prices, pd.DataFrame):
                    raise TypeError("prices must be a pandas DataFrame")

            def __repr__(self) -> str:
                return (
                    f"<{type(self).__name__} name={self.name!r} "
                    f"lookback={self.lookback}>"
                )


# Number of periods per year used to annualize per-period volatility.
TRADING_DAYS = 252


def _cross_sectional_zscore(scores: pd.DataFrame) -> pd.DataFrame:
    """Z-score each row across columns; single-column input is only copied."""
    if scores.shape[1] <= 1:
        return scores.copy()
    row_mean = scores.mean(axis=1)
    # A zero spread becomes NaN so the division cannot produce +/-inf.
    row_std = scores.std(axis=1).replace(0, np.nan)
    return scores.sub(row_mean, axis=0).div(row_std, axis=0)


class VolatilityFactor(Factor):
    """Annualized realized volatility, returned as a low-volatility signal.

    Volatility is negated so that low-volatility assets get high scores. With
    more than one asset the scores are z-scored across assets on each date.

    Parameters
    ----------
    lookback : int
        Rolling window length in rows. Default is 21.
    name : str
        Human-readable name for the factor.

    Raises
    ------
    ValueError
        If ``lookback`` is not a positive integer.
    """

    def __init__(self, lookback: int = 21, name: str = "volatility"):
        # Validate before touching the base class so a bad argument can never
        # leave a half-initialised object behind.
        if not isinstance(lookback, int) or lookback <= 0:
            raise ValueError("lookback must be a positive integer")
        try:
            super().__init__(name=name, lookback=lookback)  # type: ignore
        except TypeError:
            # Base class with a different signature: initialise by hand.
            self._values = None
        self.lookback = lookback
        self.name = name

    def compute(self, prices: pd.DataFrame) -> pd.DataFrame:
        """Compute annualized rolling volatility scores.

        Parameters
        ----------
        prices : pd.DataFrame
            Price levels, one column per asset.

        Returns
        -------
        pd.DataFrame
            Same columns as ``prices``; the index starts at row
            ``lookback - 1``. That first row is always NaN, because
            ``pct_change`` has no value for the first price, so the first full
            window of returns ends one row later.

        Raises
        ------
        ValueError
            If ``prices`` has fewer than ``lookback`` rows.
        """
        self._validate_data(prices)

        if prices.shape[0] < self.lookback:
            raise ValueError(
                f"Need at least {self.lookback} rows of data to compute volatility"
            )

        returns = prices.pct_change()

        # Population std (ddof=0) over full windows only, then annualize.
        vol = returns.rolling(window=self.lookback, min_periods=self.lookback).std(
            ddof=0
        ) * np.sqrt(TRADING_DAYS)
        vol = vol.iloc[self.lookback - 1 :]

        # Negate so low volatility scores high. The factor of 10 cancels in the
        # z-score and only shows up in the single-asset output.
        scores = -vol * 10.0

        result = _cross_sectional_zscore(scores)
        self._values = result
        return result


class IdiosyncraticVolatility(VolatilityFactor):
    """Idiosyncratic volatility versus an equal-weighted market proxy.

    For every rolling window one market-model beta is fitted per asset and the
    volatility of that fit's residuals is annualized. The residual variance is
    obtained in closed form from rolling population moments:

        Var(r_i - beta * r_m) = Var(r_i) - Cov(r_i, r_m)**2 / Var(r_m)

    The result is negated (low idiosyncratic volatility scores high) and
    z-scored across assets when there is more than one asset.
    """

    def __init__(self, lookback: int = 63, name: str = "idiosyncratic_volatility"):
        super().__init__(lookback=lookback, name=name)

    def compute(self, prices: pd.DataFrame) -> pd.DataFrame:
        """Compute idiosyncratic-volatility scores.

        Parameters
        ----------
        prices : pd.DataFrame
            Price levels, one column per asset.

        Returns
        -------
        pd.DataFrame
            Same columns as ``prices``; the first row corresponds to the first
            window holding ``lookback`` returns.

        Raises
        ------
        ValueError
            If there are fewer than ``lookback + 1`` price rows, or fewer than
            ``lookback`` return rows once rows containing NaN are dropped.
        """
        self._validate_data(prices)

        if prices.shape[0] < self.lookback + 1:
            raise ValueError(
                f"Need at least {self.lookback + 1} rows of data to compute idiosyncratic volatility"
            )

        returns = prices.pct_change().dropna()
        if returns.shape[0] < self.lookback:
            raise ValueError(
                f"Need at least {self.lookback} non-NA return rows to compute idio-vol"
            )

        # Market proxy: equal-weighted mean return across assets.
        market = returns.mean(axis=1)

        roll = {"window": self.lookback, "min_periods": self.lookback}

        # Cov(r_i, r_m) = E[r_i * r_m] - E[r_i] * E[r_m] within each window.
        mean_asset = returns.rolling(**roll).mean()
        mean_market = market.rolling(**roll).mean()
        mean_product = returns.mul(market, axis=0).rolling(**roll).mean()
        cov_with_mkt = mean_product - mean_asset.mul(mean_market, axis=0)

        var_asset = returns.rolling(**roll).var(ddof=0)
        # A flat market has no defined beta; NaN avoids dividing by zero.
        var_market = market.rolling(**roll).var(ddof=0).replace(0, np.nan)

        # Residual variance of the per-window fit. Using one beta per window
        # gives a value as soon as the first window is full, instead of
        # needing a second window of residuals. Rounding can push the
        # difference slightly below zero, hence the clip.
        resid_var = var_asset - cov_with_mkt.pow(2).div(var_market, axis=0)
        resid_var = resid_var.clip(lower=0)

        idio_vol = np.sqrt(resid_var) * np.sqrt(TRADING_DAYS)
        idio_vol = idio_vol.iloc[self.lookback - 1 :]

        # Negate so low idiosyncratic volatility scores high.
        scores = -idio_vol

        result = _cross_sectional_zscore(scores)
        self._values = result
        return result