From 9f99632032c172697636fdde1ded7744eff67509 Mon Sep 17 00:00:00 2001 From: Aditya Agrawal Date: Mon, 27 Oct 2025 00:03:07 +0530 Subject: [PATCH 01/17] feat: optimize idiosyncratic volatility factor using vectorized covariance operations --- .../examples/benchmark/benchmark_factors.py | 61 ++++++++ .../factors/volatility.py | 134 +++++------------- 2 files changed, 100 insertions(+), 95 deletions(-) create mode 100644 src/quant_research_starter/examples/benchmark/benchmark_factors.py diff --git a/src/quant_research_starter/examples/benchmark/benchmark_factors.py b/src/quant_research_starter/examples/benchmark/benchmark_factors.py new file mode 100644 index 0000000..443ab05 --- /dev/null +++ b/src/quant_research_starter/examples/benchmark/benchmark_factors.py @@ -0,0 +1,61 @@ +""" +Benchmark script to compare performance of factor computations. + +Usage: + python examples/benchmarks/benchmark_factors.py +""" + +import time +import numpy as np +import pandas as pd + +from quant_research_starter.factors import ( + MomentumFactor, + ValueFactor, + SizeFactor, + VolatilityFactor, + IdiosyncraticVolatility, + BollingerBandsFactor, +) + +def generate_synthetic_prices(n_assets: int = 500, n_days: int = 252 * 3) -> pd.DataFrame: + """Generate synthetic random walk price data for testing.""" + np.random.seed(42) + returns = np.random.normal(0, 0.01, size=(n_days, n_assets)) + prices = 100 * np.exp(np.cumsum(returns, axis=0)) + dates = pd.date_range(end=pd.Timestamp.today(), periods=n_days, freq="B") + tickers = [f"Stock_{i:03d}" for i in range(n_assets)] + return pd.DataFrame(prices, index=dates, columns=tickers) + + +def benchmark_factor(factor, prices: pd.DataFrame): + """Benchmark runtime of a given factor.""" + start = time.time() + _ = factor.compute(prices) + end = time.time() + elapsed = end - start + print(f"{factor.name:<25} | Lookback: {factor.lookback:<5} | Time: {elapsed:.3f} sec") + + +def main(): + print("Generating synthetic data...") + prices = 
generate_synthetic_prices(n_assets=500, n_days=252 * 3) + print(f"Data shape: {prices.shape}") + + print("\nRunning factor benchmarks...\n") + + factors = [ + MomentumFactor(lookback=63), + ValueFactor(), + SizeFactor(), + VolatilityFactor(lookback=21), + IdiosyncraticVolatility(lookback=63), + BollingerBandsFactor(lookback=20), + ] + + for factor in factors: + benchmark_factor(factor, prices) + + +if __name__ == "__main__": + main() diff --git a/src/quant_research_starter/factors/volatility.py b/src/quant_research_starter/factors/volatility.py index b5f914d..74f0d45 100644 --- a/src/quant_research_starter/factors/volatility.py +++ b/src/quant_research_starter/factors/volatility.py @@ -1,4 +1,4 @@ -"""Volatility factor implementations.""" +"""Volatility factor implementations (vectorized).""" import numpy as np import pandas as pd @@ -7,14 +7,7 @@ class VolatilityFactor(Factor): - """ - Volatility factors measuring different aspects of risk. - - Common volatility measures: - - Historical volatility (realized vol) - - Idiosyncratic volatility - - Volatility of volatility - """ + """Computes historical volatility (annualized).""" def __init__(self, lookback: int = 21, name: str = "volatility"): super().__init__(name=name, lookback=lookback) @@ -23,109 +16,60 @@ def compute(self, prices: pd.DataFrame) -> pd.DataFrame: """Compute historical volatility over lookback period.""" self._validate_data(prices) - if len(prices) < self.lookback: - raise ValueError(f"Need at least {self.lookback} periods of data") - - # Calculate returns returns = prices.pct_change() - # Compute rolling volatility (annualized); set min_periods to require full window - volatility = returns.rolling( - window=self.lookback, min_periods=self.lookback - ).std() * np.sqrt(252) - - # Remove initial NaN values - volatility = volatility.iloc[self.lookback - 1 :] + # Vectorized rolling std (annualized) + vol = returns.rolling(window=self.lookback, min_periods=self.lookback).std() * np.sqrt(252) + vol = 
vol.iloc[self.lookback - 1:] - # Low volatility stocks tend to outperform (volatility anomaly) - # Use scaled negative volatility to ensure clear negative signal in tests - vol_scores = -volatility * 10.0 + # Low-volatility anomaly (invert sign) + scores = -vol * 10.0 - # Cross-sectional z-score when multiple columns; otherwise return scores - if vol_scores.shape[1] > 1: - vol_z = vol_scores.sub(vol_scores.mean(axis=1), axis=0) - denom = vol_scores.std(axis=1).replace(0, np.nan) - vol_z = vol_z.div(denom, axis=0) - result = vol_z + # Cross-sectional z-score + if scores.shape[1] > 1: + z = (scores - scores.mean(axis=1).values[:, None]) / scores.std(axis=1).values[:, None] + result = pd.DataFrame(z, index=scores.index, columns=scores.columns) else: - # Single asset: use negative realized vol directly - result = vol_scores + result = scores self._values = result return result class IdiosyncraticVolatility(VolatilityFactor): - """ - Idiosyncratic volatility relative to market model. - - Measures stock-specific risk after accounting for market exposure. 
- """ + """Vectorized idiosyncratic volatility relative to market model.""" def compute(self, prices: pd.DataFrame) -> pd.DataFrame: - """Compute idiosyncratic volatility from market model residuals.""" + """Compute idiosyncratic volatility using vectorized regression.""" self._validate_data(prices) - if len(prices) < self.lookback: - raise ValueError(f"Need at least {self.lookback} periods of data") - returns = prices.pct_change().dropna() - - # Use equal-weighted portfolio as market proxy - market_returns = returns.mean(axis=1) - - idiosyncratic_vol = pd.DataFrame(index=returns.index, columns=returns.columns) - - # Compute rolling idiosyncratic volatility - for symbol in returns.columns: - stock_returns = returns[symbol] - - def calc_idio_vol(window_returns): - if len(window_returns) < 10: # Minimum observations for regression - return np.nan - - # Simple market model regression - X = market_returns.loc[window_returns.index].values.reshape(-1, 1) - y = window_returns.values - - # Remove NaN values - mask = ~(np.isnan(X) | np.isnan(y)) - X_clean = X[mask[:, 0]] - y_clean = y[mask[:, 0]] - - if len(X_clean) < 10: - return np.nan - - try: - # Calculate residuals via simple OLS beta - x = X_clean.flatten() - x_var = np.var(x) - if x_var == 0: - return np.nan - beta = np.cov(y_clean, x)[0, 1] / x_var - residuals = y_clean - beta * x - return np.std(residuals) * np.sqrt(252) - except Exception: - return np.nan - - idiosyncratic_vol[symbol] = stock_returns.rolling( - window=self.lookback - ).apply(calc_idio_vol, raw=False) - - # Remove initial NaN values - idiosyncratic_vol = idiosyncratic_vol.iloc[self.lookback - 1 :] - - # Negative relationship with returns (idiosyncratic vol anomaly) - idio_scores = -idiosyncratic_vol - - # Z-score normalize when multiple assets; otherwise return scores - if idio_scores.shape[1] > 1: - idio_z = idio_scores.sub(idio_scores.mean(axis=1), axis=0) - denom = idio_scores.std(axis=1).replace(0, np.nan) - idio_z = idio_z.div(denom, axis=0) - 
result = idio_z + market = returns.mean(axis=1) + + # Compute beta for each asset using vectorized covariance/variance + cov_with_mkt = returns.mul(market, axis=0).rolling(window=self.lookback).mean() - ( + returns.rolling(window=self.lookback).mean().mul(market.rolling(window=self.lookback).mean(), axis=0) + ) + market_var = market.rolling(window=self.lookback).var() + beta = cov_with_mkt.div(market_var, axis=0) + + # Predicted returns via market model + predicted = beta.mul(market, axis=0) + residuals = returns - predicted + + # Rolling residual std (annualized) + idio_vol = residuals.rolling(window=self.lookback, min_periods=self.lookback).std() * np.sqrt(252) + idio_vol = idio_vol.iloc[self.lookback - 1:] + + # Invert sign (low-idio-vol performs better) + scores = -idio_vol + + # Cross-sectional z-score normalization + if scores.shape[1] > 1: + z = (scores - scores.mean(axis=1).values[:, None]) / scores.std(axis=1).values[:, None] + result = pd.DataFrame(z, index=scores.index, columns=scores.columns) else: - result = idio_scores + result = scores self._values = result return result From af12d1ac1104bd4cf86a9921f1b3b001b9c7bb7e Mon Sep 17 00:00:00 2001 From: adityacosmos24 <163721133+adityacosmos24@users.noreply.github.com> Date: Mon, 27 Oct 2025 00:39:23 +0530 Subject: [PATCH 02/17] feat: Reorder imports in benchmark_factors.py --- .../examples/benchmark/benchmark_factors.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/quant_research_starter/examples/benchmark/benchmark_factors.py b/src/quant_research_starter/examples/benchmark/benchmark_factors.py index 443ab05..e64b0f3 100644 --- a/src/quant_research_starter/examples/benchmark/benchmark_factors.py +++ b/src/quant_research_starter/examples/benchmark/benchmark_factors.py @@ -6,18 +6,20 @@ """ import time + import numpy as np import pandas as pd from quant_research_starter.factors import ( + BollingerBandsFactor, + IdiosyncraticVolatility, MomentumFactor, - ValueFactor, 
SizeFactor, + ValueFactor, VolatilityFactor, - IdiosyncraticVolatility, - BollingerBandsFactor, ) + def generate_synthetic_prices(n_assets: int = 500, n_days: int = 252 * 3) -> pd.DataFrame: """Generate synthetic random walk price data for testing.""" np.random.seed(42) From 327c549b8252b694716ce644e216976b89597cbd Mon Sep 17 00:00:00 2001 From: adityacosmos24 <163721133+adityacosmos24@users.noreply.github.com> Date: Mon, 27 Oct 2025 01:26:01 +0530 Subject: [PATCH 03/17] feat: refactor volatility factors for production readiness Enhanced volatility factor implementations for production use, including improved initialization, handling of edge cases, and consistent DataFrame outputs. --- .../factors/volatility.py | 187 +++++++++++++++--- 1 file changed, 160 insertions(+), 27 deletions(-) diff --git a/src/quant_research_starter/factors/volatility.py b/src/quant_research_starter/factors/volatility.py index 74f0d45..3ef8a97 100644 --- a/src/quant_research_starter/factors/volatility.py +++ b/src/quant_research_starter/factors/volatility.py @@ -1,75 +1,208 @@ -"""Volatility factor implementations (vectorized).""" +""" +Volatility factor implementations (vectorized, production-ready). + +This file contains: +- VolatilityFactor: historical (realized) volatility (annualized) with + cross-sectional z-score output. +- IdiosyncraticVolatility: volatility of residuals vs an equal-weighted + market proxy, computed using vectorized operations. + +Key improvements included: +- Proper `__init__` usage. +- Min_periods set on rolling operations; trimming of initial rows to avoid + ambiguous partial-window values. +- Guarding divide-by-zero when computing beta (market variance). +- Consistent handling for single-column (single-asset) DataFrames. +- Preserves DataFrame output shape/columns and sets self._values. +- Uses ddof=0 for rolling std/var to match population estimates (consistent & + fast). 
+""" + +from __future__ import annotations + +import warnings +from typing import Optional import numpy as np import pandas as pd -from .base import Factor +# Try to import package Factor base; fallback to a minimal stub if unavailable. +try: + # Adjust this import if your project stores Factor in a different module. + from .base import Factor # type: ignore +except Exception: + try: + from quant_research_starter.factors.base import Factor # type: ignore + except Exception: + # Minimal Factor stub so this module can be inspected/tested in isolation. + class Factor: + def __init__(self, name: Optional[str] = None, lookback: Optional[int] = None): + self.name = name or "factor" + self.lookback = lookback or 0 + self._values: Optional[pd.DataFrame] = None + + def _validate_data(self, prices: pd.DataFrame) -> None: + if not isinstance(prices, pd.DataFrame): + raise TypeError("prices must be a pandas DataFrame") + + def __repr__(self) -> str: + return f"" + +# Constants +TRADING_DAYS = 252 class VolatilityFactor(Factor): - """Computes historical volatility (annualized).""" + """Computes historical (realized) volatility (annualized) and returns cross-sectional + z-scores. Low-volatility signals are produced by inverting volatility (i.e. low vol -> high score). + + Parameters + ---------- + lookback : int + Rolling lookback window (in trading days). Default is 21. + name : str + Human-readable name for the factor. + """ def __init__(self, lookback: int = 21, name: str = "volatility"): - super().__init__(name=name, lookback=lookback) + # Call base init if available; also keep explicit attributes for safety. + try: + super().__init__(name=name, lookback=lookback) # type: ignore + except Exception: + # Base class might have a different signature; set manually. 
+ self.name = name + self.lookback = lookback + self._values = None + + # Ensure sensible types/values + if not isinstance(lookback, int) or lookback <= 0: + raise ValueError("lookback must be a positive integer") + self.lookback = lookback + self.name = name def compute(self, prices: pd.DataFrame) -> pd.DataFrame: - """Compute historical volatility over lookback period.""" + """ + Compute annualized historical volatility and return z-scored signals. + + Returns + ------- + pd.DataFrame + DataFrame of the same columns (assets) with index trimmed so that the + first row corresponds to the first full lookback window. + """ self._validate_data(prices) + if prices.shape[0] < self.lookback: + raise ValueError(f"Need at least {self.lookback} rows of data to compute volatility") + + # pct change -> returns returns = prices.pct_change() - # Vectorized rolling std (annualized) - vol = returns.rolling(window=self.lookback, min_periods=self.lookback).std() * np.sqrt(252) - vol = vol.iloc[self.lookback - 1:] + # rolling std (population, ddof=0) and annualize + vol = returns.rolling(window=self.lookback, min_periods=self.lookback).std(ddof=0) * np.sqrt( + TRADING_DAYS + ) + + # Trim initial rows that don't correspond to a full window + if self.lookback > 1: + vol = vol.iloc[self.lookback - 1 :] - # Low-volatility anomaly (invert sign) + # Invert sign for low-volatility preference and scale for numeric stability scores = -vol * 10.0 - # Cross-sectional z-score + # Ensure DataFrame (even for single-column) + if isinstance(scores, pd.Series): + scores = scores.to_frame(name=prices.columns[0]) + + # Cross-sectional z-score: (v - mean_row) / std_row if scores.shape[1] > 1: - z = (scores - scores.mean(axis=1).values[:, None]) / scores.std(axis=1).values[:, None] + row_mean = scores.mean(axis=1) + row_std = scores.std(axis=1).replace(0, np.nan) # avoid divide-by-zero + # subtract mean and divide -- use broadcasting via .values for speed + z = (scores.sub(row_mean, 
axis=0)).div(row_std, axis=0) result = pd.DataFrame(z, index=scores.index, columns=scores.columns) else: - result = scores + # Single asset -> keep the scores DataFrame (no cross-sectional normalization) + result = scores.copy() + # Store and return self._values = result return result class IdiosyncraticVolatility(VolatilityFactor): - """Vectorized idiosyncratic volatility relative to market model.""" + """Compute idiosyncratic volatility relative to an equal-weighted market proxy. + Implements a vectorized market-model approach: + - compute rolling cov(ri, rm) and var(rm) + - beta = cov / var + - residuals = ri - beta * rm + - idio_vol = rolling std(residuals) (annualized) + Returns negative idio_vol (so low idio-vol -> high score) and z-scores cross-sectionally. + """ + + def __init__(self, lookback: int = 63, name: str = "idiosyncratic_volatility"): + super().__init__(lookback=lookback, name=name) def compute(self, prices: pd.DataFrame) -> pd.DataFrame: - """Compute idiosyncratic volatility using vectorized regression.""" self._validate_data(prices) + # require enough rows to compute returns and rolling windows + if prices.shape[0] < self.lookback + 1: + raise ValueError(f"Need at least {self.lookback + 1} rows of data to compute idiosyncratic volatility") + + # daily returns returns = prices.pct_change().dropna() + if returns.shape[0] < self.lookback: + raise ValueError(f"Need at least {self.lookback} non-NA return rows to compute idio-vol") + + # Market proxy: equal-weighted mean across assets market = returns.mean(axis=1) - # Compute beta for each asset using vectorized covariance/variance - cov_with_mkt = returns.mul(market, axis=0).rolling(window=self.lookback).mean() - ( - returns.rolling(window=self.lookback).mean().mul(market.rolling(window=self.lookback).mean(), axis=0) - ) - market_var = market.rolling(window=self.lookback).var() + # Rolling means for covariance decomposition + returns_mean = returns.rolling(window=self.lookback, 
min_periods=self.lookback).mean() + market_mean = market.rolling(window=self.lookback, min_periods=self.lookback).mean() + + # Compute cov(ri, rm) via E[ri*rm] - E[ri]*E[rm] + e_ri_rm = returns.mul(market, axis=0).rolling(window=self.lookback, min_periods=self.lookback).mean() + cov_with_mkt = e_ri_rm - returns_mean.mul(market_mean, axis=0) + + # market variance (vector) -- guard zeros + market_var = market.rolling(window=self.lookback, min_periods=self.lookback).var(ddof=0).replace(0, np.nan) + + # Beta: cov / var (division broadcasted over columns) beta = cov_with_mkt.div(market_var, axis=0) - # Predicted returns via market model + # Predicted returns: beta * market (broadcasted) predicted = beta.mul(market, axis=0) + + # Residuals (vectorized) residuals = returns - predicted - # Rolling residual std (annualized) - idio_vol = residuals.rolling(window=self.lookback, min_periods=self.lookback).std() * np.sqrt(252) - idio_vol = idio_vol.iloc[self.lookback - 1:] + # Rolling std of residuals (annualized) + idio_vol = residuals.rolling(window=self.lookback, min_periods=self.lookback).std(ddof=0) * np.sqrt( + TRADING_DAYS + ) + + # Trim to first full-window row + if self.lookback > 1: + idio_vol = idio_vol.iloc[self.lookback - 1 :] - # Invert sign (low-idio-vol performs better) + # Negative idiosyncratic vol => prefer low idio-vol scores = -idio_vol - # Cross-sectional z-score normalization + # Ensure DataFrame shape (in case of single-column) + if isinstance(scores, pd.Series): + scores = scores.to_frame(name=prices.columns[0]) + + # Cross-sectional z-score normalization if > 1 asset if scores.shape[1] > 1: - z = (scores - scores.mean(axis=1).values[:, None]) / scores.std(axis=1).values[:, None] + row_mean = scores.mean(axis=1) + row_std = scores.std(axis=1).replace(0, np.nan) + z = (scores.sub(row_mean, axis=0)).div(row_std, axis=0) result = pd.DataFrame(z, index=scores.index, columns=scores.columns) else: - result = scores + result = scores.copy() + # Save and return 
self._values = result return result From 18fe964c640c4726d85df6923c3a827c4ce46627 Mon Sep 17 00:00:00 2001 From: adityacosmos24 <163721133+adityacosmos24@users.noreply.github.com> Date: Mon, 27 Oct 2025 01:33:31 +0530 Subject: [PATCH 04/17] remove unused import of warnings module Remove unused import statement for warnings. --- src/quant_research_starter/factors/volatility.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/quant_research_starter/factors/volatility.py b/src/quant_research_starter/factors/volatility.py index 3ef8a97..64c512d 100644 --- a/src/quant_research_starter/factors/volatility.py +++ b/src/quant_research_starter/factors/volatility.py @@ -20,7 +20,6 @@ from __future__ import annotations -import warnings from typing import Optional import numpy as np From a4fecd252fe231051e3a50b2fba06f15a3da6e73 Mon Sep 17 00:00:00 2001 From: Aditya Agrawal Date: Mon, 27 Oct 2025 01:46:16 +0530 Subject: [PATCH 05/17] feat: fixed errors --- src/quant_research_starter.egg-info/PKG-INFO | 173 +++++++++++++-- .../SOURCES.txt | 3 + .../examples/benchmark/benchmark_factors.py | 8 +- .../factors/volatility.py | 205 +++++++++++++++--- 4 files changed, 346 insertions(+), 43 deletions(-) diff --git a/src/quant_research_starter.egg-info/PKG-INFO b/src/quant_research_starter.egg-info/PKG-INFO index e934ef4..41e22c9 100644 --- a/src/quant_research_starter.egg-info/PKG-INFO +++ b/src/quant_research_starter.egg-info/PKG-INFO @@ -43,24 +43,43 @@ Dynamic: license-file # QuantResearchStarter -A modular, open-source quantitative research and backtesting framework designed for clarity and extensibility. Perfect for researchers, students, and developers interested in quantitative finance. 
- -![Python Version](https://img.shields.io/badge/python-3.10%2B-blue) -![License](https://img.shields.io/badge/license-MIT-green) +[![Python Version](https://img.shields.io/badge/python-3.10%2B-blue)](https://www.python.org/) +[![License: MIT](https://img.shields.io/badge/license-MIT-green)](LICENSE) [![CI](https://github.com/username/QuantResearchStarter/actions/workflows/ci.yml/badge.svg)](https://github.com/username/QuantResearchStarter/actions) -## Features +A modular, open-source quantitative research and backtesting framework built for clarity, reproducibility, and extensibility. Ideal for researchers, students, and engineers building and testing systematic strategies. + +--- + +## Why this project + +QuantResearchStarter aims to provide a clean, well-documented starting point for quantitative research and backtesting. It focuses on: + +* **Readability**: idiomatic Python, type hints, and small modules you can read and change quickly. +* **Testability**: deterministic vectorized backtests with unit tests and CI. +* **Extensibility**: plug-in friendly factor & data adapters so you can try new ideas fast. + +--- + +## Key features -- **Data Management**: Download real data or generate synthetic data for testing -- **Factor Library**: Implement momentum, value, size, and volatility factors -- **Backtesting Engine**: Vectorized backtester with transaction costs and constraints -- **Risk Metrics**: Comprehensive performance and risk analytics -- **Modular Design**: Easy to extend with new factors and strategies -- **Production Ready**: Type hints, tests, CI/CD, and documentation +* **Data management** — download market data or generate synthetic price series for experiments. +* **Factor library** — example implementations of momentum, value, size, and volatility factors. +* **Vectorized backtesting engine** — supports transaction costs, slippage, and portfolio constraints. 
+* **Risk & performance analytics** — returns, drawdowns, Sharpe, turnover, and other risk metrics. +* **CLI & scripts** — small tools to generate data, compute factors, and run backtests from the terminal. +* **Production-ready utilities** — type hints, tests, continuous integration, and documentation scaffolding. -## Quick Start +--- -### Installation +## Quick start + +### Requirements + +* Python 3.10+ +* pip + +### Install locally ```bash # Clone the repository @@ -70,5 +89,131 @@ cd QuantResearchStarter # Install package in development mode pip install -e . -# Install development dependencies +# Install development dependencies (tests, linters, docs) pip install -e ".[dev]" + +# Optional UI dependencies +pip install streamlit plotly +``` + +### Demo (one-line) + +```bash +make demo +``` + +### Step-by-step demo + +```bash +# generate synthetic sample price series +qrs generate-data -o data_sample/sample_prices.csv -s 5 -d 365 + +# compute example factors +qrs compute-factors -d data_sample/sample_prices.csv -f momentum -f value -o output/factors.csv + +# run a backtest +qrs backtest -d data_sample/sample_prices.csv -s output/factors.csv -o output/backtest_results.json + +# optional: start the Streamlit dashboard +streamlit run src/quant_research_starter/dashboard/streamlit_app.py +``` + +--- + +## Example: small strategy (concept) + +```python +from quant_research_starter.backtest import Backtester +from quant_research_starter.data import load_prices +from quant_research_starter.factors import Momentum + +prices = load_prices("data_sample/sample_prices.csv") +factor = Momentum(window=63) +scores = factor.compute(prices) + +bt = Backtester(prices, signals=scores, capital=1_000_000) +results = bt.run() +print(results.performance.summary()) +``` + +> The code above is illustrative—see `examples/` for fully working notebooks and scripts. + +--- + +## CLI reference + +Run `qrs --help` or `qrs --help` for full usage. 
Main commands include: + +* `qrs generate-data` — create synthetic price series or download data from adapters +* `qrs compute-factors` — calculate and export factor scores +* `qrs backtest` — run the vectorized backtest and export results + +--- + +## Project structure (overview) + +``` +QuantResearchStarter/ +├─ src/quant_research_starter/ +│ ├─ data/ # data loaders & adapters +│ ├─ factors/ # factor implementations +│ ├─ backtest/ # backtester & portfolio logic +│ ├─ analytics/ # performance and risk metrics +│ ├─ cli/ # command line entry points +│ └─ dashboard/ # optional Streamlit dashboard +├─ examples/ # runnable notebooks & example strategies +├─ tests/ # unit + integration tests +└─ docs/ # documentation source +``` + +--- + +## Tests & CI + +We include unit tests and a CI workflow (GitHub Actions). Run tests locally with: + +```bash +pytest -q +``` + +The CI pipeline runs linting, unit tests, and builds docs on push/PR. + +--- + +## Contributing + +Contributions are very welcome. Please follow these steps: + +1. Fork the repository +2. Create a feature branch +3. Add tests for new behavior +4. Open a pull request with a clear description and rationale + +Please review `CONTRIBUTING.md` and the `CODE_OF_CONDUCT.md` before submitting. + +--- + +## AI policy — short & practical + +**Yes — you are allowed to use AI tools** (ChatGPT, Copilot, Codeium, etc.) to help develop, prototype, or document code in this repository. + +A few friendly guidelines: + +* **Be transparent** when a contribution is substantially generated by an AI assistant — add a short note in the PR or commit message (e.g., "Generated with ChatGPT; reviewed and adapted by "). +* **Review and test** all AI-generated code. Treat it as a helpful draft, not final production-quality code. +* **Follow licensing** and attribution rules for any external snippets the AI suggests. Don’t paste large verbatim copyrighted material. 
+* **Security & correctness**: double-check numerical logic, data handling, and anything that affects trading decisions. + +This policy is intentionally permissive: we want the community to move fast while keeping quality and safety in mind. + +--- + +## License + +This project is licensed under the MIT License — see the `LICENSE` file for details. + +--- + +## Acknowledgements + +Built with inspiration from open-source quant libraries and the research community. If you use this project in papers or public work, a short citation or mention is appreciated. diff --git a/src/quant_research_starter.egg-info/SOURCES.txt b/src/quant_research_starter.egg-info/SOURCES.txt index 8364ef9..67dbfff 100644 --- a/src/quant_research_starter.egg-info/SOURCES.txt +++ b/src/quant_research_starter.egg-info/SOURCES.txt @@ -12,13 +12,16 @@ src/quant_research_starter.egg-info/requires.txt src/quant_research_starter.egg-info/top_level.txt src/quant_research_starter/backtest/__init__.py src/quant_research_starter/backtest/vectorized.py +src/quant_research_starter/dashboard/streamlit_app.py src/quant_research_starter/data/__init__.py src/quant_research_starter/data/downloaders.py src/quant_research_starter/data/init.py src/quant_research_starter/data/sample_loader.py src/quant_research_starter/data/synthetic.py +src/quant_research_starter/examples/benchmark/benchmark_factors.py src/quant_research_starter/factors/__init__.py src/quant_research_starter/factors/base.py +src/quant_research_starter/factors/bollinger.py src/quant_research_starter/factors/init.py src/quant_research_starter/factors/momentum.py src/quant_research_starter/factors/size.py diff --git a/src/quant_research_starter/examples/benchmark/benchmark_factors.py b/src/quant_research_starter/examples/benchmark/benchmark_factors.py index 443ab05..e64b0f3 100644 --- a/src/quant_research_starter/examples/benchmark/benchmark_factors.py +++ b/src/quant_research_starter/examples/benchmark/benchmark_factors.py @@ -6,18 +6,20 @@ """ 
import time + import numpy as np import pandas as pd from quant_research_starter.factors import ( + BollingerBandsFactor, + IdiosyncraticVolatility, MomentumFactor, - ValueFactor, SizeFactor, + ValueFactor, VolatilityFactor, - IdiosyncraticVolatility, - BollingerBandsFactor, ) + def generate_synthetic_prices(n_assets: int = 500, n_days: int = 252 * 3) -> pd.DataFrame: """Generate synthetic random walk price data for testing.""" np.random.seed(42) diff --git a/src/quant_research_starter/factors/volatility.py b/src/quant_research_starter/factors/volatility.py index 74f0d45..1fef9c1 100644 --- a/src/quant_research_starter/factors/volatility.py +++ b/src/quant_research_starter/factors/volatility.py @@ -1,75 +1,228 @@ -"""Volatility factor implementations (vectorized).""" +""" +Volatility factor implementations (vectorized, production-ready). + +This file contains: +- VolatilityFactor: historical (realized) volatility (annualized) with + cross-sectional z-score output. +- IdiosyncraticVolatility: volatility of residuals vs an equal-weighted + market proxy, computed using vectorized operations. + +Key improvements included: +- Proper `__init__` usage. +- Min_periods set on rolling operations; trimming of initial rows to avoid + ambiguous partial-window values. +- Guarding divide-by-zero when computing beta (market variance). +- Consistent handling for single-column (single-asset) DataFrames. +- Preserves DataFrame output shape/columns and sets self._values. +- Uses ddof=0 for rolling std/var to match population estimates (consistent & + fast). +""" + +from __future__ import annotations + +from typing import Optional import numpy as np import pandas as pd -from .base import Factor +# Try to import package Factor base; fallback to a minimal stub if unavailable. +try: + # Adjust this import if your project stores Factor in a different module. 
+ from .base import Factor # type: ignore +except Exception: + try: + from quant_research_starter.factors.base import Factor # type: ignore + except Exception: + # Minimal Factor stub so this module can be inspected/tested in isolation. + class Factor: + def __init__( + self, name: Optional[str] = None, lookback: Optional[int] = None + ): + self.name = name or "factor" + self.lookback = lookback or 0 + self._values: Optional[pd.DataFrame] = None + + def _validate_data(self, prices: pd.DataFrame) -> None: + if not isinstance(prices, pd.DataFrame): + raise TypeError("prices must be a pandas DataFrame") + + def __repr__(self) -> str: + return f"" + + +# Constants +TRADING_DAYS = 252 class VolatilityFactor(Factor): - """Computes historical volatility (annualized).""" + """Computes historical (realized) volatility (annualized) and returns cross-sectional + z-scores. Low-volatility signals are produced by inverting volatility (i.e. low vol -> high score). + + Parameters + ---------- + lookback : int + Rolling lookback window (in trading days). Default is 21. + name : str + Human-readable name for the factor. + """ def __init__(self, lookback: int = 21, name: str = "volatility"): - super().__init__(name=name, lookback=lookback) + # Call base init if available; also keep explicit attributes for safety. + try: + super().__init__(name=name, lookback=lookback) # type: ignore + except Exception: + # Base class might have a different signature; set manually. + self.name = name + self.lookback = lookback + self._values = None + + # Ensure sensible types/values + if not isinstance(lookback, int) or lookback <= 0: + raise ValueError("lookback must be a positive integer") + self.lookback = lookback + self.name = name def compute(self, prices: pd.DataFrame) -> pd.DataFrame: - """Compute historical volatility over lookback period.""" + """ + Compute annualized historical volatility and return z-scored signals. 
+ + Returns + ------- + pd.DataFrame + DataFrame of the same columns (assets) with index trimmed so that the + first row corresponds to the first full lookback window. + """ self._validate_data(prices) + if prices.shape[0] < self.lookback: + raise ValueError( + f"Need at least {self.lookback} rows of data to compute volatility" + ) + + # pct change -> returns returns = prices.pct_change() - # Vectorized rolling std (annualized) - vol = returns.rolling(window=self.lookback, min_periods=self.lookback).std() * np.sqrt(252) - vol = vol.iloc[self.lookback - 1:] + # rolling std (population, ddof=0) and annualize + vol = returns.rolling(window=self.lookback, min_periods=self.lookback).std( + ddof=0 + ) * np.sqrt(TRADING_DAYS) - # Low-volatility anomaly (invert sign) + # Trim initial rows that don't correspond to a full window + if self.lookback > 1: + vol = vol.iloc[self.lookback - 1 :] + + # Invert sign for low-volatility preference and scale for numeric stability scores = -vol * 10.0 - # Cross-sectional z-score + # Ensure DataFrame (even for single-column) + if isinstance(scores, pd.Series): + scores = scores.to_frame(name=prices.columns[0]) + + # Cross-sectional z-score: (v - mean_row) / std_row if scores.shape[1] > 1: - z = (scores - scores.mean(axis=1).values[:, None]) / scores.std(axis=1).values[:, None] + row_mean = scores.mean(axis=1) + row_std = scores.std(axis=1).replace(0, np.nan) # avoid divide-by-zero + # subtract mean and divide -- use broadcasting via .values for speed + z = (scores.sub(row_mean, axis=0)).div(row_std, axis=0) result = pd.DataFrame(z, index=scores.index, columns=scores.columns) else: - result = scores + # Single asset -> keep the scores DataFrame (no cross-sectional normalization) + result = scores.copy() + # Store and return self._values = result return result class IdiosyncraticVolatility(VolatilityFactor): - """Vectorized idiosyncratic volatility relative to market model.""" + """Compute idiosyncratic volatility relative to an 
equal-weighted market proxy. + Implements a vectorized market-model approach: + - compute rolling cov(ri, rm) and var(rm) + - beta = cov / var + - residuals = ri - beta * rm + - idio_vol = rolling std(residuals) (annualized) + Returns negative idio_vol (so low idio-vol -> high score) and z-scores cross-sectionally. + """ + + def __init__(self, lookback: int = 63, name: str = "idiosyncratic_volatility"): + super().__init__(lookback=lookback, name=name) def compute(self, prices: pd.DataFrame) -> pd.DataFrame: - """Compute idiosyncratic volatility using vectorized regression.""" self._validate_data(prices) + # require enough rows to compute returns and rolling windows + if prices.shape[0] < self.lookback + 1: + raise ValueError( + f"Need at least {self.lookback + 1} rows of data to compute idiosyncratic volatility" + ) + + # daily returns returns = prices.pct_change().dropna() + if returns.shape[0] < self.lookback: + raise ValueError( + f"Need at least {self.lookback} non-NA return rows to compute idio-vol" + ) + + # Market proxy: equal-weighted mean across assets market = returns.mean(axis=1) - # Compute beta for each asset using vectorized covariance/variance - cov_with_mkt = returns.mul(market, axis=0).rolling(window=self.lookback).mean() - ( - returns.rolling(window=self.lookback).mean().mul(market.rolling(window=self.lookback).mean(), axis=0) + # Rolling means for covariance decomposition + returns_mean = returns.rolling( + window=self.lookback, min_periods=self.lookback + ).mean() + market_mean = market.rolling( + window=self.lookback, min_periods=self.lookback + ).mean() + + # Compute cov(ri, rm) via E[ri*rm] - E[ri]*E[rm] + e_ri_rm = ( + returns.mul(market, axis=0) + .rolling(window=self.lookback, min_periods=self.lookback) + .mean() + ) + cov_with_mkt = e_ri_rm - returns_mean.mul(market_mean, axis=0) + + # market variance (vector) -- guard zeros + market_var = ( + market.rolling(window=self.lookback, min_periods=self.lookback) + .var(ddof=0) + .replace(0, 
np.nan) ) - market_var = market.rolling(window=self.lookback).var() + + # Beta: cov / var (division broadcasted over columns) beta = cov_with_mkt.div(market_var, axis=0) - # Predicted returns via market model + # Predicted returns: beta * market (broadcasted) predicted = beta.mul(market, axis=0) + + # Residuals (vectorized) residuals = returns - predicted - # Rolling residual std (annualized) - idio_vol = residuals.rolling(window=self.lookback, min_periods=self.lookback).std() * np.sqrt(252) - idio_vol = idio_vol.iloc[self.lookback - 1:] + # Rolling std of residuals (annualized) + idio_vol = residuals.rolling( + window=self.lookback, min_periods=self.lookback + ).std(ddof=0) * np.sqrt(TRADING_DAYS) - # Invert sign (low-idio-vol performs better) + # Trim to first full-window row + if self.lookback > 1: + idio_vol = idio_vol.iloc[self.lookback - 1 :] + + # Negative idiosyncratic vol => prefer low idio-vol scores = -idio_vol - # Cross-sectional z-score normalization + # Ensure DataFrame shape (in case of single-column) + if isinstance(scores, pd.Series): + scores = scores.to_frame(name=prices.columns[0]) + + # Cross-sectional z-score normalization if > 1 asset if scores.shape[1] > 1: - z = (scores - scores.mean(axis=1).values[:, None]) / scores.std(axis=1).values[:, None] + row_mean = scores.mean(axis=1) + row_std = scores.std(axis=1).replace(0, np.nan) + z = (scores.sub(row_mean, axis=0)).div(row_std, axis=0) result = pd.DataFrame(z, index=scores.index, columns=scores.columns) else: - result = scores + result = scores.copy() + # Save and return self._values = result return result From 33626dc041e1b5af4e274636c5eafe9929b57edc Mon Sep 17 00:00:00 2001 From: adityacosmos24 <163721133+adityacosmos24@users.noreply.github.com> Date: Mon, 27 Oct 2025 01:52:47 +0530 Subject: [PATCH 06/17] remove unused import in volatility.py Remove unused import statement for warnings. 
--- src/quant_research_starter/factors/volatility.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/quant_research_starter/factors/volatility.py b/src/quant_research_starter/factors/volatility.py index 3ef8a97..6762482 100644 --- a/src/quant_research_starter/factors/volatility.py +++ b/src/quant_research_starter/factors/volatility.py @@ -19,8 +19,6 @@ """ from __future__ import annotations - -import warnings from typing import Optional import numpy as np From e0436fbe9ef677cafe8f68d524050e4a0c0bc818 Mon Sep 17 00:00:00 2001 From: Aditya Agrawal Date: Mon, 27 Oct 2025 01:56:51 +0530 Subject: [PATCH 07/17] errors --- src/quant_research_starter/factors/volatility.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/quant_research_starter/factors/volatility.py b/src/quant_research_starter/factors/volatility.py index 3ef8a97..64c512d 100644 --- a/src/quant_research_starter/factors/volatility.py +++ b/src/quant_research_starter/factors/volatility.py @@ -20,7 +20,6 @@ from __future__ import annotations -import warnings from typing import Optional import numpy as np From 8c544772311ebebca6cb7d677d318ad9c9adcc32 Mon Sep 17 00:00:00 2001 From: Aditya Agrawal Date: Mon, 27 Oct 2025 01:59:49 +0530 Subject: [PATCH 08/17] errors --- src/quant_research_starter/factors/volatility.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/quant_research_starter/factors/volatility.py b/src/quant_research_starter/factors/volatility.py index 6762482..5da287b 100644 --- a/src/quant_research_starter/factors/volatility.py +++ b/src/quant_research_starter/factors/volatility.py @@ -19,11 +19,13 @@ """ from __future__ import annotations -from typing import Optional + import numpy as np import pandas as pd +from typing import Optional + # Try to import package Factor base; fallback to a minimal stub if unavailable. try: # Adjust this import if your project stores Factor in a different module. 
From 7dbdd606304126030a5bc7b28fd7753137a5befd Mon Sep 17 00:00:00 2001 From: Aditya Agrawal Date: Mon, 27 Oct 2025 02:03:40 +0530 Subject: [PATCH 09/17] fix: sort imports with Ruff --- src/quant_research_starter/factors/volatility.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/quant_research_starter/factors/volatility.py b/src/quant_research_starter/factors/volatility.py index 5da287b..64c512d 100644 --- a/src/quant_research_starter/factors/volatility.py +++ b/src/quant_research_starter/factors/volatility.py @@ -20,12 +20,11 @@ from __future__ import annotations +from typing import Optional import numpy as np import pandas as pd -from typing import Optional - # Try to import package Factor base; fallback to a minimal stub if unavailable. try: # Adjust this import if your project stores Factor in a different module. From b70d514785b8d4c5977afd20e48d5752a5f895ee Mon Sep 17 00:00:00 2001 From: Aditya Agrawal Date: Mon, 27 Oct 2025 02:09:49 +0530 Subject: [PATCH 10/17] format code with black --- .../examples/benchmark/benchmark_factors.py | 8 ++- .../factors/volatility.py | 49 +++++++++++++------ 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/src/quant_research_starter/examples/benchmark/benchmark_factors.py b/src/quant_research_starter/examples/benchmark/benchmark_factors.py index e64b0f3..f1d31b1 100644 --- a/src/quant_research_starter/examples/benchmark/benchmark_factors.py +++ b/src/quant_research_starter/examples/benchmark/benchmark_factors.py @@ -20,7 +20,9 @@ ) -def generate_synthetic_prices(n_assets: int = 500, n_days: int = 252 * 3) -> pd.DataFrame: +def generate_synthetic_prices( + n_assets: int = 500, n_days: int = 252 * 3 +) -> pd.DataFrame: """Generate synthetic random walk price data for testing.""" np.random.seed(42) returns = np.random.normal(0, 0.01, size=(n_days, n_assets)) @@ -36,7 +38,9 @@ def benchmark_factor(factor, prices: pd.DataFrame): _ = factor.compute(prices) end = time.time() elapsed = end - 
start - print(f"{factor.name:<25} | Lookback: {factor.lookback:<5} | Time: {elapsed:.3f} sec") + print( + f"{factor.name:<25} | Lookback: {factor.lookback:<5} | Time: {elapsed:.3f} sec" + ) def main(): diff --git a/src/quant_research_starter/factors/volatility.py b/src/quant_research_starter/factors/volatility.py index 64c512d..1fef9c1 100644 --- a/src/quant_research_starter/factors/volatility.py +++ b/src/quant_research_starter/factors/volatility.py @@ -35,7 +35,9 @@ except Exception: # Minimal Factor stub so this module can be inspected/tested in isolation. class Factor: - def __init__(self, name: Optional[str] = None, lookback: Optional[int] = None): + def __init__( + self, name: Optional[str] = None, lookback: Optional[int] = None + ): self.name = name or "factor" self.lookback = lookback or 0 self._values: Optional[pd.DataFrame] = None @@ -47,6 +49,7 @@ def _validate_data(self, prices: pd.DataFrame) -> None: def __repr__(self) -> str: return f"" + # Constants TRADING_DAYS = 252 @@ -92,15 +95,17 @@ def compute(self, prices: pd.DataFrame) -> pd.DataFrame: self._validate_data(prices) if prices.shape[0] < self.lookback: - raise ValueError(f"Need at least {self.lookback} rows of data to compute volatility") + raise ValueError( + f"Need at least {self.lookback} rows of data to compute volatility" + ) # pct change -> returns returns = prices.pct_change() # rolling std (population, ddof=0) and annualize - vol = returns.rolling(window=self.lookback, min_periods=self.lookback).std(ddof=0) * np.sqrt( - TRADING_DAYS - ) + vol = returns.rolling(window=self.lookback, min_periods=self.lookback).std( + ddof=0 + ) * np.sqrt(TRADING_DAYS) # Trim initial rows that don't correspond to a full window if self.lookback > 1: @@ -147,26 +152,42 @@ def compute(self, prices: pd.DataFrame) -> pd.DataFrame: # require enough rows to compute returns and rolling windows if prices.shape[0] < self.lookback + 1: - raise ValueError(f"Need at least {self.lookback + 1} rows of data to compute 
idiosyncratic volatility") + raise ValueError( + f"Need at least {self.lookback + 1} rows of data to compute idiosyncratic volatility" + ) # daily returns returns = prices.pct_change().dropna() if returns.shape[0] < self.lookback: - raise ValueError(f"Need at least {self.lookback} non-NA return rows to compute idio-vol") + raise ValueError( + f"Need at least {self.lookback} non-NA return rows to compute idio-vol" + ) # Market proxy: equal-weighted mean across assets market = returns.mean(axis=1) # Rolling means for covariance decomposition - returns_mean = returns.rolling(window=self.lookback, min_periods=self.lookback).mean() - market_mean = market.rolling(window=self.lookback, min_periods=self.lookback).mean() + returns_mean = returns.rolling( + window=self.lookback, min_periods=self.lookback + ).mean() + market_mean = market.rolling( + window=self.lookback, min_periods=self.lookback + ).mean() # Compute cov(ri, rm) via E[ri*rm] - E[ri]*E[rm] - e_ri_rm = returns.mul(market, axis=0).rolling(window=self.lookback, min_periods=self.lookback).mean() + e_ri_rm = ( + returns.mul(market, axis=0) + .rolling(window=self.lookback, min_periods=self.lookback) + .mean() + ) cov_with_mkt = e_ri_rm - returns_mean.mul(market_mean, axis=0) # market variance (vector) -- guard zeros - market_var = market.rolling(window=self.lookback, min_periods=self.lookback).var(ddof=0).replace(0, np.nan) + market_var = ( + market.rolling(window=self.lookback, min_periods=self.lookback) + .var(ddof=0) + .replace(0, np.nan) + ) # Beta: cov / var (division broadcasted over columns) beta = cov_with_mkt.div(market_var, axis=0) @@ -178,9 +199,9 @@ def compute(self, prices: pd.DataFrame) -> pd.DataFrame: residuals = returns - predicted # Rolling std of residuals (annualized) - idio_vol = residuals.rolling(window=self.lookback, min_periods=self.lookback).std(ddof=0) * np.sqrt( - TRADING_DAYS - ) + idio_vol = residuals.rolling( + window=self.lookback, min_periods=self.lookback + ).std(ddof=0) * 
np.sqrt(TRADING_DAYS) # Trim to first full-window row if self.lookback > 1: From 83f8e3178f49882e3f59ad82ca0b915274520b63 Mon Sep 17 00:00:00 2001 From: Aditya Agrawal Date: Tue, 28 Oct 2025 00:35:32 +0530 Subject: [PATCH 11/17] feat: added setuptools poetry entrypoint --- README.md | 36 +++++++++++++---- pyproject.toml | 6 +++ test_cli.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 test_cli.py diff --git a/README.md b/README.md index c6d50e8..25db316 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,26 @@ pip install -e ".[dev]" pip install streamlit plotly ``` +### Quick CLI Usage + +After installation, you can use the CLI in two ways: + +**Option 1: Direct command (if PATH is configured)** +```bash +qrs --help +qrs generate-data -o data/sample.csv -s 5 -d 365 +qrs compute-factors -d data/sample.csv -f momentum -f value +qrs backtest -d data/sample.csv -s output/factors.csv +``` + +**Option 2: Python module (always works)** +```bash +python -m quant_research_starter.cli --help +python -m quant_research_starter.cli generate-data -o data/sample.csv -s 5 -d 365 +python -m quant_research_starter.cli compute-factors -d data/sample.csv -f momentum -f value +python -m quant_research_starter.cli backtest -d data/sample.csv -s output/factors.csv +``` + ### Demo (one-line) ```bash @@ -63,13 +83,13 @@ make demo ```bash # generate synthetic sample price series -qrs generate-data -o data_sample/sample_prices.csv -s 5 -d 365 +python -m quant_research_starter.cli generate-data -o data_sample/sample_prices.csv -s 5 -d 365 # compute example factors -qrs compute-factors -d data_sample/sample_prices.csv -f momentum -f value -o output/factors.csv +python -m quant_research_starter.cli compute-factors -d data_sample/sample_prices.csv -f momentum -f value -o output/factors.csv # run a backtest -qrs backtest -d data_sample/sample_prices.csv -s output/factors.csv -o output/backtest_results.json +python -m 
quant_research_starter.cli backtest -d data_sample/sample_prices.csv -s output/factors.csv -o output/backtest_results.json # optional: start the Streamlit dashboard streamlit run src/quant_research_starter/dashboard/streamlit_app.py @@ -99,11 +119,13 @@ print(results.performance.summary()) ## CLI reference -Run `qrs --help` or `qrs --help` for full usage. Main commands include: +Run `python -m quant_research_starter.cli --help` or `python -m quant_research_starter.cli <command> --help` for full usage. Main commands include: + +* `python -m quant_research_starter.cli generate-data` — create synthetic price series or download data from adapters +* `python -m quant_research_starter.cli compute-factors` — calculate and export factor scores +* `python -m quant_research_starter.cli backtest` — run the vectorized backtest and export results -* `qrs generate-data` — create synthetic price series or download data from adapters -* `qrs compute-factors` — calculate and export factor scores -* `qrs backtest` — run the vectorized backtest and export results +**Note:** If you have the `qrs` command in your PATH, you can use `qrs` instead of `python -m quant_research_starter.cli`. 
--- diff --git a/pyproject.toml b/pyproject.toml index d070cdf..4eecd4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,12 @@ qrs = "quant_research_starter.cli:cli" requires = ["setuptools>=45", "wheel"] build-backend = "setuptools.build_meta" +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-dir] +"" = "src" + [tool.black] line-length = 88 target-version = ['py310'] diff --git a/test_cli.py b/test_cli.py new file mode 100644 index 0000000..8606676 --- /dev/null +++ b/test_cli.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +""" +Simple test script for QuantResearch CLI +This demonstrates all the CLI functionality +""" + +import subprocess +import sys +from pathlib import Path + + +def run_command(cmd): + """Run a command and print the result""" + print(f"\n{'='*60}") + print(f"Running: {cmd}") + print('='*60) + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + + if result.stdout: + print(result.stdout) + if result.stderr and result.returncode != 0: + print(f"ERROR: {result.stderr}") + return False + + return result.returncode == 0 + + +def main(): + """Run all CLI tests""" + print("\n🧪 Testing QuantResearch CLI") + print("="*60) + + # Define test directories + test_data_dir = Path("test_data") + test_output_dir = Path("test_output") + + # Create test directories + test_data_dir.mkdir(exist_ok=True) + test_output_dir.mkdir(exist_ok=True) + + # Test 1: Show help + success = run_command("python -m quant_research_starter.cli --help") + if not success: + print("\n❌ Test 1 FAILED: Help command") + sys.exit(1) + + # Test 2: Generate data + success = run_command( + "python -m quant_research_starter.cli generate-data " + "-o test_data/data.csv -s 5 -d 100" + ) + if not success: + print("\n❌ Test 2 FAILED: Generate data") + sys.exit(1) + + # Test 3: Compute factors + success = run_command( + "python -m quant_research_starter.cli compute-factors " + "-d test_data/data.csv -f momentum -f value -o 
test_output/factors.csv" + ) + if not success: + print("\n❌ Test 3 FAILED: Compute factors") + sys.exit(1) + + # Test 4: Run backtest + success = run_command( + "python -m quant_research_starter.cli backtest " + "-d test_data/data.csv -s test_output/factors.csv " + "-o test_output/backtest_results.json" + ) + if not success: + print("\n❌ Test 4 FAILED: Run backtest") + sys.exit(1) + + # Verify output files exist + print("\n📁 Checking output files...") + files_to_check = [ + test_data_dir / "data.csv", + test_output_dir / "factors.csv", + test_output_dir / "backtest_results.json", + test_output_dir / "backtest_plot.png" + ] + + all_exist = True + for file_path in files_to_check: + if file_path.exists(): + print(f"✅ {file_path} exists ({file_path.stat().st_size} bytes)") + else: + print(f"❌ {file_path} missing!") + all_exist = False + + if not all_exist: + print("\n❌ Some output files are missing") + sys.exit(1) + + # Summary + print("\n" + "="*60) + print("✅ ALL TESTS PASSED!") + print("="*60) + print(f"\n📂 Test files created in:") + print(f" - {test_data_dir}/") + print(f" - {test_output_dir}/") + print("\n💡 You can view the results and plots in the test_output directory.") + + +if __name__ == "__main__": + main() From badb9794f28cc8a8693abd2f163630b198e8703c Mon Sep 17 00:00:00 2001 From: adityacosmos24 <163721133+adityacosmos24@users.noreply.github.com> Date: Tue, 28 Oct 2025 01:21:53 +0530 Subject: [PATCH 12/17] chore: update readme with new command syntax and paths --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 25db316..69cb801 100644 --- a/README.md +++ b/README.md @@ -60,8 +60,8 @@ After installation, you can use the CLI in two ways: **Option 1: Direct command (if PATH is configured)** ```bash qrs --help -qrs generate-data -o data/sample.csv -s 5 -d 365 -qrs compute-factors -d data/sample.csv -f momentum -f value +python -m quant_research_starter.cli generate-data -o 
data_sample/sample_prices.csv -s 5 -d 365 +python -m quant_research_starter.cli compute-factors -d data_sample/sample_prices.csv -f momentum -f value -o output/factors.csv qrs backtest -d data/sample.csv -s output/factors.csv ``` @@ -70,7 +70,7 @@ qrs backtest -d data/sample.csv -s output/factors.csv python -m quant_research_starter.cli --help python -m quant_research_starter.cli generate-data -o data/sample.csv -s 5 -d 365 python -m quant_research_starter.cli compute-factors -d data/sample.csv -f momentum -f value -python -m quant_research_starter.cli backtest -d data/sample.csv -s output/factors.csv +python -m quant_research_starter.cli backtest -d data_sample/sample_prices.csv -s output/factors.csv -o output/backtest_results.json ``` ### Demo (one-line) From 50f717d87975b2ea5aa798670a8f42dfc0acd16d Mon Sep 17 00:00:00 2001 From: Aditya Agrawal Date: Tue, 28 Oct 2025 01:42:52 +0530 Subject: [PATCH 13/17] chore: updated gitignore --- .gitignore | 3 ++- README.md | 32 ++++++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index ba83c77..f60b36a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ node_modules .coverage __pycache__/ *.py[cod] -*$py.class \ No newline at end of file +*$py.class +src/quant_research_starter.egg-info/PKG-INFO diff --git a/README.md b/README.md index 25db316..83c9bf2 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ QuantResearchStarter aims to provide a clean, well-documented starting point for * **Data management** — download market data or generate synthetic price series for experiments. * **Factor library** — example implementations of momentum, value, size, and volatility factors. -* **Vectorized backtesting engine** — supports transaction costs, slippage, and portfolio constraints. +* **Vectorized backtesting engine** — supports transaction costs, slippage, portfolio constraints, and configurable rebalancing frequencies (daily, weekly, monthly). 
* **Risk & performance analytics** — returns, drawdowns, Sharpe, turnover, and other risk metrics. * **CLI & scripts** — small tools to generate data, compute factors, and run backtests from the terminal. * **Production-ready utilities** — type hints, tests, continuous integration, and documentation scaffolding. @@ -60,8 +60,8 @@ After installation, you can use the CLI in two ways: **Option 1: Direct command (if PATH is configured)** ```bash qrs --help -qrs generate-data -o data/sample.csv -s 5 -d 365 -qrs compute-factors -d data/sample.csv -f momentum -f value +python -m quant_research_starter.cli generate-data -o data_sample/sample_prices.csv -s 5 -d 365 +python -m quant_research_starter.cli compute-factors -d data_sample/sample_prices.csv -f momentum -f value -o output/factors.csv qrs backtest -d data/sample.csv -s output/factors.csv ``` @@ -70,7 +70,7 @@ qrs backtest -d data/sample.csv -s output/factors.csv python -m quant_research_starter.cli --help python -m quant_research_starter.cli generate-data -o data/sample.csv -s 5 -d 365 python -m quant_research_starter.cli compute-factors -d data/sample.csv -f momentum -f value -python -m quant_research_starter.cli backtest -d data/sample.csv -s output/factors.csv +python -m quant_research_starter.cli backtest -d data_sample/sample_prices.csv -s output/factors.csv -o output/backtest_results.json ``` ### Demo (one-line) @@ -113,6 +113,30 @@ results = bt.run() print(results.performance.summary()) ``` +### Rebalancing Frequency + +The backtester supports different rebalancing frequencies to match your strategy needs: + +```python +from quant_research_starter.backtest import VectorizedBacktest + +# Daily rebalancing (default) +bt_daily = VectorizedBacktest(prices, signals, rebalance_freq="D") + +# Weekly rebalancing (reduces turnover and transaction costs) +bt_weekly = VectorizedBacktest(prices, signals, rebalance_freq="W") + +# Monthly rebalancing (lowest turnover) +bt_monthly = VectorizedBacktest(prices, signals, 
rebalance_freq="M") + +results = bt_monthly.run() +``` + +Supported frequencies: +- `"D"`: Daily rebalancing (default) +- `"W"`: Weekly rebalancing (rebalances when the week changes) +- `"M"`: Monthly rebalancing (rebalances when the month changes) + > The code above is illustrative—see `examples/` for fully working notebooks and scripts. --- From dd8b845af884619f0083270e43fe48b50f81e442 Mon Sep 17 00:00:00 2001 From: AYUSH KUMAR TIWARI <139953157+ayushkrtiwari@users.noreply.github.com> Date: Tue, 28 Oct 2025 01:50:34 +0530 Subject: [PATCH 14/17] docs: duplicates removed Removed duplicated start section from PKG-INFO --- src/quant_research_starter.egg-info/PKG-INFO | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/quant_research_starter.egg-info/PKG-INFO b/src/quant_research_starter.egg-info/PKG-INFO index 1c9fd45..f6493b4 100644 --- a/src/quant_research_starter.egg-info/PKG-INFO +++ b/src/quant_research_starter.egg-info/PKG-INFO @@ -74,18 +74,6 @@ QuantResearchStarter aims to provide a clean, well-documented starting point for ## Quick start - -* **Data management** — download market data or generate synthetic price series for experiments. -* **Factor library** — example implementations of momentum, value, size, and volatility factors. -* **Vectorized backtesting engine** — supports transaction costs, slippage, portfolio constraints, and configurable rebalancing frequencies (daily, weekly, monthly). -* **Risk & performance analytics** — returns, drawdowns, Sharpe, turnover, and other risk metrics. -* **CLI & scripts** — small tools to generate data, compute factors, and run backtests from the terminal. -* **Production-ready utilities** — type hints, tests, continuous integration, and documentation scaffolding. 
- ---- - -## Quick start - ### Requirements * Python 3.10+ From 77b56b6779c31fbdb7cee432474ff809fdd5785f Mon Sep 17 00:00:00 2001 From: AYUSH KUMAR TIWARI <139953157+ayushkrtiwari@users.noreply.github.com> Date: Tue, 28 Oct 2025 02:04:47 +0530 Subject: [PATCH 15/17] docs: fix typo error Removed typo errors --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index 466fdd8..f60b36a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,4 @@ node_modules __pycache__/ *.py[cod] *$py.class -<<<<<<< HEAD src/quant_research_starter.egg-info/PKG-INFO -======= ->>>>>>> c8c55966502a7633c73ef7af4d794b072c7f94df From e1409700414732c899127ff4078c47736e9bdba8 Mon Sep 17 00:00:00 2001 From: AYUSH KUMAR TIWARI <139953157+ayushkrtiwari@users.noreply.github.com> Date: Tue, 28 Oct 2025 02:09:58 +0530 Subject: [PATCH 16/17] docs: added description notes added --- src/quant_research_starter.egg-info/PKG-INFO | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/quant_research_starter.egg-info/PKG-INFO b/src/quant_research_starter.egg-info/PKG-INFO index f6493b4..189a179 100644 --- a/src/quant_research_starter.egg-info/PKG-INFO +++ b/src/quant_research_starter.egg-info/PKG-INFO @@ -65,7 +65,7 @@ QuantResearchStarter aims to provide a clean, well-documented starting point for * **Data management** — download market data or generate synthetic price series for experiments. * **Factor library** — example implementations of momentum, value, size, and volatility factors. -* **Vectorized backtesting engine** — supports transaction costs, slippage, and portfolio constraints. +* **Vectorized backtesting engine** — supports transaction costs, slippage, portfolio constraints, and configurable rebalancing frequencies (daily, weekly, monthly). * **Risk & performance analytics** — returns, drawdowns, Sharpe, turnover, and other risk metrics. * **CLI & scripts** — small tools to generate data, compute factors, and run backtests from the terminal. 
* **Production-ready utilities** — type hints, tests, continuous integration, and documentation scaffolding. From 2f2bf84a52a237089de584ea0873c5a3b5000ad4 Mon Sep 17 00:00:00 2001 From: AYUSH KUMAR TIWARI <139953157+ayushkrtiwari@users.noreply.github.com> Date: Tue, 28 Oct 2025 02:24:02 +0530 Subject: [PATCH 17/17] docs: fixed path issue right path updated --- README.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 83c9bf2..1d48e1d 100644 --- a/README.md +++ b/README.md @@ -60,16 +60,19 @@ After installation, you can use the CLI in two ways: **Option 1: Direct command (if PATH is configured)** ```bash qrs --help -python -m quant_research_starter.cli generate-data -o data_sample/sample_prices.csv -s 5 -d 365 -python -m quant_research_starter.cli compute-factors -d data_sample/sample_prices.csv -f momentum -f value -o output/factors.csv -qrs backtest -d data/sample.csv -s output/factors.csv +# generate synthetic sample price series +qrs generate-data -o data_sample/sample_prices.csv -s 5 -d 365 +# compute example factors +qrs compute-factors -d data_sample/sample_prices.csv -f momentum -f value -o output/factors.csv +# run a backtest +qrs backtest -d data_sample/sample_prices.csv -s output/factors.csv -o output/backtest_results.json ``` **Option 2: Python module (always works)** ```bash python -m quant_research_starter.cli --help -python -m quant_research_starter.cli generate-data -o data/sample.csv -s 5 -d 365 -python -m quant_research_starter.cli compute-factors -d data/sample.csv -f momentum -f value +python -m quant_research_starter.cli generate-data -o data_sample/sample_prices.csv -s 5 -d 365 +python -m quant_research_starter.cli compute-factors -d data_sample/sample_prices.csv -f momentum -f value python -m quant_research_starter.cli backtest -d data_sample/sample_prices.csv -s output/factors.csv -o output/backtest_results.json ```