|
1 | | -"""Volatility factor implementations (vectorized).""" |
| 1 | +""" |
| 2 | +Volatility factor implementations (vectorized, production-ready). |
| 3 | +
|
| 4 | +This file contains: |
| 5 | +- VolatilityFactor: historical (realized) volatility (annualized) with |
| 6 | + cross-sectional z-score output. |
| 7 | +- IdiosyncraticVolatility: volatility of residuals vs an equal-weighted |
| 8 | + market proxy, computed using vectorized operations. |
| 9 | +
|
| 10 | +Key improvements included: |
| 11 | +- Proper `__init__` usage. |
| 12 | +- Min_periods set on rolling operations; trimming of initial rows to avoid |
| 13 | + ambiguous partial-window values. |
| 14 | +- Guarding divide-by-zero when computing beta (market variance). |
| 15 | +- Consistent handling for single-column (single-asset) DataFrames. |
| 16 | +- Preserves DataFrame output shape/columns and sets self._values. |
| 17 | +- Uses ddof=0 for rolling std/var to match population estimates (consistent & |
| 18 | + fast). |
| 19 | +""" |
| 20 | + |
| 21 | +from __future__ import annotations |
| 22 | + |
| 23 | +import warnings |
| 24 | +from typing import Optional |
2 | 25 |
|
3 | 26 | import numpy as np |
4 | 27 | import pandas as pd |
5 | 28 |
|
# Resolve the Factor base class. Only ImportError triggers a fallback, so a
# genuine bug inside the real base module surfaces instead of being masked by
# the stub below.
try:
    # Adjust this import if your project stores Factor in a different module.
    from .base import Factor  # type: ignore
except ImportError:
    try:
        from quant_research_starter.factors.base import Factor  # type: ignore
    except ImportError:

        class Factor:
            """Minimal stand-in so this module can be inspected/tested in isolation.

            Provides only what the factor classes in this module use: ``name``,
            ``lookback``, ``_values`` and ``_validate_data``.
            """

            def __init__(self, name: Optional[str] = None, lookback: Optional[int] = None):
                # Falsy arguments (None, "", 0) fall back to the defaults.
                self.name = name or "factor"
                self.lookback = lookback or 0
                # Holds the most recent compute() result, if any.
                self._values: Optional[pd.DataFrame] = None

            def _validate_data(self, prices: pd.DataFrame) -> None:
                """Raise TypeError unless ``prices`` is a pandas DataFrame."""
                if not isinstance(prices, pd.DataFrame):
                    raise TypeError("prices must be a pandas DataFrame")

            def __repr__(self) -> str:
                return f"<Factor name={self.name} lookback={self.lookback}>"


# Number of trading days per year, used to annualize daily volatility.
TRADING_DAYS = 252
7 | 53 |
|
8 | 54 |
|
class VolatilityFactor(Factor):
    """Annualized realized volatility, sign-flipped and z-scored across assets.

    Daily simple returns are taken from ``prices``; their rolling population
    standard deviation (``ddof=0``) over ``lookback`` rows is annualized with
    ``sqrt(TRADING_DAYS)``. The result is multiplied by ``-10`` so that calmer
    assets score higher, and each row is then standardized across columns when
    more than one asset is present.

    Parameters
    ----------
    lookback : int
        Rolling window length in rows. Default is 21.
    name : str
        Human-readable name for the factor.
    """

    def __init__(self, lookback: int = 21, name: str = "volatility"):
        # Validate before touching the base class so an invalid window never
        # ends up stored on a half-initialized instance. bool is a subclass of
        # int and is rejected explicitly; numpy integers are accepted.
        if (
            isinstance(lookback, bool)
            or not isinstance(lookback, (int, np.integer))
            or lookback <= 0
        ):
            raise ValueError("lookback must be a positive integer")
        lookback = int(lookback)

        try:
            super().__init__(name=name, lookback=lookback)  # type: ignore
        except TypeError:
            # Base class does not accept these keyword arguments; create the
            # result cache here instead. Other errors are left to propagate.
            self._values = None

        # Keep explicit attributes regardless of what the base class stored.
        self.lookback = lookback
        self.name = name

    def compute(self, prices: pd.DataFrame) -> pd.DataFrame:
        """Compute annualized historical volatility and return z-scored signals.

        Parameters
        ----------
        prices : pd.DataFrame
            Price levels, one column per asset, one row per period.

        Returns
        -------
        pd.DataFrame
            Same columns as ``prices``, rows from position ``lookback - 1``
            onward (all rows when ``lookback`` is 1). The first returned row
            is always NaN: ``pct_change`` leaves the first return undefined,
            so the window ending there holds fewer than ``lookback`` returns.
            With several assets the values are row-wise z-scores; with a
            single asset they are the raw ``-10 * volatility`` scores.

        Raises
        ------
        ValueError
            If ``prices`` has fewer than ``lookback`` rows.

        Warns
        -----
        RuntimeWarning
            If ``prices`` has exactly ``lookback`` rows, which passes the
            check above but cannot fill a single window of returns.
        """
        self._validate_data(prices)

        n_rows = prices.shape[0]
        if n_rows < self.lookback:
            raise ValueError(f"Need at least {self.lookback} rows of data to compute volatility")
        if n_rows == self.lookback:
            warnings.warn(
                f"{n_rows} price rows yield only {n_rows - 1} returns, fewer than "
                f"lookback={self.lookback}; the result will contain only NaN",
                RuntimeWarning,
                stacklevel=2,
            )

        returns = prices.pct_change()

        # Rolling population std (ddof=0), annualized.
        rolling_std = returns.rolling(window=self.lookback, min_periods=self.lookback).std(ddof=0)
        vol = rolling_std * np.sqrt(TRADING_DAYS)

        # Drop the leading rows whose window is shorter than lookback.
        # NOTE(review): the row kept at position lookback - 1 is still NaN
        # (see Returns); trimming at `lookback` would remove it but changes
        # the output length callers currently receive.
        if self.lookback > 1:
            vol = vol.iloc[self.lookback - 1 :]

        # Low-volatility preference: negate. The constant factor cancels in
        # the z-score and only affects the single-asset branch.
        scores = -vol * 10.0

        if scores.shape[1] > 1:
            # Cross-sectional z-score per row; pandas' default sample std
            # (ddof=1) is used across assets. A zero spread becomes NaN
            # rather than dividing by zero.
            row_mean = scores.mean(axis=1)
            row_std = scores.std(axis=1).replace(0, np.nan)
            result = scores.sub(row_mean, axis=0).div(row_std, axis=0)
        else:
            # Single asset: no cross-section to normalize against.
            result = scores.copy()

        self._values = result
        return result
37 | 131 |
|
38 | 132 |
|
class IdiosyncraticVolatility(VolatilityFactor):
    """Idiosyncratic volatility relative to an equal-weighted market proxy.

    Vectorized market-model approach:
    - market return rm = row-wise mean of asset returns
    - rolling cov(ri, rm) and var(rm), both population (ddof=0)
    - beta = cov / var, re-estimated on every row
    - residual = ri - beta * rm, using the beta of the window ending that row
    - idio_vol = rolling population std of residuals, annualized

    Scores are the negated idio_vol (low idio-vol -> high score), z-scored
    across assets when more than one asset is present.

    Parameters
    ----------
    lookback : int
        Rolling window length in rows. Default is 63.
    name : str
        Human-readable name for the factor.
    """

    def __init__(self, lookback: int = 63, name: str = "idiosyncratic_volatility"):
        super().__init__(lookback=lookback, name=name)

    def compute(self, prices: pd.DataFrame) -> pd.DataFrame:
        """Compute negated, cross-sectionally z-scored idiosyncratic volatility.

        Parameters
        ----------
        prices : pd.DataFrame
            Price levels, one column per asset, one row per period.

        Returns
        -------
        pd.DataFrame
            Same columns as ``prices``, indexed by the return rows that remain
            after dropping any row containing NaN, from position
            ``lookback - 1`` onward. The first ``lookback - 1`` returned rows
            are NaN: residuals exist only once a full beta window is
            available, and a second full window of residuals is then needed.

        Raises
        ------
        ValueError
            If ``prices`` has fewer than ``lookback + 1`` rows, or fewer than
            ``lookback`` complete return rows remain after dropping NaN.

        Warns
        -----
        RuntimeWarning
            If fewer than ``2 * lookback - 1`` complete return rows are
            available, in which case every returned value is NaN.
        """
        self._validate_data(prices)

        window = self.lookback

        # One extra row is consumed by pct_change.
        if prices.shape[0] < window + 1:
            raise ValueError(f"Need at least {self.lookback + 1} rows of data to compute idiosyncratic volatility")

        # Daily returns; any row with a NaN in any asset is dropped.
        returns = prices.pct_change().dropna()
        if returns.shape[0] < window:
            raise ValueError(f"Need at least {self.lookback} non-NA return rows to compute idio-vol")

        # Beta window and residual window are chained, so the first usable
        # value needs 2 * window - 1 returns.
        required = 2 * window - 1
        if returns.shape[0] < required:
            warnings.warn(
                f"{returns.shape[0]} complete return rows are fewer than the {required} "
                f"needed for lookback={window}; the result will contain only NaN",
                RuntimeWarning,
                stacklevel=2,
            )

        # Market proxy: equal-weighted mean across assets.
        market = returns.mean(axis=1)

        # cov(ri, rm) = E[ri * rm] - E[ri] * E[rm] over the rolling window.
        returns_mean = returns.rolling(window=window, min_periods=window).mean()
        market_mean = market.rolling(window=window, min_periods=window).mean()
        e_ri_rm = returns.mul(market, axis=0).rolling(window=window, min_periods=window).mean()
        cov_with_mkt = e_ri_rm - returns_mean.mul(market_mean, axis=0)

        # Population variance to match the covariance above; a zero variance
        # becomes NaN so beta is undefined instead of infinite.
        market_var = market.rolling(window=window, min_periods=window).var(ddof=0).replace(0, np.nan)

        beta = cov_with_mkt.div(market_var, axis=0)

        # Market-model residuals (no intercept term).
        residuals = returns - beta.mul(market, axis=0)

        # Rolling population std of residuals, annualized.
        idio_vol = residuals.rolling(window=window, min_periods=window).std(ddof=0) * np.sqrt(
            TRADING_DAYS
        )

        # Drop rows before the first full beta window.
        if window > 1:
            idio_vol = idio_vol.iloc[window - 1 :]

        # Prefer low idiosyncratic volatility.
        scores = -idio_vol

        if scores.shape[1] > 1:
            # Cross-sectional z-score per row; zero spread becomes NaN.
            row_mean = scores.mean(axis=1)
            row_std = scores.std(axis=1).replace(0, np.nan)
            result = scores.sub(row_mean, axis=0).div(row_std, axis=0)
        else:
            # Single asset: no cross-section to normalize against.
            result = scores.copy()

        self._values = result
        return result
0 commit comments