Skip to content

Commit 327c549

Browse files
feat: refactor volatility factors for production readiness
Enhanced volatility factor implementations for production use, including improved initialization, handling of edge cases, and consistent DataFrame outputs.
1 parent af12d1a commit 327c549

File tree

1 file changed

+160
-27
lines changed

1 file changed

+160
-27
lines changed
Lines changed: 160 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,208 @@
1-
"""Volatility factor implementations (vectorized)."""
1+
"""
2+
Volatility factor implementations (vectorized, production-ready).
3+
4+
This file contains:
5+
- VolatilityFactor: historical (realized) volatility (annualized) with
6+
cross-sectional z-score output.
7+
- IdiosyncraticVolatility: volatility of residuals vs an equal-weighted
8+
market proxy, computed using vectorized operations.
9+
10+
Key improvements included:
11+
- Proper `__init__` usage.
12+
- Min_periods set on rolling operations; trimming of initial rows to avoid
13+
ambiguous partial-window values.
14+
- Guarding divide-by-zero when computing beta (market variance).
15+
- Consistent handling for single-column (single-asset) DataFrames.
16+
- Preserves DataFrame output shape/columns and sets self._values.
17+
- Uses ddof=0 for rolling std/var to match population estimates (consistent &
18+
fast).
19+
"""
20+
21+
from __future__ import annotations
22+
23+
import warnings
24+
from typing import Optional
225

326
import numpy as np
427
import pandas as pd
528

6-
from .base import Factor
29+
# Prefer the project's real ``Factor`` base class. Fall back to the minimal
# stub below only when the base module genuinely cannot be imported, so that
# this module can still be inspected/tested in isolation. Only ImportError is
# caught: any other exception raised while importing the base module is a real
# defect and must surface instead of being masked by the stub.
try:
    # Adjust this import if your project stores Factor in a different module.
    from .base import Factor  # type: ignore
except ImportError:
    try:
        from quant_research_starter.factors.base import Factor  # type: ignore
    except ImportError:

        class Factor:
            """Minimal stand-in for the project's ``Factor`` base class.

            Provides only what the factor classes in this module use:
            ``name``, ``lookback``, ``_values`` and ``_validate_data``.
            """

            def __init__(self, name: Optional[str] = None, lookback: Optional[int] = None):
                # Falsy arguments (None, "", 0) fall back to the defaults.
                self.name = name or "factor"
                self.lookback = lookback or 0
                # Holds the most recently computed factor values, if any.
                self._values: Optional[pd.DataFrame] = None

            def _validate_data(self, prices: pd.DataFrame) -> None:
                """Raise ``TypeError`` unless *prices* is a pandas DataFrame."""
                if not isinstance(prices, pd.DataFrame):
                    raise TypeError("prices must be a pandas DataFrame")

            def __repr__(self) -> str:
                return f"<Factor name={self.name} lookback={self.lookback}>"


# Constants
# Annualization factor: number of periods per year applied to per-period vol.
TRADING_DAYS = 252
753

854

955
class VolatilityFactor(Factor):
    """Annualized historical (realized) volatility as a low-volatility signal.

    Volatility is the rolling population standard deviation (``ddof=0``) of
    simple returns, scaled by ``sqrt(TRADING_DAYS)``. The sign is inverted so
    that lower volatility yields a higher score, and when more than one asset
    is present each row is z-scored across assets.

    Parameters
    ----------
    lookback : int
        Rolling lookback window, in rows of returns. Default is 21.
    name : str
        Human-readable name for the factor.

    Raises
    ------
    ValueError
        If ``lookback`` is not a positive integer.
    """

    def __init__(self, lookback: int = 21, name: str = "volatility"):
        # Validate first so an invalid lookback never leaves a half-initialised
        # instance behind. NumPy integer scalars are accepted and normalised.
        if not isinstance(lookback, (int, np.integer)) or lookback <= 0:
            raise ValueError("lookback must be a positive integer")
        lookback = int(lookback)

        try:
            super().__init__(name=name, lookback=lookback)  # type: ignore
        except TypeError:
            # The base class has a different constructor signature; provide
            # the attribute it would otherwise have initialised.
            self._values = None

        # Set explicitly so these attributes exist whatever the base class did.
        self.lookback = lookback
        self.name = name

    def compute(self, prices: pd.DataFrame) -> pd.DataFrame:
        """Compute annualized historical volatility and return scored signals.

        Parameters
        ----------
        prices : pd.DataFrame
            Price levels, one column per asset, one row per period.

        Returns
        -------
        pd.DataFrame
            Same columns as ``prices``. The index is trimmed so that the first
            row is the first one backed by a full window of ``lookback``
            returns, i.e. the first ``lookback`` input rows are dropped. With
            several assets the values are cross-sectional z-scores of
            ``-volatility``; with a single asset they are ``-10 * volatility``.
            Rows with zero cross-sectional dispersion are NaN.

        Raises
        ------
        ValueError
            If ``prices`` has fewer than ``lookback + 1`` rows.
        """
        self._validate_data(prices)

        # pct_change() leaves the first row NaN, so `lookback` returns require
        # `lookback + 1` price rows.
        min_rows = self.lookback + 1
        if prices.shape[0] < min_rows:
            raise ValueError(f"Need at least {min_rows} rows of data to compute volatility")

        returns = prices.pct_change()

        # Rolling population std (ddof=0), annualized.
        rolling_std = returns.rolling(window=self.lookback, min_periods=self.lookback).std(ddof=0)
        vol = rolling_std * np.sqrt(TRADING_DAYS)

        # Row 0 of `returns` is NaN, so the first window that holds `lookback`
        # real returns ends at position `lookback` (not `lookback - 1`).
        vol = vol.iloc[self.lookback :]

        # Invert the sign so low volatility scores high. The constant factor is
        # kept for backward compatibility: it cancels in the z-score below and
        # only affects magnitudes in the single-asset case.
        scores = -vol * 10.0

        if scores.shape[1] > 1:
            # Cross-sectional z-score per row: (score - row mean) / row std.
            row_mean = scores.mean(axis=1)
            # A zero row std would divide by zero; make those rows NaN instead.
            row_std = scores.std(axis=1).replace(0, np.nan)
            result = scores.sub(row_mean, axis=0).div(row_std, axis=0)
        else:
            # Single asset: no cross-section to normalize against.
            result = scores.copy()

        self._values = result
        return result
37131

38132

39133
class IdiosyncraticVolatility(VolatilityFactor):
    """Idiosyncratic volatility relative to an equal-weighted market proxy.

    Vectorized market-model approach:

    - market return ``rm`` = equal-weighted mean of asset returns per row
    - rolling ``cov(ri, rm)`` and ``var(rm)`` (population estimates)
    - ``beta = cov / var``
    - ``residuals = ri - beta * rm``
    - ``idio_vol`` = rolling population std of the residuals, annualized

    The sign is inverted (low idio-vol -> high score) and, when more than one
    asset is present, each row is z-scored across assets.

    Parameters
    ----------
    lookback : int
        Rolling window, in rows of returns, used for both the beta estimate
        and the residual standard deviation. Default is 63.
    name : str
        Human-readable name for the factor.
    """

    def __init__(self, lookback: int = 63, name: str = "idiosyncratic_volatility"):
        super().__init__(lookback=lookback, name=name)

    def compute(self, prices: pd.DataFrame) -> pd.DataFrame:
        """Compute idiosyncratic-volatility scores from price levels.

        Parameters
        ----------
        prices : pd.DataFrame
            Price levels, one column per asset, one row per period.

        Returns
        -------
        pd.DataFrame
            Same columns as ``prices``, indexed by the return rows that remain
            after the warm-up period. Two rolling passes are chained (beta,
            then residual std), so the first ``2 * (lookback - 1)`` return rows
            are dropped. With several assets the values are cross-sectional
            z-scores of ``-idio_vol``; with a single asset they are
            ``-idio_vol``. Rows where beta is undefined (zero market variance)
            or with zero cross-sectional dispersion are NaN.

        Raises
        ------
        ValueError
            If there are fewer than ``2 * lookback`` price rows, or fewer than
            ``2 * lookback - 1`` complete (non-NA) return rows.
        """
        self._validate_data(prices)

        window = self.lookback
        # Each rolling pass needs `window - 1` rows before its first full
        # window; the residual std runs on top of the rolling beta, so the
        # warm-up periods add up.
        warmup = 2 * (window - 1)
        min_returns = warmup + 1
        min_rows = min_returns + 1  # pct_change() consumes one price row

        if prices.shape[0] < min_rows:
            raise ValueError(
                f"Need at least {min_rows} rows of data to compute idiosyncratic volatility"
            )

        # Simple returns; rows containing any NaN are dropped.
        returns = prices.pct_change().dropna()
        if returns.shape[0] < min_returns:
            raise ValueError(
                f"Need at least {min_returns} non-NA return rows to compute idio-vol"
            )

        # Market proxy: equal-weighted mean across assets.
        market = returns.mean(axis=1)

        # cov(ri, rm) = E[ri * rm] - E[ri] * E[rm], over full windows only.
        returns_mean = returns.rolling(window=window, min_periods=window).mean()
        market_mean = market.rolling(window=window, min_periods=window).mean()
        e_ri_rm = returns.mul(market, axis=0).rolling(window=window, min_periods=window).mean()
        cov_with_mkt = e_ri_rm - returns_mean.mul(market_mean, axis=0)

        # Population variance (ddof=0) to match the population covariance
        # above; zeros become NaN so the beta division cannot divide by zero.
        market_var = (
            market.rolling(window=window, min_periods=window).var(ddof=0).replace(0, np.nan)
        )

        # Market-model beta and the residual it leaves unexplained.
        beta = cov_with_mkt.div(market_var, axis=0)
        residuals = returns - beta.mul(market, axis=0)

        # Rolling population std of residuals, annualized.
        residual_std = residuals.rolling(window=window, min_periods=window).std(ddof=0)
        idio_vol = residual_std * np.sqrt(TRADING_DAYS)

        # Drop the combined warm-up so the first row is the first one backed
        # by a full window of residuals.
        idio_vol = idio_vol.iloc[warmup:]

        # Invert the sign: prefer low idiosyncratic volatility.
        scores = -idio_vol

        if scores.shape[1] > 1:
            # Cross-sectional z-score per row: (score - row mean) / row std.
            row_mean = scores.mean(axis=1)
            # A zero row std would divide by zero; make those rows NaN instead.
            row_std = scores.std(axis=1).replace(0, np.nan)
            result = scores.sub(row_mean, axis=0).div(row_std, axis=0)
        else:
            # Single asset: no cross-section to normalize against.
            result = scores.copy()

        self._values = result
        return result

0 commit comments

Comments
 (0)