Skip to content

Commit 9f99632

Browse files
feat: optimize idiosyncratic volatility factor using vectorized covariance operations
1 parent f981e2c commit 9f99632

File tree

2 files changed

+100
-95
lines changed

2 files changed

+100
-95
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""
2+
Benchmark script to compare performance of factor computations.
3+
4+
Usage:
5+
python examples/benchmarks/benchmark_factors.py
6+
"""
7+
8+
import time
9+
import numpy as np
10+
import pandas as pd
11+
12+
from quant_research_starter.factors import (
13+
MomentumFactor,
14+
ValueFactor,
15+
SizeFactor,
16+
VolatilityFactor,
17+
IdiosyncraticVolatility,
18+
BollingerBandsFactor,
19+
)
20+
21+
def generate_synthetic_prices(
    n_assets: int = 500, n_days: int = 252 * 3, seed: int = 42
) -> pd.DataFrame:
    """Generate synthetic geometric random-walk prices for benchmarking.

    Args:
        n_assets: Number of columns (tickers) to simulate.
        n_days: Number of business-day rows to simulate.
        seed: Seed for the local random generator; the same seed always
            produces the same price paths.

    Returns:
        DataFrame of shape ``(n_days, n_assets)`` indexed by business days
        ending at (or just before) today, with columns ``Stock_000``,
        ``Stock_001``, ...
    """
    # A local Generator keeps the data reproducible without reseeding NumPy's
    # global RNG, which would silently affect any other code in the process.
    rng = np.random.default_rng(seed)
    returns = rng.normal(0, 0.01, size=(n_days, n_assets))

    # Cumulative log-returns -> price levels starting near 100.
    prices = 100 * np.exp(np.cumsum(returns, axis=0))

    # Normalize to midnight so the index holds plain dates rather than the
    # wall-clock time at which the script happened to be launched.
    end = pd.Timestamp.today().normalize()
    dates = pd.date_range(end=end, periods=n_days, freq="B")
    tickers = [f"Stock_{i:03d}" for i in range(n_assets)]
    return pd.DataFrame(prices, index=dates, columns=tickers)
29+
30+
31+
def benchmark_factor(factor, prices: pd.DataFrame) -> float:
    """Time a single ``factor.compute(prices)`` call and print the result.

    Args:
        factor: Object exposing ``compute(prices)`` plus ``name`` and
            ``lookback`` attributes (both are read for the report line).
        prices: Price data passed straight through to ``factor.compute``.

    Returns:
        Elapsed seconds spent inside ``factor.compute``.
    """
    # perf_counter is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted and is coarse on some platforms.
    start = time.perf_counter()
    factor.compute(prices)
    elapsed = time.perf_counter() - start
    print(f"{factor.name:<25} | Lookback: {factor.lookback:<5} | Time: {elapsed:.3f} sec")
    return elapsed
38+
39+
40+
def main():
    """Build one synthetic price panel and time each factor against it."""
    print("Generating synthetic data...")
    prices = generate_synthetic_prices(n_assets=500, n_days=252 * 3)
    print(f"Data shape: {prices.shape}")

    print("\nRunning factor benchmarks...\n")

    # Every factor is run on the same panel, in this order.
    suite = (
        MomentumFactor(lookback=63),
        ValueFactor(),
        SizeFactor(),
        VolatilityFactor(lookback=21),
        IdiosyncraticVolatility(lookback=63),
        BollingerBandsFactor(lookback=20),
    )
    for candidate in suite:
        benchmark_factor(candidate, prices)
58+
59+
60+
if __name__ == "__main__":
    # Script entry point: importing this module must not start the benchmark.
    main()
Lines changed: 39 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Volatility factor implementations."""
1+
"""Volatility factor implementations (vectorized)."""
22

33
import numpy as np
44
import pandas as pd
@@ -7,14 +7,7 @@
77

88

99
class VolatilityFactor(Factor):
    """Historical (realized) volatility factor, annualized and sign-inverted.

    Scores are the negative of rolling return volatility, so lower-volatility
    assets receive higher scores (low-volatility anomaly). With more than one
    asset the scores are z-scored across assets on each date.
    """

    def __init__(self, lookback: int = 21, name: str = "volatility"):
        super().__init__(name=name, lookback=lookback)

    def compute(self, prices: pd.DataFrame) -> pd.DataFrame:
        """Compute historical volatility over lookback period.

        Args:
            prices: Price levels, one column per asset, one row per period.

        Returns:
            DataFrame of factor scores starting at row ``lookback - 1`` of
            ``prices``. That first retained row is NaN because the first
            return from ``pct_change`` is undefined.

        Raises:
            ValueError: If ``prices`` has fewer than ``lookback`` rows.
        """
        self._validate_data(prices)

        # Fail loudly on short input instead of silently returning an
        # empty frame after the slice below.
        if len(prices) < self.lookback:
            raise ValueError(f"Need at least {self.lookback} periods of data")

        returns = prices.pct_change()

        # Rolling std over a full window only, annualized with 252 periods.
        vol = returns.rolling(window=self.lookback, min_periods=self.lookback).std() * np.sqrt(252)
        vol = vol.iloc[self.lookback - 1:]

        # Low-volatility anomaly: invert the sign (scaled by 10).
        scores = -vol * 10.0

        if scores.shape[1] > 1:
            # Cross-sectional z-score per date. A zero spread (all assets
            # identical on that date) becomes NaN rather than +/-inf.
            centered = scores.sub(scores.mean(axis=1), axis=0)
            spread = scores.std(axis=1).replace(0, np.nan)
            result = centered.div(spread, axis=0)
        else:
            # Single asset: nothing to normalize against.
            result = scores

        self._values = result
        return result
5637

5738

5839
class IdiosyncraticVolatility(VolatilityFactor):
    """Idiosyncratic volatility relative to a one-factor market model.

    For every rolling window each asset's returns are regressed on the
    equal-weighted average return of all assets; the factor is the negative
    annualized standard deviation of that window's regression residuals.
    """

    def compute(self, prices: pd.DataFrame) -> pd.DataFrame:
        """Compute idiosyncratic volatility using vectorized regression.

        Instead of materializing residuals, the residual variance of the
        per-window OLS fit is obtained in closed form:

            var(resid) = var(r) - cov(r, m)**2 / var(m)

        All moments use the same window and the same (population, ddof=0)
        normalization, so the implied beta ``cov / var`` is the unbiased
        OLS slope for that window.

        Args:
            prices: Price levels, one column per asset, one row per period.

        Returns:
            DataFrame of factor scores (z-scored across assets when there is
            more than one column), one row per date that has a full window
            of returns.

        Raises:
            ValueError: If ``prices`` has fewer than ``lookback`` rows.
        """
        self._validate_data(prices)

        # Fail loudly on short input instead of silently returning an
        # empty frame after the slice below.
        if len(prices) < self.lookback:
            raise ValueError(f"Need at least {self.lookback} periods of data")

        window = self.lookback

        # dropna() removes every date on which any asset has a missing return.
        returns = prices.pct_change().dropna()
        # Equal-weighted portfolio as the market proxy.
        market = returns.mean(axis=1)

        def roll(obj):
            # One shared window definition keeps every moment aligned.
            return obj.rolling(window=window, min_periods=window)

        # Rolling population covariance of each asset with the market:
        # E[r*m] - E[r]*E[m].
        mean_r = roll(returns).mean()
        mean_m = roll(market).mean()
        cov_rm = roll(returns.mul(market, axis=0)).mean() - mean_r.mul(mean_m, axis=0)

        var_r = roll(returns).var(ddof=0)
        # A flat market has no defined beta; yield NaN instead of dividing by 0.
        var_m = roll(market).var(ddof=0).replace(0, np.nan)

        # Closed-form residual variance of the per-window regression; clip
        # guards against tiny negative values from floating-point rounding.
        resid_var = (var_r - cov_rm.pow(2).div(var_m, axis=0)).clip(lower=0)
        idio_vol = np.sqrt(resid_var) * np.sqrt(252)

        # Drop the warm-up rows that lack a full window.
        idio_vol = idio_vol.iloc[window - 1:]

        # Invert sign (low idiosyncratic volatility scores higher).
        scores = -idio_vol

        if scores.shape[1] > 1:
            # Cross-sectional z-score per date. A zero spread becomes NaN
            # rather than +/-inf.
            centered = scores.sub(scores.mean(axis=1), axis=0)
            spread = scores.std(axis=1).replace(0, np.nan)
            result = centered.div(spread, axis=0)
        else:
            result = scores

        self._values = result
        return result

0 commit comments

Comments
 (0)