Skip to content

Commit ee03857

Browse files
committed
Added new file DFM.py for GSOC 2025 Dynamical Factor Models
1 parent 24930b5 commit ee03857

File tree

1 file changed

+273
-0
lines changed
  • pymc_extras/statespace/models

1 file changed

+273
-0
lines changed

pymc_extras/statespace/models/DFM.py

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
from typing import Any
2+
3+
import numpy as np
4+
5+
from pymc_extras.statespace.core.statespace import PyMCStateSpace
6+
from pymc_extras.statespace.utils.constants import (
7+
ALL_STATE_DIM,
8+
AR_PARAM_DIM,
9+
MA_PARAM_DIM,
10+
SHOCK_DIM,
11+
)
12+
13+
14+
class BayesianDynamicFactor(PyMCStateSpace):
15+
r"""
16+
Dynamic Factor Models
17+
18+
Parameters
19+
----------
20+
k_endog : int
21+
Number of observed time series.
22+
23+
k_factors : int
24+
Number of latent factors.
25+
26+
factor_order : int
27+
Order of the VAR process for the latent factors.
28+
29+
exog : array_like, optional
30+
Array of exogenous regressors for the observation equation (nobs x k_exog).
31+
32+
error_order : int, optional
33+
Order of the AR process for the observation error component.
34+
Default is 0, corresponding to white noise errors.
35+
36+
error_var : bool, optional
37+
If True, errors are modeled jointly via a VAR process;
38+
otherwise, each error is modeled separately.
39+
40+
error_cov_type : {'scalar', 'diagonal', 'unstructured'}, optional
41+
Structure of the covariance matrix of the observation errors.
42+
43+
enforce_stationarity : bool, optional
44+
Whether to transform AR parameters to enforce stationarity.
45+
46+
filter_type : str, optional
47+
Type of Kalman filter to use. See PyMCStateSpace for valid options.
48+
49+
verbose : bool, optional
50+
If True, prints model setup details.
51+
52+
53+
54+
Notes
55+
-----
56+
This model implements a dynamic factor model in the spirit of
57+
statsmodels.tsa.statespace.dynamic_factor.DynamicFactor. The model assumes that
58+
the observed time series are driven by a set of latent factors that evolve
59+
according to a VAR process, possibly along with an autoregressive error term.
60+
61+
62+
63+
"""
64+
65+
def __init__(
66+
self,
67+
k_endog: int,
68+
k_factors: int,
69+
factor_order: int,
70+
exog: np.ndarray | None = None,
71+
error_order: int = 0,
72+
error_var: bool = False,
73+
error_cov_type: str = "diagonal",
74+
enforce_stationarity: bool = True,
75+
filter_type: str = "standard",
76+
verbose: bool = True,
77+
):
78+
self.k_endog = k_endog
79+
self.k_factors = k_factors
80+
self.factor_order = factor_order
81+
self.error_order = error_order
82+
self.error_var = error_var
83+
self.error_cov_type = error_cov_type
84+
self.enforce_stationarity = enforce_stationarity
85+
self.exog = exog
86+
87+
# Determine the dimension for the latent factor states.
88+
# For static factors, one might use k_factors.
89+
# For dynamic factors with lags, the state might include current factors and past lags.
90+
k_factor_states = k_factors * (1 + factor_order)
91+
92+
# Determine the dimension for the error component.
93+
# If error_order > 0 then we add additional states for error dynamics, otherwise white noise error.
94+
k_error_states = k_endog * (error_order + 1) if error_order > 0 else 0
95+
96+
# Total state dimension
97+
k_states = k_factor_states + k_error_states
98+
99+
# Number of independent shocks.
100+
# Typically, the latent factors introduce k_factors shocks.
101+
# If error_order > 0 and errors are modeled jointly or separately, add appropriate count.
102+
k_posdef = k_factors + (k_endog if error_order > 0 else 0)
103+
104+
# Initialize the PyMCStateSpace base class.
105+
super().__init__(
106+
k_endog=k_endog,
107+
k_states=k_states,
108+
k_posdef=k_posdef,
109+
filter_type=filter_type,
110+
verbose=verbose,
111+
measurement_error=False,
112+
)
113+
114+
@property
115+
def param_names(self):
116+
names = ["factor_loadings", "factor_ar", "error_ar", "error_sigma"]
117+
118+
# factor_sigma is fixed and equal to the identity matrix
119+
120+
# Handle cases where parameters should be excluded based on model settings
121+
if self.factor_order == 0:
122+
names.remove("factor_ar")
123+
if self.error_order == 0:
124+
names.remove("error_ar")
125+
if self.error_cov_type in ["diagonal", "scalar"]:
126+
names.remove("error_sigma")
127+
128+
return names
129+
130+
@property
131+
def param_info(self) -> dict[str, dict[str, Any]]:
132+
info = {
133+
"factor_loadings": {
134+
"shape": (self.k_endog, self.k_factors),
135+
"constraints": None,
136+
},
137+
"factor_ar": {
138+
"shape": (self.k_factors, self.factor_order, self.k_factors),
139+
"constraints": None,
140+
},
141+
"error_ar": {
142+
"shape": (self.k_endog, self.error_order, self.k_endog)
143+
if self.error_var
144+
else (self.k_endog, self.error_order),
145+
"constraints": None,
146+
},
147+
"error_sigma": {
148+
"shape": (self.k_endog,),
149+
"constraints": "Positive"
150+
if self.error_cov_type in ["diagonal", "scalar"]
151+
else "Positive Semi-definite",
152+
},
153+
"error_cov": {
154+
"shape": (self.k_endog, self.k_endog)
155+
if self.error_cov_type == "unstructured"
156+
else None,
157+
"constraints": "Positive Semi-definite"
158+
if self.error_cov_type == "unstructured"
159+
else None,
160+
},
161+
}
162+
163+
for name in self.param_names:
164+
info[name]["dims"] = self.param_dims[name]
165+
166+
return {name: info[name] for name in self.param_names}
167+
168+
@property
169+
def state_names(self):
170+
# Initialize state names based on the endogenous variables
171+
state_names = self.endog_names.copy()
172+
173+
# Add names for the factor loadings (one per observation and factor)
174+
for i in range(self.k_endog):
175+
for j in range(self.k_factors):
176+
state_names.append(f"loading_{i}_{j}")
177+
178+
# Add names for the factor autoregressive coefficients (for each factor's dynamics)
179+
for lag in range(1, self.factor_order + 1):
180+
for i in range(self.k_factors):
181+
for j in range(self.k_factors):
182+
state_names.append(f"factor_ar_{lag}_{i}_{j}")
183+
184+
# Add names for the error autoregressive coefficients (if error_order > 0)
185+
if self.error_order > 0:
186+
if self.error_cov_type == "diagonal":
187+
# Diagonal error AR, one parameter per series per lag
188+
for lag in range(1, self.error_order + 1):
189+
for i in range(self.k_endog):
190+
state_names.append(f"error_ar_{lag}_{i}")
191+
elif self.error_cov_type == "unstructured":
192+
# Full covariance error AR (unstructured), one for each pair of endogenous variables
193+
for lag in range(1, self.error_order + 1):
194+
for i in range(self.k_endog):
195+
for j in range(i + 1):
196+
state_names.append(f"error_ar_{lag}_{i}_{j}")
197+
198+
# Add names for the factor shocks' variances (one per factor)
199+
for i in range(self.k_factors):
200+
state_names.append(f"factor_sigma_{i}")
201+
202+
# Add names for the error shocks' variances/covariances
203+
if self.error_order > 0:
204+
if self.error_cov_type == "diagonal":
205+
# Diagonal error covariances (one per series)
206+
for i in range(self.k_endog):
207+
state_names.append(f"error_sigma_{i}")
208+
elif self.error_cov_type == "scalar":
209+
# Scalar error covariances (shared variance for all errors)
210+
state_names.append("error_sigma")
211+
elif self.error_cov_type == "unstructured":
212+
# Full error covariance matrix
213+
for i in range(self.k_endog):
214+
for j in range(i + 1):
215+
state_names.append(f"error_cov_{i}_{j}")
216+
217+
return state_names
218+
219+
@property
220+
def observed_states(self):
221+
return self.endog_names
222+
223+
@property
224+
def shock_names(self):
225+
shock_names = []
226+
227+
# Add names for factor shocks (one per factor)
228+
for i in range(self.k_factors):
229+
shock_names.append(f"factor_shock_{i}")
230+
231+
# Add names for idiosyncratic error shocks (one per observed variable)
232+
if self.error_order > 0:
233+
for i in range(self.k_endog):
234+
shock_names.append(f"error_shock_{i}")
235+
236+
return shock_names
237+
238+
239+
@property
240+
def param_dims(self):
241+
"""
242+
Define parameter dimensions for the Dynamic Factor Model (DFM).
243+
244+
Returns
245+
-------
246+
dict
247+
Dictionary mapping parameter names to their respective dimensions.
248+
"""
249+
coord_map = {
250+
"factor_loadings": (ALL_STATE_DIM, SHOCK_DIM), # Factor loadings dimension
251+
"factor_sigma": (SHOCK_DIM,), # Factor shocks (one per factor)
252+
}
253+
254+
# Factor AR coefficients if applicable
255+
if self.factor_order > 0:
256+
coord_map["factor_ar"] = (AR_PARAM_DIM, SHOCK_DIM, SHOCK_DIM)
257+
258+
# Error AR coefficients and variances
259+
if self.error_order > 0:
260+
if self.error_cov_type == "diagonal":
261+
coord_map["error_ar"] = (MA_PARAM_DIM, SHOCK_DIM) # AR for errors
262+
coord_map["error_sigma"] = (SHOCK_DIM,) # One variance for each observed variable
263+
elif self.error_cov_type == "scalar":
264+
coord_map["error_ar"] = (MA_PARAM_DIM, SHOCK_DIM)
265+
coord_map["error_sigma"] = None # Single scalar for error variance
266+
elif self.error_cov_type == "unstructured":
267+
coord_map["error_ar"] = (MA_PARAM_DIM, SHOCK_DIM, SHOCK_DIM) # AR for errors
268+
coord_map["error_cov_L"] = (SHOCK_DIM, SHOCK_DIM) # Lower triangular Cholesky factor
269+
coord_map["error_cov_sd"] = (SHOCK_DIM,) # Standard deviations for diagonal
270+
else:
271+
raise ValueError("Invalid error covariance type.")
272+
273+
return coord_map

0 commit comments

Comments
 (0)