11
11
ALL_STATE_AUX_DIM ,
12
12
ALL_STATE_DIM ,
13
13
AR_PARAM_DIM ,
14
+ ERROR_AR_PARAM_DIM ,
14
15
FACTOR_DIM ,
15
16
OBS_STATE_AUX_DIM ,
16
17
OBS_STATE_DIM ,
@@ -31,11 +32,13 @@ class BayesianDynamicFactor(PyMCStateSpace):
31
32
factor_order : int
32
33
Order of the VAR process for the latent factors.
33
34
34
- k_endog : int
35
- Number of observed time series.
35
+ k_endog : int, optional
36
+ Number of observed time series. If not provided, the number of observed series will be inferred from `endog_names`.
37
+ At least one of `k_endog` or `endog_names` must be provided.
36
38
37
- endog_names : Sequence[str], optional
38
- Names of the observed time series. If not provided, default names will be generated as `endog_1`, `endog_2`, ..., `endog_k`.
39
+ endog_names : list of str, optional
40
+ Names of the observed time series. If not provided, default names will be generated as `endog_1`, `endog_2`, ..., `endog_k` based on `k_endog`.
41
+ At least one of `k_endog` or `endog_names` must be provided.
39
42
40
43
exog : array_like, optional
41
44
Array of exogenous regressors for the observation equation (nobs x k_exog).
@@ -60,14 +63,90 @@ class BayesianDynamicFactor(PyMCStateSpace):
60
63
verbose: bool, default True
61
64
If true, a message will be logged to the terminal explaining the variable names, dimensions, and supports.
62
65
66
+
63
67
Notes
64
68
-----
65
- This model implements a dynamic factor model in the spirit of
66
- statsmodels.tsa.statespace.dynamic_factor.DynamicFactor. The model assumes that
67
- the observed time series are driven by a set of latent factors that evolve
68
- according to a VAR process, possibly along with an autoregressive error term.
69
-
69
+ The Dynamic Factor Model (DFM) is a multivariate state-space model used to represent high-dimensional time series
70
+ as driven by a smaller set of unobserved dynamic factors. Given a set of observed time series
71
+ :math:`\{y_t\}_{t=0}^T`, with :math:`y_t = \begin{bmatrix} y_{1,t} & y_{2,t} & \cdots & y_{\text{k\_endog},t} \end{bmatrix}^T`,
72
+ the DFM assumes that each series is a linear combination of a few latent factors and optional autoregressive errors.
73
+
74
+ Specifically, denoting the number of dynamic factors as :math:`\text{k\_factors}`, the order of the latent factor
75
+ process as :math:`p = \text{factor\_order}`, and the order of the observation error as
76
+ :math:`q = \text{error\_order}`, the model is written as:
77
+
78
+ .. math::
79
+ y_t & = \Lambda f_t + B x_t + u_t \\
80
+ f_t & = A_1 f_{t-1} + \dots + A_p f_{t-p} + \eta_t \\
81
+ u_t & = C_1 u_{t-1} + \dots + C_q u_{t-q} + \varepsilon_t
82
+
83
+
84
+ Where:
85
+ - :math:`f_t` is a vector of latent factors following a VAR(p) process.
86
+ - :math:`x_t` are optional exogenous vectors (not implemented yet).
87
+ - :math:`u_t` is a vector of observation errors, modeled jointly as a VAR(q) if `error_var=True` and otherwise treated as individual autoregressions, one per observed series.
88
+ - :math:`\eta_t` and :math:`\varepsilon_t` are white noise error terms. In order to identify the factors, :math:`Var(\eta_t) = I`.
89
+ Denote :math:`Var(\varepsilon_t) \equiv \Sigma`.
90
+
91
+
92
+ Internally, this model is represented in state-space form by stacking all current and lagged latent factors and,
93
+ if present, autoregressive observation errors into a single state vector. The full state vector has dimension
94
+ :math:`\text{k\_factors} \cdot \text{factor\_order} + \text{k\_endog} \cdot \text{error\_order}`, where :math:`\text{k\_endog}` is the number of observed time series.
95
+
96
+ The number of independent shocks in the system (i.e., the number of nonzero diagonal elements in the state noise
97
+ covariance matrix) is equal to the number of latent factors plus the number of observed series if AR errors are present.
98
+
99
+ As in other high-dimensional models, identification can be an issue, especially when many observed series load on few
100
+ factors. Careful prior specification is typically required for good estimation.
101
+
102
+ Currently, the implementation assumes the same factor order for all factors and
103
+ does not yet support measurement error, exogenous variables, or joint (VAR) error modeling.
104
+
105
+ Examples
106
+ --------
107
+ The following code snippet estimates a dynamic factor model with one latent factor,
108
+ an AR(2) structure on the factor, and an AR(1) structure on the errors:
109
+
110
+ .. code:: python
111
+
112
+ import pymc_extras.statespace as pmss
113
+ import pymc as pm
114
+
115
+ # Create DFM Statespace Model
116
+ dfm_mod = pmss.BayesianDynamicFactor(
117
+ k_factors=1,
118
+ factor_order=2,
119
+ endog_names=data.columns,
120
+ error_order=1,
121
+ error_var=False,
122
+ error_cov_type="diagonal",
123
+ filter_type="standard",
124
+ verbose=True
125
+ )
70
126
127
+ # Unpack dims and coords
128
+ x0_dims, P0_dims, factor_loadings_dims, factor_sigma_dims, factor_ar_dims, error_ar_dims, error_sigma_dims = dfm_mod.param_dims.values()
129
+ coords = dfm_mod.coords
130
+
131
+ with pm.Model(coords=coords) as pymc_mod:
132
+ # Initial state
133
+ x0 = pm.Normal("x0", dims=x0_dims)
134
+ P0 = pm.Normal("P0", dims=P0_dims)
135
+ factor_loadings = pm.Normal("factor_loadings", sigma=1, dims=factor_loadings_dims)
136
+ factor_ar = pm.Normal("factor_ar", sigma=1, dims=factor_ar_dims)
137
+ factor_sigma = pm.Deterministic("factor_sigma", pt.constant([1.0], dtype=float))
138
+ error_ar = pm.Normal("error_ar", sigma=1, dims=error_ar_dims)
139
+ sigmas = pm.HalfNormal("error_sigma", dims=error_sigma_dims)
140
+ # Build symbolic graph
141
+ dfm_mod.build_statespace_graph(data=data, mode="JAX")
142
+
143
+ with pymc_mod:
144
+ idata = pm.sample(
145
+ draws=500,
146
+ chains=2,
147
+ nuts_sampler="nutpie",
148
+ nuts_sampler_kwargs={"backend": "jax", "gradient_backend": "jax"},
149
+ )
71
150
72
151
"""
73
152
@@ -95,6 +174,8 @@ def __init__(
95
174
raise NotImplementedError (
96
175
"Joint error modeling (error_var=True) is not yet implemented."
97
176
)
177
+ if exog is not None :
178
+ raise NotImplementedError ("Exogenous variables (exog) are not yet implemented." )
98
179
99
180
self .endog_names = endog_names
100
181
self .k_endog = k_endog
@@ -110,7 +191,7 @@ def __init__(
110
191
# Determine the dimension for the latent factor states.
111
192
# For static factors, one might use k_factors.
112
193
# For dynamic factors with lags, the state might include current factors and past lags.
113
- # TODO: what if we want different factor orders for different factors?
194
+ # TODO: what if we want different factor orders for different factors? (follow suggestions in GitHub)
114
195
k_factor_states = k_factors * factor_order
115
196
116
197
# Determine the dimension for the error component.
@@ -152,7 +233,7 @@ def param_names(self):
152
233
names .remove ("factor_ar" )
153
234
if self .error_order == 0 :
154
235
names .remove ("error_ar" )
155
- if self .error_cov_type in [ "unstructured" ] :
236
+ if self .error_cov_type == "unstructured" :
156
237
names .remove ("error_sigma" )
157
238
names .append ("error_cov" )
158
239
@@ -186,7 +267,7 @@ def param_info(self) -> dict[str, dict[str, Any]]:
186
267
"constraints" : None ,
187
268
},
188
269
"error_sigma" : {
189
- "shape" : (self .k_endog ,) if self .error_cov_type in [ "diagonal" ] else (),
270
+ "shape" : (self .k_endog ,) if self .error_cov_type == "diagonal" else (),
190
271
"constraints" : "Positive" ,
191
272
},
192
273
"error_cov" : {
@@ -207,17 +288,17 @@ def state_names(self) -> list[str]:
207
288
then idiosyncratic error states (with lags).
208
289
"""
209
290
names = []
210
-
291
+ # TODO adjust notation by looking at the VARMAX implementation
211
292
# Factor states
212
293
for i in range (self .k_factors ):
213
294
for lag in range (self .factor_order ):
214
- names .append (f"factor_{ i + 1 } _lag { lag } " )
295
+ names .append (f"L { lag } . factor_{ i + 1 } " )
215
296
216
297
# Idiosyncratic error states
217
298
if self .error_order > 0 :
218
299
for i in range (self .k_endog ):
219
300
for lag in range (self .error_order ):
220
- names .append (f"error_{ i + 1 } _lag { lag } " )
301
+ names .append (f"L { lag } . error_{ i + 1 } " )
221
302
222
303
return names
223
304
@@ -231,7 +312,7 @@ def observed_states(self) -> list[str]:
231
312
@property
232
313
def coords (self ) -> dict [str , Sequence ]:
233
314
coords = make_default_coords (self )
234
-
315
+ # Add factor dimensions
235
316
coords [FACTOR_DIM ] = [f"factor_{ i + 1 } " for i in range (self .k_factors )]
236
317
237
318
# AR parameter dimensions - add if needed
@@ -240,7 +321,7 @@ def coords(self) -> dict[str, Sequence]:
240
321
241
322
# If error_order > 0
242
323
if self .error_order > 0 :
243
- coords ["error_ar_param" ] = list (range (1 , self .error_order + 1 ))
324
+ coords [ERROR_AR_PARAM_DIM ] = list (range (1 , self .error_order + 1 ))
244
325
245
326
return coords
246
327
@@ -272,13 +353,13 @@ def param_dims(self):
272
353
coord_map ["factor_ar" ] = (FACTOR_DIM , AR_PARAM_DIM )
273
354
274
355
if self .error_order > 0 :
275
- coord_map ["error_ar" ] = (OBS_STATE_DIM , "error_ar_param" )
356
+ coord_map ["error_ar" ] = (OBS_STATE_DIM , ERROR_AR_PARAM_DIM )
276
357
277
358
if self .error_cov_type in ["scalar" ]:
278
359
coord_map ["error_sigma" ] = ()
279
360
elif self .error_cov_type in ["diagonal" ]:
280
361
coord_map ["error_sigma" ] = (OBS_STATE_DIM ,)
281
- elif self .error_cov_type in [ "unstructured" ] :
362
+ if self .error_cov_type == "unstructured" :
282
363
coord_map ["error_sigma" ] = (OBS_STATE_DIM , OBS_STATE_AUX_DIM )
283
364
284
365
return coord_map
0 commit comments