Commit 21560db

DFM model and test added after deleting wrong merge operation

1 parent eaba40e commit 21560db

File tree

3 files changed: +234 -4 lines changed

pymc_extras/statespace/models/DFM.py

Lines changed: 6 additions & 0 deletions
@@ -41,6 +41,12 @@ class BayesianDynamicFactor(PyMCStateSpace):
         Names of the observed time series. If not provided, default names will be generated as `endog_1`, `endog_2`, ..., `endog_k` based on `k_endog`.
         At least one of `k_endog` or `endog_names` must be provided.
 
+    k_endog : int
+        Number of observed time series.
+
+    endog_names : Sequence[str], optional
+        Names of the observed time series. If not provided, default names will be generated as `endog_1`, `endog_2`, ..., `endog_k`.
+
     exog : array_like, optional
         Array of exogenous regressors for the observation equation (nobs x k_exog).
         Default is None, meaning no exogenous regressors.
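A minimal usage sketch of the two options documented above, assuming the constructor accepts either `k_endog` or `endog_names` as the docstring describes; all argument values are illustrative, not a recommended configuration:

# Sketch only: values are illustrative and not taken from this commit's diff.
from pymc_extras.statespace.models.DFM import BayesianDynamicFactor

# Option 1: pass k_endog; series names default to endog_1, endog_2, ..., endog_k
dfm_by_count = BayesianDynamicFactor(
    k_factors=1, factor_order=2, k_endog=3, error_order=1
)

# Option 2: pass endog_names; k_endog is then assumed to be inferred from the list length
dfm_by_names = BayesianDynamicFactor(
    k_factors=1, factor_order=2, endog_names=["y1", "y2", "y3"], error_order=1
)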

pymc_extras/statespace/utils/constants.py

Lines changed: 4 additions & 4 deletions
@@ -7,10 +7,10 @@
 SHOCK_DIM = "shock"
 SHOCK_AUX_DIM = "shock_aux"
 TIME_DIM = "time"
-AR_PARAM_DIM = "ar_lag"
-MA_PARAM_DIM = "ma_lag"
-SEASONAL_AR_PARAM_DIM = "seasonal_ar_lag"
-SEASONAL_MA_PARAM_DIM = "seasonal_ma_lag"
+AR_PARAM_DIM = "lag_ar"
+MA_PARAM_DIM = "lag_ma"
+SEASONAL_AR_PARAM_DIM = "seasonal_lag_ar"
+SEASONAL_MA_PARAM_DIM = "seasonal_lag_ma"
 ETS_SEASONAL_DIM = "seasonal_lag"
 FACTOR_DIM = "factor"
 ERROR_AR_PARAM_DIM = "error_ar_param"
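These constants are dimension names; a hypothetical sketch of how the renamed values could be used as PyMC coordinate keys (the coords contents below are an assumption for illustration, not code from this commit):

# Hypothetical illustration: the constants exist as shown in the diff above,
# but this coords mapping is invented for demonstration purposes.
from pymc_extras.statespace.utils.constants import AR_PARAM_DIM, MA_PARAM_DIM

coords = {
    AR_PARAM_DIM: [1, 2, 3],  # "lag_ar": one coordinate label per AR lag
    MA_PARAM_DIM: [1],        # "lag_ma": one coordinate label per MA lag
}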

tests/statespace/models/test_DFM.py

Lines changed: 224 additions & 0 deletions
@@ -0,0 +1,224 @@
import numpy as np
import pandas as pd
import pymc as pm
import pytensor
import pytensor.tensor as pt
import pytest
import statsmodels.api as sm

from numpy.testing import assert_allclose
from statsmodels.tsa.statespace.dynamic_factor import DynamicFactor

from pymc_extras.statespace.models.DFM import BayesianDynamicFactor

floatX = pytensor.config.floatX


@pytest.fixture(scope="session")
def data():
    df = pd.read_csv(
        "tests/statespace/_data/statsmodels_macrodata_processed.csv",
        index_col=0,
        parse_dates=True,
    ).astype(floatX)
    df.index.freq = df.index.inferred_freq
    return df


@pytest.mark.parametrize("k_factors", [1, 2])
@pytest.mark.parametrize("factor_order", [1, 2, 3, 4])
@pytest.mark.parametrize("error_order", [1, 2, 3, 4])
def test_dfm_matrices_match_statsmodels(data, k_factors, factor_order, error_order):
    y = data
    print(data.shape)
    n_obs, k_endog = y.shape
    print(n_obs, k_endog)

    with pm.Model():
        dfm = BayesianDynamicFactor(
            k_factors=k_factors,
            factor_order=factor_order,
            k_endog=k_endog,
            error_order=error_order,
            error_var=False,
            verbose=True,
        )
        factor_part = max(1, factor_order) * k_factors
        error_part = error_order * k_endog if error_order > 0 else 0
        k_states = factor_part + error_part

        pm.Normal("x0", mu=0.0, sigma=1.0, shape=(k_states,))
        pm.HalfNormal("P0", sigma=1.0, shape=(k_states, k_states))

        pm.Normal("factor_loadings", mu=0.0, sigma=1.0, shape=(k_endog, k_factors))
        pm.Normal("factor_ar", mu=0.0, sigma=1.0, shape=(k_factors, factor_order))
        pm.HalfNormal("factor_sigma", sigma=1.0, shape=(k_factors,))

        pm.Normal("error_ar", mu=0.0, sigma=1.0, shape=(k_endog, error_order))
        pm.HalfNormal("error_sigma", sigma=1.0, shape=(k_endog,))

        dfm.build_statespace_graph(data=data)
        ssm_pymc = dfm.unpack_statespace()

    # Build statsmodels DynamicFactor model
    sm_dfm = DynamicFactor(
        endog=y,
        k_factors=k_factors,
        factor_order=factor_order,
        error_order=error_order,
    )

    x0 = np.zeros(k_states)
    P0 = np.eye(k_states)
    sm_dfm.initialize_known(initial_state=x0, initial_state_cov=P0)
    # print(sm_dfm.ssm.valid_names)

    # Extract relevant matrices from statsmodels
    # sm_matrices = [sm_dfm.ssm[m] for m in ["initial_state", "initial_state_cov", "state_intercept", "obs_intercept", "transition", "design", "selection", "obs_cov", "state_cov"]]
    sm_matrices = sm_dfm.ssm

    # Matrices are stored in the same order in the PyMC model and the statsmodels model

    # Compile pymc tensors to numpy arrays with dummy inputs
    pymc_inputs = list(pytensor.graph.basic.explicit_graph_inputs(ssm_pymc))
    f_ssm = pytensor.function(pymc_inputs, ssm_pymc)

    # Provide dummy inputs: zeros or identities as appropriate
    test_vals = {}
    for v in pymc_inputs:
        shape = v.type.shape
        if len(shape) == 0:
            test_vals[v.name] = 0.5
        else:
            # For covariance matrices, use identity, else fill with 0.5
            if "cov" in v.name or "state_cov" in v.name or "obs_cov" in v.name:
                test_vals[v.name] = np.eye(shape[0], shape[1], dtype=pytensor.config.floatX)
            else:
                test_vals[v.name] = np.full(shape, 0.5, dtype=pytensor.config.floatX)

    pymc_matrices = f_ssm(**test_vals)

    matrix_names = [
        "initial_state",
        "initial_state_cov",
        "state_intercept",
        "obs_intercept",
        "transition",
        "design",
        "selection",
        "obs_cov",
        "state_cov",
    ]
    for pm_mat, sm_mat, name in zip(pymc_matrices, sm_matrices, matrix_names):
        assert_allclose(pm_mat, sm_mat, atol=1e-5, err_msg=f"Mismatch in matrix {name}")


@pytest.mark.parametrize("k_factors", [1])
@pytest.mark.parametrize("factor_order", [2])
@pytest.mark.parametrize("error_order", [1])
def test_loglike_matches_statsmodels(data, k_factors, factor_order, error_order):
    """
    Tests if the log-likelihood calculated by the PyMC statespace model matches
    the one from statsmodels when given the same parameters.
    """
    k_endog = data.shape[1]
    endog_names = data.columns.tolist()  # ['realgdp', 'realcons', 'realinv']

    sm_dfm = DynamicFactor(
        endog=data,
        k_factors=k_factors,
        factor_order=factor_order,
        error_order=error_order,
    )
    print(sm_dfm.param_names)

    test_params = {
        # Loadings are now separate parameters
        "loading.f1.realgdp": 0.9,
        "loading.f1.realcons": -0.8,
        "loading.f1.realinv": 0.7,
        # Factor AR coefficients
        "L1.f1.f1": 0.5,
        "L2.f1.f1": 0.2,
        # Error AR coefficients use parentheses
        "L1.e(realgdp).e(realgdp)": 0.4,
        "L1.e(realcons).e(realcons)": 0.2,
        "L1.e(realinv).e(realinv)": -0.1,
        # Error variances
        "sigma2.realgdp": 0.5,
        "sigma2.realcons": 0.4,
        "sigma2.realinv": 0.3,
        # NOTE: 'sigma2.f1' NOT included, as it's fixed to 1 by default.
    }

    assert set(test_params.keys()) == set(sm_dfm.param_names)
    params_vector = np.array([test_params[name] for name in sm_dfm.param_names])

    # Calculate the log-likelihood in statsmodels
    sm_loglike = sm_dfm.loglike(params_vector)

    # Build the PyMC model and compile its logp function
    with pm.Model() as model:
        factor_part = max(1, factor_order) * k_factors
        error_part = error_order * k_endog if error_order > 0 else 0
        k_states = factor_part + error_part

        pm.Normal("x0", mu=0.0, sigma=1.0, shape=(k_states,))
        pm.Deterministic("P0", pt.eye(k_states, k_states))

        pm.Normal("factor_loadings", mu=0.0, sigma=1.0, shape=(k_endog, k_factors))
        pm.Normal("factor_ar", mu=0.0, sigma=1.0, shape=(k_factors, factor_order))
        pm.Deterministic("factor_sigma", pt.ones(k_factors))

        pm.Normal("error_ar", mu=0.0, sigma=1.0, shape=(k_endog, error_order))
        pm.HalfNormal("error_sigma", sigma=1.0, shape=(k_endog,))

        dfm = BayesianDynamicFactor(
            k_factors=k_factors, factor_order=factor_order, k_endog=k_endog, error_order=error_order
        )

        dfm.build_statespace_graph(data, "data_ssm")
        logp_fn = model.compile_logp()

    # Evaluate the PyMC log-likelihood using the same parameters
    pymc_param_values = {
        "x0": np.zeros(k_states),
        # 'P0_log__': np.log(np.eye(k_states)),
        "factor_loadings": np.array(
            [
                test_params["loading.f1.realgdp"],
                test_params["loading.f1.realcons"],
                test_params["loading.f1.realinv"],
            ]
        ).reshape(k_endog, k_factors),
        "factor_ar": np.array([test_params["L1.f1.f1"], test_params["L2.f1.f1"]]).reshape(
            k_factors, factor_order
        ),
        # 'factor_sigma_log__': np.log(np.array([1.0])),
        "error_ar": np.array(
            [
                test_params["L1.e(realgdp).e(realgdp)"],
                test_params["L1.e(realcons).e(realcons)"],
                test_params["L1.e(realinv).e(realinv)"],
            ]
        ).reshape(k_endog, error_order),
        "error_sigma_log__": np.log(
            np.sqrt(
                np.array(
                    [  # log of standard deviations (sqrt of the statsmodels variances)
                        test_params["sigma2.realgdp"],
                        test_params["sigma2.realcons"],
                        test_params["sigma2.realinv"],
                    ]
                )
            )
        ),
    }
    pymc_loglike = logp_fn(pymc_param_values)

    assert_allclose(
        pymc_loglike,
        sm_loglike,
        rtol=1e-5,
        err_msg="PyMC log-likelihood does not match statsmodels for manually specified parameters",
    )
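Both tests size the latent state vector with the same rule: `max(1, factor_order) * k_factors` factor states plus `error_order * k_endog` error states. A standalone arithmetic check of that rule, with illustrative values:

# Worked example of the state-count rule used in the tests above (values are illustrative).
k_factors, factor_order, error_order, k_endog = 2, 3, 1, 3

factor_part = max(1, factor_order) * k_factors                 # 3 * 2 = 6 factor states
error_part = error_order * k_endog if error_order > 0 else 0   # 1 * 3 = 3 error states
k_states = factor_part + error_part

assert k_states == 9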

0 commit comments