Skip to content

Commit 9c50e94

Browse files
jessegrabowskiAlexAndorraaandorra-miaDekermanjian
authored
Multivariate Structural Statespace Components (#529)
* Reorganize structural model module * Allow combination of components with different numbers of observed states * Allow multiple observed in LevelTrend component * Allow multiple observed states in measurement error component * Allow multiple observed in AutoRegressive component * Fix typo in docstrings * Allow multiple observed in Cycle component * Fix Cycle docstring examples * Use pytensor block_diag for Cycle * 1. updated level_trend component coord/param labels 2. Adjusted the regression component to allow multivariate regression component specification 3. Added a notebook for quick evaluation of the adjustments and additions made * 1. removed incorrectly comitted file test_structural.py 2. replaced scipy block diag with pytensor block diag 3. Added forecast to test model in multivariate ssm notebook * removed incorrectly committed file structural.py * Always count names to determine k_endog * LevelTrend state/shock names depend on component name * Update tests to new names * More test updates * Delay dropping data names from states/coords until `.build` * Remove docstring typo * Update autoregressive component and tests * 1. updated regression component to revert to univariate shape schema if k_endog=1 2. adjusted coord name in test_statespace test_forecast_with_exog test to avoid key error 3. Added forecast plot and missing value state space section to notebook * Add component name to shock state names * Allow multiple observed in TimeSeasonality component * updated coord dimension names in regression component to always use the user given name as a prefix * updated level_trend component so that component name is added as a prefix to coord dimension names and updated tests in test_statespace accordingly * Allow multiple observed in FrequencySeasonality component * Propagate static shape information in join_tensors_by_dim_labels where possible * Regression component bugfix and tests * update default name in test * Improve cycle code with Jesse's feedback * Explicitly test matrices in test_cycle * Add test addition of two Cycles with different observed names * Make code for state cov when no innov clearer * 1. updated coords for level trend component and regression component 2. extended test_forecast to include multivariate forecasts with no exogenous variables * 1. added seasonality params coords fix from Alex Andorra 2. added datetime multivariate forecast tests no exogenous variables 3. added test to check that parameter shapes and coordinate dims agree with one another * 1. updated param dims in regression component 2. added all components to test_param_dims_coord * Add exog names to shock_names in LevelTrend * Update tests to respect new naming convention * added multivariate forecast with exogenous variables test * level_trend shock coords should be base_shock_names, not self.shock_names * Save static shape of last data dim * More static shapes * Broken test of decomposition with multiple observed * Don't use `pad` * fix decompose test * updated leveltrend, seasonal, cycle components to adhere to naming schema and updated tests in accordance to naming changes * Delete notebook * Use nwe name order in autoregressive component * Improve docstrings * Refactor test_regression to cover innotations = True * Fix comment in test_cycle * Add docstrings to core and measurement_error * Improve cycle and seasonal docstrings * Fix shape of AR params * Some other AR dims fixes * Cast all component parameters to lists in `Component.__init__` * Add type checking and errors for property combination * re-run structural notebook --------- Co-authored-by: Alexandre Andorra <[email protected]> Co-authored-by: Alex Andorra <[email protected]> Co-authored-by: Jonathan Dekermanjian <[email protected]>
1 parent 5ffaed6 commit 9c50e94

34 files changed

+5297
-2415
lines changed

notebooks/Structural Timeseries Modeling.ipynb

Lines changed: 415 additions & 363 deletions
Large diffs are not rendered by default.

pymc_extras/statespace/models/structural.py

Lines changed: 0 additions & 1679 deletions
This file was deleted.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from pymc_extras.statespace.models.structural.components.autoregressive import (
2+
AutoregressiveComponent,
3+
)
4+
from pymc_extras.statespace.models.structural.components.cycle import CycleComponent
5+
from pymc_extras.statespace.models.structural.components.level_trend import LevelTrendComponent
6+
from pymc_extras.statespace.models.structural.components.measurement_error import MeasurementError
7+
from pymc_extras.statespace.models.structural.components.regression import RegressionComponent
8+
from pymc_extras.statespace.models.structural.components.seasonality import (
9+
FrequencySeasonality,
10+
TimeSeasonality,
11+
)
12+
13+
__all__ = [
14+
"LevelTrendComponent",
15+
"MeasurementError",
16+
"AutoregressiveComponent",
17+
"TimeSeasonality",
18+
"FrequencySeasonality",
19+
"RegressionComponent",
20+
"CycleComponent",
21+
]

pymc_extras/statespace/models/structural/components/__init__.py

Whitespace-only changes.
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import numpy as np
2+
import pytensor.tensor as pt
3+
4+
from pymc_extras.statespace.models.structural.core import Component
5+
from pymc_extras.statespace.models.structural.utils import order_to_mask
6+
from pymc_extras.statespace.utils.constants import AR_PARAM_DIM
7+
8+
9+
class AutoregressiveComponent(Component):
10+
r"""
11+
Autoregressive timeseries component
12+
13+
Parameters
14+
----------
15+
order: int or sequence of int
16+
17+
If int, the number of lags to include in the model.
18+
If a sequence, an array-like of zeros and ones indicating which lags to include in the model.
19+
20+
name: str, default "auto_regressive"
21+
A name for this autoregressive component. Used to label dimensions and coordinates.
22+
23+
observed_state_names: list[str] | None, default None
24+
List of strings for observed state labels. If None, defaults to ["data"].
25+
26+
Notes
27+
-----
28+
An autoregressive component can be thought of as a way o introducing serially correlated errors into the model.
29+
The process is modeled:
30+
31+
.. math::
32+
x_t = \sum_{i=1}^p \rho_i x_{t-i}
33+
34+
Where ``p``, the number of autoregressive terms to model, is the order of the process. By default, all lags up to
35+
``p`` are included in the model. To disable lags, pass a list of zeros and ones to the ``order`` argumnet. For
36+
example, ``order=[1, 1, 0, 1]`` would become:
37+
38+
.. math::
39+
x_t = \rho_1 x_{t-1} + \rho_2 x_{t-1} + \rho_4 x_{t-1}
40+
41+
The coefficient :math:`\rho_3` has been constrained to zero.
42+
43+
.. warning:: This class is meant to be used as a component in a structural time series model. For modeling of
44+
stationary processes with ARIMA, use ``statespace.BayesianSARIMA``.
45+
46+
Examples
47+
--------
48+
Model a timeseries as an AR(2) process with non-zero mean:
49+
50+
.. code:: python
51+
52+
from pymc_extras.statespace import structural as st
53+
import pymc as pm
54+
import pytensor.tensor as pt
55+
56+
trend = st.LevelTrendComponent(order=1, innovations_order=0)
57+
ar = st.AutoregressiveComponent(2)
58+
ss_mod = (trend + ar).build()
59+
60+
with pm.Model(coords=ss_mod.coords) as model:
61+
P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0'])
62+
intitial_trend = pm.Normal('initial_trend', sigma=10, dims=ss_mod.param_dims['initial_trend'])
63+
ar_params = pm.Normal('ar_params', dims=ss_mod.param_dims['ar_params'])
64+
sigma_ar = pm.Exponential('sigma_ar', 1, dims=ss_mod.param_dims['sigma_ar'])
65+
66+
ss_mod.build_statespace_graph(data)
67+
idata = pm.sample(nuts_sampler='numpyro')
68+
69+
"""
70+
71+
def __init__(
72+
self,
73+
order: int = 1,
74+
name: str = "auto_regressive",
75+
observed_state_names: list[str] | None = None,
76+
):
77+
if observed_state_names is None:
78+
observed_state_names = ["data"]
79+
80+
k_posdef = k_endog = len(observed_state_names)
81+
82+
order = order_to_mask(order)
83+
ar_lags = np.flatnonzero(order).ravel().astype(int) + 1
84+
k_states = len(order)
85+
86+
self.order = order
87+
self.ar_lags = ar_lags
88+
89+
super().__init__(
90+
name=name,
91+
k_endog=k_endog,
92+
k_states=k_states * k_endog,
93+
k_posdef=k_posdef,
94+
measurement_error=True,
95+
combine_hidden_states=True,
96+
observed_state_names=observed_state_names,
97+
obs_state_idxs=np.tile(np.r_[[1.0], np.zeros(k_states - 1)], k_endog),
98+
)
99+
100+
def populate_component_properties(self):
101+
k_states = self.k_states // self.k_endog # this is also the number of AR lags
102+
103+
self.state_names = [
104+
f"L{i + 1}[{state_name}]"
105+
for state_name in self.observed_state_names
106+
for i in range(k_states)
107+
]
108+
109+
self.shock_names = [f"{self.name}[{obs_name}]" for obs_name in self.observed_state_names]
110+
self.param_names = [f"params_{self.name}", f"sigma_{self.name}"]
111+
self.param_dims = {f"params_{self.name}": (f"lag_{self.name}",)}
112+
self.coords = {f"lag_{self.name}": self.ar_lags.tolist()}
113+
114+
if self.k_endog > 1:
115+
self.param_dims[f"params_{self.name}"] = (
116+
f"endog_{self.name}",
117+
AR_PARAM_DIM,
118+
)
119+
self.param_dims[f"sigma_{self.name}"] = (f"endog_{self.name}",)
120+
121+
self.coords[f"endog_{self.name}"] = self.observed_state_names
122+
123+
self.param_info = {
124+
f"params_{self.name}": {
125+
"shape": (k_states,) if self.k_endog == 1 else (self.k_endog, k_states),
126+
"constraints": None,
127+
"dims": (AR_PARAM_DIM,)
128+
if self.k_endog == 1
129+
else (
130+
f"endog_{self.name}",
131+
f"lag_{self.name}",
132+
),
133+
},
134+
f"sigma_{self.name}": {
135+
"shape": () if self.k_endog == 1 else (self.k_endog,),
136+
"constraints": "Positive",
137+
"dims": None if self.k_endog == 1 else (f"endog_{self.name}",),
138+
},
139+
}
140+
141+
def make_symbolic_graph(self) -> None:
142+
k_endog = self.k_endog
143+
k_states = self.k_states // k_endog
144+
k_posdef = self.k_posdef
145+
146+
k_nonzero = int(sum(self.order))
147+
ar_params = self.make_and_register_variable(
148+
f"params_{self.name}", shape=(k_nonzero,) if k_endog == 1 else (k_endog, k_nonzero)
149+
)
150+
sigma_ar = self.make_and_register_variable(
151+
f"sigma_{self.name}", shape=() if k_endog == 1 else (k_endog,)
152+
)
153+
154+
if k_endog == 1:
155+
T = pt.eye(k_states, k=-1)
156+
ar_idx = (np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0])
157+
T = T[ar_idx].set(ar_params)
158+
159+
else:
160+
transition_matrices = []
161+
162+
for i in range(k_endog):
163+
T = pt.eye(k_states, k=-1)
164+
ar_idx = (np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0])
165+
T = T[ar_idx].set(ar_params[i])
166+
transition_matrices.append(T)
167+
T = pt.specify_shape(
168+
pt.linalg.block_diag(*transition_matrices), (self.k_states, self.k_states)
169+
)
170+
171+
self.ssm["transition", :, :] = T
172+
173+
R = np.eye(k_states)
174+
R_mask = np.full((k_states), False)
175+
R_mask[0] = True
176+
R = R[:, R_mask]
177+
178+
self.ssm["selection", :, :] = pt.specify_shape(
179+
pt.linalg.block_diag(*[R for _ in range(k_endog)]), (self.k_states, self.k_posdef)
180+
)
181+
182+
Z = pt.zeros((1, k_states))[0, 0].set(1.0)
183+
self.ssm["design", :, :] = pt.specify_shape(
184+
pt.linalg.block_diag(*[Z for _ in range(k_endog)]), (self.k_endog, self.k_states)
185+
)
186+
187+
cov_idx = ("state_cov", *np.diag_indices(k_posdef))
188+
self.ssm[cov_idx] = sigma_ar**2

0 commit comments

Comments
 (0)