Vectorized full state space model

aandorra-mia · aandorra-mia · commit 50a97482c3af · 2025-05-09T12:07:03.000-04:00
diff --git a/pymc_extras/statespace/core/representation.py b/pymc_extras/statespace/core/representation.py
@@ -60,12 +60,12 @@ class PytensorRepresentation:
 
     .. math::
         \begin{align}
-            x_t &= A_t x_{t-1} + c_t + R_t \varepsilon_t \tag{1} \\
+            x_t &= T_t x_{t-1} + c_t + R_t \varepsilon_t \tag{1} \\
             y_t &= Z_t x_t + d_t + \eta_t \tag{2} \\
         \end{align}
 
     Where :math:`\{x_t\}_{t=0}^T` is a trajectory of hidden states, and :math:`\{y_t\}_{t=0}^T` is a trajectory of
-    observable states. Equation 1 is known as the "state transition equation", while describes how the system evolves
+    observable states. Equation 1 is known as the "state transition equation", which describes how the system evolves
     over time. Equation 2 is the "observation equation", and maps the latent state processes to observed data.
     The system is Gaussian when the innovations, :math:`\varepsilon_t`, and the measurement errors, :math:`\eta_t`,
     are normally distributed. The definition is completed by specification of these distributions, as
diff --git a/pymc_extras/statespace/core/statespace.py b/pymc_extras/statespace/core/statespace.py
@@ -15,6 +15,7 @@
 from pymc.util import RandomState
 from pytensor import Variable, graph_replace
 from pytensor.compile import get_mode
+from pytensor.graph.replace import vectorize_graph
 from rich.box import SIMPLE_HEAD
 from rich.console import Console
 from rich.table import Table
@@ -37,6 +38,7 @@
     FILTER_OUTPUT_DIMS,
     FILTER_OUTPUT_TYPES,
     JITTER_DEFAULT,
+    LONG_MATRIX_NAMES,
     MATRIX_DIMS,
     MATRIX_NAMES,
     OBS_STATE_DIM,
@@ -46,7 +48,6 @@
     VECTOR_VALUED,
 )
 from pymc_extras.statespace.utils.data_tools import register_data_with_pymc
-from pytensor.graph.replace import vectorize_graph
 
 _log = logging.getLogger("pymc.experimental.statespace")
 
@@ -61,7 +62,7 @@
 def _validate_filter_arg(filter_arg):
     if filter_arg.lower() not in FILTER_OUTPUT_TYPES:
         raise ValueError(
-            f'filter_output should be one of {", ".join(FILTER_OUTPUT_TYPES)}, received {filter_arg}'
+            f"filter_output should be one of {', '.join(FILTER_OUTPUT_TYPES)}, received {filter_arg}"
         )
 
 
@@ -728,6 +729,8 @@ def _insert_random_variables(self):
 
         replacement_dict = {var: pymc_model[name] for name, var in self._name_to_variable.items()}
         self.subbed_ssm = vectorize_graph(matrices, replace=replacement_dict)
+        for name, matrix in zip(LONG_MATRIX_NAMES, self.subbed_ssm):
+            matrix.name = name
 
     def _insert_data_variables(self):
         """
diff --git a/pymc_extras/statespace/filters/kalman_filter.py b/pymc_extras/statespace/filters/kalman_filter.py
@@ -17,7 +17,7 @@
     split_vars_into_seq_and_nonseq,
     stabilize,
 )
-from pymc_extras.statespace.utils.constants import JITTER_DEFAULT, MISSING_FILL, ALL_KF_OUTPUT_NAMES
+from pymc_extras.statespace.utils.constants import ALL_KF_OUTPUT_NAMES, JITTER_DEFAULT, MISSING_FILL
 
 MVN_CONST = pt.log(2 * pt.constant(np.pi, dtype="float64"))
 PARAM_NAMES = ["c", "d", "T", "Z", "R", "H", "Q"]
@@ -85,14 +85,23 @@ def _make_gufunc_signature(self, inputs):
             "data": (time, obs),
             "a0": (states,),
             "x0": (states,),
+            "initial_state": (states,),
             "P0": (states, states),
+            "initial_state_cov": (states, states),
             "c": (states,),
+            "state_intercept": (states,),
             "d": (obs,),
+            "obs_intercept": (obs,),
             "T": (states, states),
+            "transition": (states, states),
             "Z": (obs, states),
+            "design": (obs, states),
             "R": (states, exog),
+            "selection": (states, exog),
             "H": (obs, obs),
+            "obs_cov": (obs, obs),
             "Q": (exog, exog),
+            "state_cov": (exog, exog),
             "filtered_states": (time, states),
             "filtered_covariances": (time, states, states),
             "predicted_states": (time, states),
@@ -322,6 +331,7 @@ def build_graph(
             cov_jitter=cov_jitter,
         )
         filter_outputs = pt.vectorize(fn, signature=signature)(data, a0, P0, c, d, T, Z, R, H, Q)
+        # Without pt.vectorize: filter_outputs = fn(data, a0, P0, c, d, T, Z, R, H, Q)
         for output, name in zip(filter_outputs, ALL_KF_OUTPUT_NAMES):
             output.name = name
 
diff --git a/pymc_extras/statespace/filters/kalman_smoother.py b/pymc_extras/statespace/filters/kalman_smoother.py
@@ -1,8 +1,11 @@
+from functools import partial
+
 import pytensor
 import pytensor.tensor as pt
-from functools import partial
+
 from pytensor.compile import get_mode
 from pytensor.tensor.nlinalg import matrix_dot
+
 from pymc_extras.statespace.filters.utilities import (
     quad_form_sym,
     split_vars_into_seq_and_nonseq,
@@ -74,14 +77,23 @@ def _make_gufunc_signature(self, inputs):
             "data": (time, obs),
             "a0": (states,),
             "x0": (states,),
+            "initial_state": (states,),
             "P0": (states, states),
+            "initial_state_cov": (states, states),
             "c": (states,),
+            "state_intercept": (states,),
             "d": (obs,),
+            "obs_intercept": (obs,),
             "T": (states, states),
+            "transition": (states, states),
             "Z": (obs, states),
+            "design": (obs, states),
             "R": (states, exog),
+            "selection": (states, exog),
             "H": (obs, obs),
+            "obs_cov": (obs, obs),
             "Q": (exog, exog),
+            "state_cov": (exog, exog),
             "filtered_states": (time, states),
             "filtered_covariances": (time, states, states),
             "predicted_states": (time, states),
@@ -166,6 +178,7 @@ def build_graph(
             cov_jitter=cov_jitter,
         )
         return pt.vectorize(fn, signature=signature)(T, R, Q, filtered_states, filtered_covariances)
+        # Without pt.vectorize: return fn(T, R, Q, filtered_states, filtered_covariances)
 
     def smoother_step(self, *args):
         a, P, a_smooth, P_smooth, T, R, Q = self.unpack_args(args)
diff --git a/tests/statespace/test_statespace.py b/tests/statespace/test_statespace.py
@@ -11,6 +11,8 @@
 from numpy.testing import assert_allclose
 
 from pymc_extras.statespace.core.statespace import FILTER_FACTORY, PyMCStateSpace
+from pymc_extras.statespace.filters.kalman_filter import StandardFilter
+from pymc_extras.statespace.filters.kalman_smoother import KalmanSmoother
 from pymc_extras.statespace.models import structural as st
 from pymc_extras.statespace.models.utilities import make_default_coords
 from pymc_extras.statespace.utils.constants import (
@@ -878,3 +880,47 @@ def test_insert_batched_rvs(ss_mod, batch_size):
         ss_mod._insert_random_variables()
     matrices = ss_mod.unpack_statespace()
     assert matrices[4].type.shape == (*batch_size, 2, 2)
+
+
+@pytest.mark.parametrize("batch_size", [(10,), (10, 3, 5)])
+def test_insert_batched_rvs_in_kf(ss_mod, batch_size):
+    data = pt.as_tensor(np.random.normal(size=(*batch_size, 7, 1)).astype(floatX))
+    data.name = "data"
+    kf = StandardFilter()
+
+    with pm.Model():  # both RVs below carry the batch dimensions
+        rho = pm.Normal("rho", shape=batch_size)
+        zeta = pm.Normal("zeta", shape=batch_size)
+        ss_mod._insert_random_variables()
+
+    matrices = x0, P0, c, d, T, Z, R, H, Q = ss_mod.unpack_statespace()  # T, R, Q reused below
+    outputs = kf.build_graph(data, *matrices)
+
+    logp = outputs.pop(-1)  # last filter output; the rest are split below
+    states, covs = outputs[:3], outputs[3:]  # three state outputs, then covariances
+    filtered_states, predicted_states, observed_states = states
+    filtered_covariances, predicted_covariances, observed_covariances = covs
+
+    assert logp.type.shape == (*batch_size, 7)  # 7 == time steps in data
+    assert filtered_states.type.shape == (*batch_size, 7, 2)
+    assert predicted_states.type.shape == (*batch_size, 7, 2)
+    assert observed_states.type.shape == (*batch_size, 7, 1)
+    assert filtered_covariances.type.shape == (*batch_size, 7, 2, 2)
+    assert predicted_covariances.type.shape == (*batch_size, 7, 2, 2)
+    assert observed_covariances.type.shape == (*batch_size, 7, 1, 1)
+
+    ks = KalmanSmoother()
+    smoothed_states, smoothed_covariances = ks.build_graph(
+        T, R, Q, filtered_states, filtered_covariances
+    )
+    assert smoothed_states.type.shape == (
+        *batch_size,
+        None,
+        2,
+    )  # TODO: static time dim is None here but 7 in the filter outputs; find out why
+    assert smoothed_covariances.type.shape == (
+        *batch_size,
+        None,
+        2,
+        2,
+    )  # TODO: static time dim is None here but 7 in the filter outputs; find out why