Working batched Kalman filter and smoother

aandorra-mia · aandorra-mia · commit 51d5f5a1f84c · 2025-06-14T18:11:00.000-04:00
diff --git a/notebooks/batch-examples.ipynb b/notebooks/batch-examples.ipynb
diff --git a/pymc_extras/statespace/filters/kalman_filter.py b/pymc_extras/statespace/filters/kalman_filter.py
@@ -1,4 +1,5 @@
 from abc import ABC
+from functools import partial
 
 import numpy as np
 import pytensor
@@ -9,14 +10,13 @@
 from pytensor.raise_op import Assert
 from pytensor.tensor import TensorVariable
 from pytensor.tensor.slinalg import solve_triangular
-from pytensor.graph.replace import vectorize_graph
 
 from pymc_extras.statespace.filters.utilities import (
     quad_form_sym,
     split_vars_into_seq_and_nonseq,
     stabilize,
 )
-from pymc_extras.statespace.utils.constants import JITTER_DEFAULT, MISSING_FILL
+from pymc_extras.statespace.utils.constants import JITTER_DEFAULT, MISSING_FILL, ALL_KF_OUTPUT_NAMES
 
 MVN_CONST = pt.log(2 * pt.constant(np.pi, dtype="float64"))
 PARAM_NAMES = ["c", "d", "T", "Z", "R", "H", "Q"]
@@ -65,22 +65,56 @@ def check_params(self, data, a0, P0, c, d, T, Z, R, H, Q):
         """
         return data, a0, P0, c, d, T, Z, R, H, Q
 
-    def has_batched_input(self, data, a0, P0, c, d, T, Z, R, H, Q):
-        """
-        Check if any of the inputs are batched.
-        """
-        return any(x.ndim > CORE_NDIM[i] for i, x in enumerate([data, a0, P0, c, d, T, Z, R, H, Q]))
-
-    def get_dummy_core_inputs(self, data, a0, P0, c, d, T, Z, R, H, Q):
-        """
-        Get dummy inputs for the core parameters.
-        """
-        out = []
-        for x, core_ndim in zip([data, a0, P0, c, d, T, Z, R, H, Q], CORE_NDIM):
-            out.append(
-                pt.tensor(f"{x.name}_core_case", dtype=x.dtype, shape=x.type.shape[-core_ndim:])
-            )
-        return out
+    def _make_gufunc_signature(self, inputs):
+        """
+        Build the gufunc signature string used to vectorize the Kalman filter graph.
+
+        Parameters
+        ----------
+        inputs : sequence of TensorVariable
+            Filter inputs, in the order they are passed to the core graph builder. The ``name`` of
+            each input is looked up in the shape table below, so an unnamed or unrecognized input
+            raises ``KeyError``.
+
+        Returns
+        -------
+        str
+            Signature of the form ``"<input core dims> -> <output core dims>"``, where each
+            variable is rendered as a parenthesized, comma-separated list of core dimension labels.
+        """
+        states = "s"
+        obs = "p"
+        exog = "r"
+        time = "t"
+
+        matrix_to_shape = {
+            "data": (time, obs),
+            "a0": (states,),
+            "x0": (states,),
+            "P0": (states, states),
+            "c": (states,),
+            "d": (obs,),
+            "T": (states, states),
+            "Z": (obs, states),
+            "R": (states, exog),
+            "H": (obs, obs),
+            "Q": (exog, exog),
+            "filtered_states": (time, states),
+            "filtered_covariances": (time, states, states),
+            "predicted_states": (time, states),
+            "predicted_covariances": (time, states, states),
+            "observed_states": (time, obs),
+            "observed_covariances": (time, obs, obs),
+            "smoothed_states": (time, states),
+            "smoothed_covariances": (time, states, states),
+            "loglike_obs": (time,),
+        }
+
+        input_shapes = [matrix_to_shape[matrix.name] for matrix in inputs]
+
+        # build_graph labels the filter outputs positionally with ALL_KF_OUTPUT_NAMES, so the
+        # signature has to describe the outputs in that same order. In particular the third and
+        # sixth outputs are the *observed* mean and covariance, with core dims (t, p) and
+        # (t, p, p); declaring them with the smoothed (state-sized) shapes is only right when the
+        # number of observed series happens to equal the number of states.
+        output_shapes = [matrix_to_shape[name] for name in ALL_KF_OUTPUT_NAMES]
+
+        input_signature = ",".join(["(" + ",".join(shapes) + ")" for shapes in input_shapes])
+        output_signature = ",".join(["(" + ",".join(shapes) + ")" for shapes in output_shapes])
+
+        return f"{input_signature} -> {output_signature}"
 
     @staticmethod
     def add_check_on_time_varying_shapes(
@@ -150,7 +184,7 @@ def unpack_args(self, args) -> tuple:
 
         return y, a0, P0, c, d, T, Z, R, H, Q
 
-    def build_graph(
+    def _build_graph(
         self,
         data,
         a0,
@@ -206,18 +240,13 @@ def build_graph(
 
         self.missing_fill_value = missing_fill_value
         self.cov_jitter = cov_jitter
-        is_batched = self.has_batched_input(data, a0, P0, c, d, T, Z, R, H, Q)
 
         [R_shape] = constant_fold([R.shape], raise_not_constant=False)
         [Z_shape] = constant_fold([Z.shape], raise_not_constant=False)
 
         self.n_states, self.n_shocks = R_shape[-2:]
         self.n_endog = Z_shape[-2]
 
-        if is_batched:
-            batched_inputs = [data, a0, P0, c, d, T, Z, R, H, Q]
-            data, a0, P0, c, d, T, Z, R, H, Q = self.get_dummy_core_inputs(*batched_inputs)
-
         data, a0, P0, *params = self.check_params(data, a0, P0, c, d, T, Z, R, H, Q)
 
         sequences, non_sequences, seq_names, non_seq_names = split_vars_into_seq_and_nonseq(
@@ -241,15 +270,47 @@ def build_graph(
 
         filter_results = self._postprocess_scan_results(results, a0, P0, n=data.type.shape[0])
 
-        if is_batched:
-            vec_subs = dict(zip([data, a0, P0, c, d, T, Z, R, H, Q], batched_inputs))
-            filter_results = vectorize_graph(filter_results, vec_subs)
-
         if return_updates:
             return filter_results, updates
 
         return filter_results
 
+    def build_graph(
+        self,
+        data,
+        a0,
+        P0,
+        c,
+        d,
+        T,
+        Z,
+        R,
+        H,
+        Q,
+        mode=None,
+        return_updates=False,
+        missing_fill_value=None,
+        cov_jitter=None,
+    ) -> list[TensorVariable] | tuple[list[TensorVariable], dict]:
+        """
+        Build the vectorized computation graph for the Kalman filter.
+
+        The core (unbatched) graph is produced by ``_build_graph`` and lifted over any leading
+        batch dimensions of the inputs with ``pt.vectorize``, using the signature returned by
+        ``_make_gufunc_signature``.
+
+        Parameters
+        ----------
+        data, a0, P0, c, d, T, Z, R, H, Q : TensorVariable
+            Filter inputs. Each must carry the name that ``_make_gufunc_signature`` expects, since
+            the signature is looked up by variable name.
+        mode, missing_fill_value, cov_jitter
+            Forwarded unchanged to ``_build_graph``.
+        return_updates : bool, default False
+            If True, also return the updates dictionary produced by ``_build_graph``.
+
+        Returns
+        -------
+        list of TensorVariable, or (list of TensorVariable, dict)
+            The filter outputs, named after ``ALL_KF_OUTPUT_NAMES`` in order; followed by the
+            updates dictionary when ``return_updates`` is True.
+        """
+        inputs = [data, a0, P0, c, d, T, Z, R, H, Q]
+        signature = self._make_gufunc_signature(inputs)
+        core_builder = partial(
+            self._build_graph,
+            mode=mode,
+            return_updates=return_updates,
+            missing_fill_value=missing_fill_value,
+            cov_jitter=cov_jitter,
+        )
+        updates = {}
+
+        def fn(*core_inputs):
+            # pt.vectorize needs the wrapped function to return exactly the tensors described by
+            # the signature. With return_updates=True, _build_graph returns (results, updates),
+            # so the updates are peeled off here and handed back to the caller separately.
+            results = core_builder(*core_inputs)
+            if return_updates:
+                results, core_updates = results
+                # NOTE(review): these updates are expressed on the core-case graph. They are
+                # presumably empty for the filter scan; confirm before relying on non-empty ones.
+                updates.update(core_updates)
+            return results
+
+        filter_outputs = pt.vectorize(fn, signature=signature)(*inputs)
+        for output, name in zip(filter_outputs, ALL_KF_OUTPUT_NAMES):
+            output.name = name
+
+        if return_updates:
+            return filter_outputs, updates
+
+        return filter_outputs
+
     def _postprocess_scan_results(self, results, a0, P0, n) -> list[TensorVariable]:
         """
         Transform the values returned by the Kalman Filter scan into a form expected by users. In particular:
diff --git a/pymc_extras/statespace/filters/kalman_smoother.py b/pymc_extras/statespace/filters/kalman_smoother.py
@@ -1,8 +1,8 @@
 import pytensor
 import pytensor.tensor as pt
-
+from functools import partial
+from pytensor.compile import get_mode
 from pytensor.tensor.nlinalg import matrix_dot
-from pytensor.graph.replace import vectorize_graph
 from pymc_extras.statespace.filters.utilities import (
     quad_form_sym,
     split_vars_into_seq_and_nonseq,
@@ -63,40 +63,57 @@ def unpack_args(self, args):
 
         return a, P, a_smooth, P_smooth, T, R, Q
 
-    def has_batched_input(self, T, R, Q, filtered_states, filtered_covariances):
-        """
-        Check if any of the inputs are batched.
-        """
-        return any(
-            x.ndim > SMOOTHER_CORE_NDIM[i]
-            for i, x in enumerate([T, R, Q, filtered_states, filtered_covariances])
-        )
-
-    def get_dummy_core_inputs(self, T, R, Q, filtered_states, filtered_covariances):
-        """
-        Get dummy inputs for the core parameters.
-        """
-        out = []
-        for x, core_ndim in zip(
-            [T, R, Q, filtered_states, filtered_covariances], SMOOTHER_CORE_NDIM
-        ):
-            out.append(
-                pt.tensor(f"{x.name}_core_case", dtype=x.dtype, shape=x.type.shape[-core_ndim:])
-            )
-        return out
-
-    def build_graph(
-        self, T, R, Q, filtered_states, filtered_covariances, cov_jitter=JITTER_DEFAULT
+    def _make_gufunc_signature(self, inputs):
+        """
+        Build the gufunc signature string used to vectorize the Kalman smoother graph.
+
+        Each input is looked up by its ``name`` in the table of core dimension labels below (an
+        unnamed or unrecognized input raises ``KeyError``). The output side always describes
+        ``smoothed_states`` followed by ``smoothed_covariances``.
+        """
+        s, p, r, t = "s", "p", "r", "t"
+
+        core_dims = {
+            "data": (t, p),
+            "a0": (s,),
+            "x0": (s,),
+            "P0": (s, s),
+            "c": (s,),
+            "d": (p,),
+            "T": (s, s),
+            "Z": (p, s),
+            "R": (s, r),
+            "H": (p, p),
+            "Q": (r, r),
+            "filtered_states": (t, s),
+            "filtered_covariances": (t, s, s),
+            "predicted_states": (t, s),
+            "predicted_covariances": (t, s, s),
+            "observed_states": (t, p),
+            "observed_covariances": (t, p, p),
+            "smoothed_states": (t, s),
+            "smoothed_covariances": (t, s, s),
+            "loglike_obs": (t,),
+        }
+
+        def render(names):
+            # One "(d1,d2,...)" group per variable, groups joined by commas.
+            return ",".join("(" + ",".join(core_dims[name]) + ")" for name in names)
+
+        lhs = render(matrix.name for matrix in inputs)
+        rhs = render(("smoothed_states", "smoothed_covariances"))
+
+        return f"{lhs} -> {rhs}"
+
+    def _build_graph(
+        self, T, R, Q, filtered_states, filtered_covariances, mode=None, cov_jitter=JITTER_DEFAULT
     ):
         self.cov_jitter = cov_jitter
 
-        is_batched = self.has_batched_input(T, R, Q, filtered_states, filtered_covariances)
-        if is_batched:
-            batched_inputs = [T, R, Q, filtered_states, filtered_covariances]
-            T, R, Q, filtered_states, filtered_covariances = self.get_dummy_core_inputs(
-                *batched_inputs
-            )
-
         n, k = filtered_states.type.shape
 
         a_last = pt.specify_shape(filtered_states[-1], (k,))
@@ -125,18 +142,28 @@ def build_graph(
         smoothed_covariances = pt.concatenate(
             [smoothed_covariances[::-1], pt.expand_dims(P_last, axis=(0,))], axis=0
         )
-        smoothed_states.dprint()
-        if is_batched:
-            vec_subs = dict(zip([T, R, Q, filtered_states, filtered_covariances], batched_inputs))
-            smoothed_states, smoothed_covariances = vectorize_graph(
-                [smoothed_states, smoothed_covariances], vec_subs
-            )
 
         smoothed_states.name = "smoothed_states"
         smoothed_covariances.name = "smoothed_covariances"
 
         return smoothed_states, smoothed_covariances
 
+    def build_graph(
+        self, T, R, Q, filtered_states, filtered_covariances, mode=None, cov_jitter=JITTER_DEFAULT
+    ):
+        """
+        Build the vectorized computation graph for the Kalman smoother.
+
+        The core graph comes from ``_build_graph`` (with ``mode`` and ``cov_jitter`` bound as
+        keyword arguments) and is lifted over batch dimensions with ``pt.vectorize``, using the
+        signature derived from the names of the five inputs. Returns whatever the vectorized
+        function produces for ``(smoothed_states, smoothed_covariances)``.
+        """
+        smoother_inputs = [T, R, Q, filtered_states, filtered_covariances]
+        core_smoother = partial(self._build_graph, mode=mode, cov_jitter=cov_jitter)
+        vectorized_smoother = pt.vectorize(
+            core_smoother, signature=self._make_gufunc_signature(smoother_inputs)
+        )
+        return vectorized_smoother(*smoother_inputs)
+
     def smoother_step(self, *args):
         a, P, a_smooth, P_smooth, T, R, Q = self.unpack_args(args)
         a_hat, P_hat = self.predict(a, P, T, R, Q)
diff --git a/pymc_extras/statespace/filters/utilities.py b/pymc_extras/statespace/filters/utilities.py
@@ -2,7 +2,14 @@
 
 from pytensor.tensor.nlinalg import matrix_dot
 
-from pymc_extras.statespace.utils.constants import JITTER_DEFAULT, NEVER_TIME_VARYING, VECTOR_VALUED
+from pymc_extras.statespace.utils.constants import (
+    JITTER_DEFAULT,
+    NEVER_TIME_VARYING,
+    VECTOR_VALUED,
+)
+
+# Number of core (unbatched) dimensions of each Kalman filter input, in the positional order
+# [data, a0, P0, c, d, T, Z, R, H, Q] used by the *_filter helpers in this module.
+CORE_NDIM = (2, 1, 2, 1, 1, 2, 2, 2, 2, 2)
+# Number of core (unbatched) dimensions of each Kalman smoother input, in the positional order
+# [T, R, Q, filtered_states, filtered_covariances] used by the *_smoother helpers in this module.
+SMOOTHER_CORE_NDIM = (2, 2, 2, 2, 3)
 
 
 def decide_if_x_time_varies(x, name):
@@ -57,3 +64,40 @@ def stabilize(cov, jitter=JITTER_DEFAULT):
 def quad_form_sym(A, B):
     out = matrix_dot(A, B, A.T)
     return 0.5 * (out + out.T)
+
+
+def has_batched_input_smoother(T, R, Q, filtered_states, filtered_covariances):
+    """
+    Return True if at least one smoother input has more dimensions than its core case.
+
+    Inputs are compared position by position against ``SMOOTHER_CORE_NDIM``.
+    """
+    smoother_inputs = (T, R, Q, filtered_states, filtered_covariances)
+    for x, core_ndim in zip(smoother_inputs, SMOOTHER_CORE_NDIM):
+        if x.ndim > core_ndim:
+            return True
+    return False
+
+
+def get_dummy_core_inputs_smoother(T, R, Q, filtered_states, filtered_covariances):
+    """
+    Create placeholder tensors with the core (unbatched) shape of each smoother input.
+
+    Every placeholder is named ``"<input name>_core_case"``, shares the dtype of its input, and
+    takes its static shape from the trailing ``SMOOTHER_CORE_NDIM`` dimensions of that input.
+    The placeholders are returned as a list in the same order as the arguments.
+    """
+    smoother_inputs = (T, R, Q, filtered_states, filtered_covariances)
+    return [
+        pt.tensor(f"{x.name}_core_case", dtype=x.dtype, shape=x.type.shape[-core_ndim:])
+        for x, core_ndim in zip(smoother_inputs, SMOOTHER_CORE_NDIM)
+    ]
+
+
+def has_batched_input_filter(data, a0, P0, c, d, T, Z, R, H, Q):
+    """
+    Return True if at least one filter input has more dimensions than its core case.
+
+    Inputs are compared position by position against ``CORE_NDIM``.
+    """
+    filter_inputs = (data, a0, P0, c, d, T, Z, R, H, Q)
+    for x, core_ndim in zip(filter_inputs, CORE_NDIM):
+        if x.ndim > core_ndim:
+            return True
+    return False
+
+
+def get_dummy_core_inputs_filter(data, a0, P0, c, d, T, Z, R, H, Q):
+    """
+    Create placeholder tensors with the core (unbatched) shape of each filter input.
+
+    Every placeholder is named ``"<input name>_core_case"``, shares the dtype of its input, and
+    takes its static shape from the trailing ``CORE_NDIM`` dimensions of that input. The
+    placeholders are returned as a list in the same order as the arguments.
+    """
+    filter_inputs = (data, a0, P0, c, d, T, Z, R, H, Q)
+    return [
+        pt.tensor(f"{x.name}_core_case", dtype=x.dtype, shape=x.type.shape[-core_ndim:])
+        for x, core_ndim in zip(filter_inputs, CORE_NDIM)
+    ]
diff --git a/pymc_extras/statespace/utils/constants.py b/pymc_extras/statespace/utils/constants.py
@@ -47,6 +47,16 @@
 SMOOTHER_OUTPUT_NAMES = ["smoothed_state", "smoothed_covariance"]
 OBSERVED_OUTPUT_NAMES = ["predicted_observed_state", "predicted_observed_covariance"]
 
+# Names given to the Kalman filter outputs. The order is significant: the filter's build_graph
+# assigns these names to its outputs positionally, so this list must stay in the same order as
+# the outputs the filter returns.
+ALL_KF_OUTPUT_NAMES = [
+    "filtered_states",
+    "predicted_states",
+    "observed_states",
+    "filtered_covariances",
+    "predicted_covariances",
+    "observed_covariances",
+    "loglike_obs",
+]
+
 MATRIX_DIMS = {
     "x0": (ALL_STATE_DIM,),
     "P0": (ALL_STATE_DIM, ALL_STATE_AUX_DIM),
diff --git a/tests/statespace/filters/test_kalman_filter.py b/tests/statespace/filters/test_kalman_filter.py