Commit fa470af

Add vectorize_draws argument to sample methods
1 parent 51ad853 commit fa470af

File tree

3 files changed: +111 -30 lines changed

pymc_extras/statespace/core/statespace.py

Lines changed: 39 additions & 2 deletions

@@ -960,6 +960,7 @@ def build_statespace_graph(
         mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
         save_kalman_filter_outputs_in_idata: bool = False,
         mode: str | None = None,
+        vectorize_draws: bool = True,
     ) -> None:
         """
         Given a parameter vector `theta`, constructs the full computational graph describing the state space model and
@@ -1022,6 +1023,11 @@ def build_statespace_graph(
            The `mode` argument is deprecated and will be removed in a future version. Pass ``mode`` to the
            model constructor, or manually specify ``compile_kwargs`` in sampling functions instead.
 
+        vectorize_draws : bool, default True
+            If True, sample all draws in a single vectorized operation. This is significantly faster but requires
+            more memory. It is strongly recommended to keep this True unless the state space is so large that memory
+            becomes an issue.
+
         """
         if mode is not None:
             warnings.warn(
@@ -1078,6 +1084,7 @@ def build_statespace_graph(
                 observed=data,
                 dims=obs_dims,
                 method=mvn_method,
+                vectorize_draws=vectorize_draws,
             )
 
             self._fit_coords = pm_mod.coords.copy()
@@ -1271,6 +1278,7 @@ def _sample_conditional(
         random_seed: RandomState | None = None,
         data: pt.TensorLike | None = None,
         mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
+        vectorize_draws: bool = True,
         **kwargs,
     ):
         """
@@ -1300,6 +1308,11 @@ def _sample_conditional(
            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
 
+        vectorize_draws : bool, default True
+            If True, sample all draws in a single vectorized operation. This is significantly faster but requires
+            more memory. It is strongly recommended to keep this True unless the state space is so large that memory
+            becomes an issue.
+
         kwargs:
             Additional keyword arguments are passed to pymc.sample_posterior_predictive
 
@@ -1355,6 +1368,7 @@ def _sample_conditional(
                 logp=dummy_ll,
                 dims=state_dims,
                 method=mvn_method,
+                vectorize_draws=vectorize_draws,
             )
 
             obs_mu = d + (Z @ mu[..., None]).squeeze(-1)
@@ -1367,6 +1381,7 @@ def _sample_conditional(
                 logp=dummy_ll,
                 dims=obs_dims,
                 method=mvn_method,
+                vectorize_draws=vectorize_draws,
             )
 
             # TODO: Remove this after pm.Flat initial values are fixed
@@ -1523,6 +1538,7 @@ def sample_conditional_prior(
         idata: InferenceData,
         random_seed: RandomState | None = None,
         mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
+        vectorize_draws: bool = True,
         **kwargs,
     ) -> InferenceData:
         """
@@ -1547,6 +1563,11 @@ def sample_conditional_prior(
            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
 
+        vectorize_draws : bool, default True
+            If True, sample all draws in a single vectorized operation. This is significantly faster but requires
+            more memory. It is strongly recommended to keep this True unless the state space is so large that memory
+            becomes an issue.
+
         kwargs:
             Additional keyword arguments are passed to pymc.sample_posterior_predictive
 
@@ -1559,14 +1580,20 @@ def sample_conditional_prior(
         """
 
         return self._sample_conditional(
-            idata=idata, group="prior", random_seed=random_seed, mvn_method=mvn_method, **kwargs
+            idata=idata,
+            group="prior",
+            random_seed=random_seed,
+            mvn_method=mvn_method,
+            vectorize_draws=vectorize_draws,
+            **kwargs,
         )
 
     def sample_conditional_posterior(
         self,
         idata: InferenceData,
         random_seed: RandomState | None = None,
         mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
+        vectorize_draws: bool = True,
         **kwargs,
     ):
         """
@@ -1590,6 +1617,11 @@ def sample_conditional_posterior(
            In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
            recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
 
+        vectorize_draws : bool, default True
+            If True, sample all draws in a single vectorized operation. This is significantly faster but requires
+            more memory. It is strongly recommended to keep this True unless the state space is so large that memory
+            becomes an issue.
+
         kwargs:
             Additional keyword arguments are passed to pymc.sample_posterior_predictive
 
@@ -1602,7 +1634,12 @@ def sample_conditional_posterior(
         """
 
         return self._sample_conditional(
-            idata=idata, group="posterior", random_seed=random_seed, mvn_method=mvn_method, **kwargs
+            idata=idata,
+            group="posterior",
+            random_seed=random_seed,
+            mvn_method=mvn_method,
+            vectorize_draws=vectorize_draws,
+            **kwargs,
         )
 
     def sample_unconditional_prior(
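
For orientation, here is a sketch of the new flag from the user's side. Everything around the two new keyword arguments is assumed context (a built statespace model ss_mod, observed data df, and priors matching ss_mod.param_names), not part of this commit:

import pymc as pm

# Assumed: ss_mod is a built pymc-extras statespace model and df holds its data.
with pm.Model(coords=ss_mod.coords) as model:
    ...  # priors for ss_mod.param_names go here
    ss_mod.build_statespace_graph(df, vectorize_draws=True)  # new argument
    idata = pm.sample()

# The conditional sampling methods accept the same flag; pass False to fall
# back to the sequential scan when the vectorized draw needs too much memory.
cond_post = ss_mod.sample_conditional_posterior(idata, vectorize_draws=False)
cond_prior = ss_mod.sample_conditional_prior(idata, vectorize_draws=False)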

pymc_extras/statespace/filters/distributions.py

Lines changed: 42 additions & 28 deletions

@@ -9,6 +9,7 @@
 from pymc.distributions.shape_utils import get_support_shape_1d
 from pymc.logprob.abstract import _logprob
 from pytensor.graph.basic import Node
+from pytensor.tensor.random import multivariate_normal
 
 floatX = pytensor.config.floatX
 COV_ZERO_TOL = 0
@@ -366,45 +367,58 @@ def __new__(cls, *args, **kwargs):
         return super().__new__(cls, *args, **kwargs)
 
     @classmethod
-    def dist(cls, mus, covs, logp, method="svd", **kwargs):
-        return super().dist([mus, covs, logp], method=method, **kwargs)
+    def dist(cls, mus, covs, logp, method="svd", vectorize_draws=True, **kwargs):
+        mus, covs, logp = map(pt.as_tensor_variable, (mus, covs, logp))
+        return super().dist(
+            [mus, covs, logp], method=method, vectorize_draws=vectorize_draws, **kwargs
+        )
 
     @classmethod
-    def rv_op(cls, mus, covs, logp, method="svd", size=None):
-        # Batch dimensions (if any) will be on the far left, but scan requires time to be there instead
-        if mus.ndim > 2:
-            mus = pt.moveaxis(mus, -2, 0)
-        if covs.ndim > 3:
-            covs = pt.moveaxis(covs, -3, 0)
-
-        mus_, covs_ = mus.type(), covs.type()
-
-        logp_ = logp.type()
+    def rv_op(cls, mus, covs, logp, method="svd", vectorize_draws=True, size=None):
         rng = pytensor.shared(np.random.default_rng())
+        logp_ = logp.type()
 
-        def step(mu, cov, rng):
-            new_rng, mvn = pm.MvNormal.dist(mu=mu, cov=cov, rng=rng, method=method).owner.outputs
-            return new_rng, mvn
-
-        seq_mvn_rng, mvn_seq = pytensor.scan(
-            step,
-            sequences=[mus_, covs_],
-            outputs_info=[rng, None],
-            strict=True,
-            n_steps=mus_.shape[0],
-            return_updates=False,
-        )
-        mvn_seq = pt.specify_shape(mvn_seq, mus.type.shape)
+        if vectorize_draws:
+            mus_, covs_ = mus.type(), covs.type()
+            seq_mvn_rng, mvn_seq = multivariate_normal(
+                mean=mus_, cov=covs_, rng=rng, method=method
+            ).owner.outputs
 
-        # Move time axis back to position -2 so batches are on the left
-        if mvn_seq.ndim > 2:
-            mvn_seq = pt.moveaxis(mvn_seq, 0, -2)
+        else:
+            # Batch dimensions (if any) will be on the far left, but scan requires time to be there instead
+            if mus.ndim > 2:
+                mus = pt.moveaxis(mus, -2, 0)
+            if covs.ndim > 3:
+                covs = pt.moveaxis(covs, -3, 0)
+
+            mus_, covs_ = mus.type(), covs.type()
+
+            def step(mu, cov, rng):
+                new_rng, mvn = pm.MvNormal.dist(
+                    mu=mu, cov=cov, rng=rng, method=method
+                ).owner.outputs
+                return new_rng, mvn
+
+            seq_mvn_rng, mvn_seq = pytensor.scan(
+                step,
+                sequences=[mus_, covs_],
+                outputs_info=[rng, None],
+                strict=True,
+                n_steps=mus_.shape[0],
+                return_updates=False,
+            )
+            mvn_seq = pt.specify_shape(mvn_seq, mus.type.shape)
+
+            # Move time axis back to position -2 so batches are on the left
+            if mvn_seq.ndim > 2:
+                mvn_seq = pt.moveaxis(mvn_seq, 0, -2)
 
         mvn_seq_op = KalmanFilterRV(
             inputs=[mus_, covs_, logp_, rng], outputs=[seq_mvn_rng, mvn_seq], ndim_supp=2
         )
 
         mvn_seq = mvn_seq_op(mus, covs, logp, rng)
+
         return mvn_seq
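
The substance of the change is in rv_op: it previously always looped over the time dimension with pytensor.scan, drawing one MvNormal per step, while with vectorize_draws=True it now makes a single batched multivariate_normal call over the whole (time, state, state) covariance stack. A standalone NumPy analogue of the two strategies (illustrative only, not the library code):

import numpy as np

rng = np.random.default_rng(0)
n_time, k = 4, 3

mus = rng.normal(size=(n_time, k))
A = rng.normal(size=(n_time, k, k))
covs = A @ A.transpose(0, 2, 1) + 0.1 * np.eye(k)  # one SPD covariance per step

# Sequential strategy (the scan path): one small draw per time step.
loop_draws = np.stack([rng.multivariate_normal(m, c) for m, c in zip(mus, covs)])

# Vectorized strategy (the new path): one batched Cholesky factorization and
# one batched matmul produce all draws at once, holding the full
# (n_time, k, k) factor in memory; hence the speed/memory trade-off the
# docstring warns about.
L = np.linalg.cholesky(covs)         # (n_time, k, k)
z = rng.normal(size=(n_time, k, 1))
vec_draws = mus + (L @ z)[..., 0]    # (n_time, k)

assert loop_draws.shape == vec_draws.shape == (n_time, k)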

tests/statespace/filters/test_distributions.py

Lines changed: 30 additions & 0 deletions

@@ -5,6 +5,7 @@
 import pytest
 
 from numpy.testing import assert_allclose
+from pytensor.graph.basic import equal_computations
 from scipy.stats import multivariate_normal
 
 from pymc_extras.statespace import structural
@@ -268,3 +269,32 @@ def test_lgss_signature():
     )
     assert lgss.owner.op.ndim_supp == 2
     assert lgss.owner.op.ndims_params == [1, 2, 1, 1, 3, 2, 2, 2, 2]
+
+
+def test_sequence_mvnormal_vectorize_draws(rng):
+    n_time = 50
+    k_states = 3
+
+    mu_shape = (n_time, k_states)
+    cov_shape = (n_time, k_states, k_states)
+    logp_shape = (n_time,)
+
+    mus = rng.random(size=mu_shape).astype(floatX)
+    covs = np.zeros(cov_shape, dtype=floatX)
+    for idx in np.ndindex(cov_shape[:-2]):
+        A = rng.random(size=(k_states, k_states)).astype(floatX)
+        covs[idx] = A @ A.T + 0.1 * np.eye(k_states)
+    logp = rng.random(size=logp_shape).astype(floatX)
+
+    seed = sum(map(ord, "test_sequence_mvnormal_vectorize_draws"))
+    with pm.Model() as m1:
+        x_vectorized = SequenceMvNormal("x", mus=mus, covs=covs, logp=logp, vectorize_draws=True)
+        idata1 = pm.sample_prior_predictive(draws=5, random_seed=seed)
+
+    with pm.Model() as m2:
+        x_sequential = SequenceMvNormal("x", mus=mus, covs=covs, logp=logp, vectorize_draws=False)
+        idata2 = pm.sample_prior_predictive(draws=5, random_seed=seed)
+
+    assert_allclose(idata1.prior["x"].values, idata2.prior["x"].values, atol=ATOL, rtol=RTOL)
+
+    assert not equal_computations([x_vectorized], [x_sequential])
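
The test seeds both code paths identically, checks that the draws agree numerically, and uses equal_computations to confirm the two graphs genuinely differ, i.e. that the flag changes the sampling graph rather than being silently ignored. It relies on the module's existing rng fixture and its floatX, ATOL, and RTOL constants.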
