Implement Dim ZeroSumNormal

ricardoV94 · ricardoV94 · commit fba64f0ab9b8 · 2025-07-22T19:54:04.000+02:00
diff --git a/pymc/dims/distributions/transforms.py b/pymc/dims/distributions/transforms.py
@@ -11,6 +11,7 @@
 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
+import pytensor.tensor as pt
 import pytensor.xtensor as ptx
 
 from pymc.logprob.transforms import Transform
@@ -51,3 +52,44 @@ def log_jac_det(self, value, *inputs):
 
 
 log_odds_transform = LogOddsTransform()
+
+
+class ZeroSumTransform(DimTransform):
+    name = "zerosum"
+
+    def __init__(self, dims: tuple[str, ...]):
+        self.dims = dims
+
+    @staticmethod
+    def extend_dim(array, dim):
+        n = (array.sizes[dim] + 1).astype("floatX")
+        sum_vals = array.sum(dim)
+        norm = sum_vals / (pt.sqrt(n) + n)
+        fill_val = norm - sum_vals / pt.sqrt(n)
+
+        out = ptx.concat([array, fill_val], dim=dim)
+        return out - norm
+
+    @staticmethod
+    def reduce_dim(array, dim):
+        n = array.sizes[dim].astype("floatX")
+        last = array.isel({dim: -1})
+
+        sum_vals = -last * pt.sqrt(n)
+        norm = sum_vals / (pt.sqrt(n) + n)
+        return array.isel({dim: slice(None, -1)}) + norm
+
+    def forward(self, value, *rv_inputs):
+        for dim in self.dims:
+            value = self.reduce_dim(value, dim=dim)
+        return value
+
+    def backward(self, value, *rv_inputs):
+        for dim in self.dims:
+            value = self.extend_dim(value, dim=dim)
+        return value
+
+    def log_jac_det(self, value, *rv_inputs):
+        # Use the following once broadcast_like is implemented:
+        # as_xtensor(0).broadcast_like(value, exclude=self.dims)
+        return value.sum(self.dims) * 0
diff --git a/pymc/dims/distributions/vector.py b/pymc/dims/distributions/vector.py
@@ -14,9 +14,14 @@
 import pytensor.xtensor as ptx
 import pytensor.xtensor.random as ptxr
 
+from pytensor.tensor import as_tensor
+from pytensor.xtensor import as_xtensor
 from pytensor.xtensor import random as pxr
 
 from pymc.dims.distributions.core import VectorDimDistribution
+from pymc.dims.distributions.transforms import ZeroSumTransform
+from pymc.distributions.multivariate import ZeroSumNormalRV
+from pymc.util import UNSET
 
 
 class Categorical(VectorDimDistribution):
@@ -114,3 +119,80 @@ def dist(cls, mu, cov=None, *, chol=None, lower=True, core_dims=None, **kwargs):
             cov = chol.dot(chol.rename({d0: safe_name}), dim=d1).rename({safe_name: d1})
 
         return super().dist([mu, cov], core_dims=core_dims, **kwargs)
+
+
+class ZeroSumNormal(VectorDimDistribution):
+    """Zero-sum multivariate normal distribution.
+
+    Parameters
+    ----------
+    sigma : xtensor_like, optional
+        The standard deviation of the underlying unconstrained normal distribution.
+        Defaults to 1.0. It cannot have core dimensions.
+    core_dims : Sequence of str, optional
+        The axes along which the zero-sum constraint is applied.
+    **kwargs
+        Additional keyword arguments used to define the distribution.
+
+    Returns
+    -------
+    XTensorVariable
+        An xtensor variable representing the zero-sum multivariate normal distribution.
+    """
+
+    @classmethod
+    def __new__(
+        cls, *args, core_dims=None, dims=None, default_transform=UNSET, observed=None, **kwargs
+    ):
+        if core_dims is not None:
+            if isinstance(core_dims, str):
+                core_dims = (core_dims,)
+
+            # Create default_transform
+            if observed is None and default_transform is UNSET:
+                default_transform = ZeroSumTransform(dims=core_dims)
+
+        # If the user didn't specify dims, take them from core_dims.
+        # They need to be forwarded to dist in the `dim_lengths` argument.
+        if dims is None and core_dims is not None:
+            dims = (..., *core_dims)
+
+        return super().__new__(
+            *args,
+            core_dims=core_dims,
+            dims=dims,
+            default_transform=default_transform,
+            observed=observed,
+            **kwargs,
+        )
+
+    @classmethod
+    def dist(cls, sigma=1.0, *, core_dims=None, dim_lengths, **kwargs):
+        if isinstance(core_dims, str):
+            core_dims = (core_dims,)
+        if core_dims is None or len(core_dims) == 0:
+            raise ValueError("ZeroSumNormal requires atleast 1 core_dims")
+
+        support_dims = as_xtensor(
+            as_tensor([dim_lengths[core_dim] for core_dim in core_dims]), dims=("_",)
+        )
+        sigma = cls._as_xtensor(sigma)
+
+        return super().dist(
+            [sigma, support_dims], core_dims=core_dims, dim_lengths=dim_lengths, **kwargs
+        )
+
+    @classmethod
+    def xrv_op(self, sigma, support_dims, core_dims, extra_dims=None, rng=None):
+        sigma = as_xtensor(sigma)
+        support_dims = as_xtensor(support_dims, dims=("_",))
+        support_shape = support_dims.values
+        core_rv = ZeroSumNormalRV.rv_op(sigma=sigma.values, support_shape=support_shape).owner.op
+        xop = pxr.as_xrv(
+            core_rv,
+            core_inps_dims_map=[(), (0,)],
+            core_out_dims_map=tuple(range(1, len(core_dims) + 1)),
+        )
+        # Dummy "_" core dim to absorb the support_shape vector
+        # If ZeroSumNormal expected a scalar per support dim, this wouldn't be needed
+        return xop(sigma, support_dims, core_dims=("_", *core_dims), extra_dims=extra_dims, rng=rng)
diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py
@@ -2664,6 +2664,7 @@ def logp(value, alpha, K):
 class ZeroSumNormalRV(SymbolicRandomVariable):
     """ZeroSumNormal random variable."""
 
+    name = "ZeroSumNormal"
     _print_name = ("ZeroSumNormal", "\\operatorname{ZeroSumNormal}")
 
     @classmethod
@@ -2687,12 +2688,12 @@ def rv_op(cls, sigma, support_shape, *, size=None, rng=None):
             zerosum_rv -= zerosum_rv.mean(axis=-axis - 1, keepdims=True)
 
         support_str = ",".join([f"d{i}" for i in range(n_zerosum_axes)])
-        extended_signature = f"[rng],(),(s),[size]->[rng],({support_str})"
-        return ZeroSumNormalRV(
-            inputs=[rng, sigma, support_shape, size],
+        extended_signature = f"[rng],[size],(),(s)->[rng],({support_str})"
+        return cls(
+            inputs=[rng, size, sigma, support_shape],
             outputs=[next_rng, zerosum_rv],
             extended_signature=extended_signature,
-        )(rng, sigma, support_shape, size)
+        )(rng, size, sigma, support_shape)
 
 
 class ZeroSumNormal(Distribution):
@@ -2828,7 +2829,7 @@ def zerosum_default_transform(op, rv):
 
 
 @_logprob.register(ZeroSumNormalRV)
-def zerosumnormal_logp(op, values, rng, sigma, support_shape, size, **kwargs):
+def zerosumnormal_logp(op, values, rng, size, sigma, support_shape, **kwargs):
     (value,) = values
     shape = value.shape
     n_zerosum_axes = op.ndim_supp
diff --git a/tests/dims/distributions/test_vector.py b/tests/dims/distributions/test_vector.py
@@ -19,7 +19,7 @@
 import pymc.distributions as regular_distributions
 
 from pymc import Model
-from pymc.dims import Categorical, MvNormal
+from pymc.dims import Categorical, MvNormal, ZeroSumNormal
 from tests.dims.utils import assert_equivalent_logp_graph, assert_equivalent_random_graph
 
 
@@ -60,3 +60,21 @@ def test_mvnormal():
 
     assert_equivalent_random_graph(model, reference_model)
     assert_equivalent_logp_graph(model, reference_model)
+
+
+def test_zerosumnormal():
+    coords = {"a": range(3), "b": range(2)}
+    with Model(coords=coords) as model:
+        ZeroSumNormal("x", core_dims=("b",), dims=("a", "b"))
+        ZeroSumNormal("y", sigma=3, core_dims=("b",), dims=("a", "b"))
+        ZeroSumNormal("z", core_dims=("a", "b"), dims=("a", "b"))
+
+    with Model(coords=coords) as reference_model:
+        regular_distributions.ZeroSumNormal("x", dims=("a", "b"))
+        regular_distributions.ZeroSumNormal("y", sigma=3, n_zerosum_axes=1, dims=("a", "b"))
+        regular_distributions.ZeroSumNormal("z", n_zerosum_axes=2, dims=("a", "b"))
+
+    assert_equivalent_random_graph(model, reference_model)
+    # Logp is correct, but we have join(..., -1) and join(..., 1), which don't get canonicalized to the same form.
+    # Should work once https://github.com/pymc-devs/pytensor/issues/1505 is fixed.
+    # assert_equivalent_logp_graph(model, reference_model)
diff --git a/tests/dims/test_model.py b/tests/dims/test_model.py
@@ -172,3 +172,56 @@ def test_complex_model():
             tune=200, chains=2, draws=50, compute_convergence_checks=False, progressbar=False
         )
         pm.sample_posterior_predictive(idata, progressbar=False)
+
+
+def test_zerosumnormal_model():
+    coords = {"time": range(5), "item": range(3)}
+
+    with pm.Model(coords=coords) as model:
+        zsn_item = pmd.ZeroSumNormal("zsn_item", core_dims="item", dims=("time", "item"))
+        zsn_time = pmd.ZeroSumNormal("zsn_time", core_dims="time", dims=("time", "item"))
+        zsn_item_time = pmd.ZeroSumNormal("zsn_item_time", core_dims=("item", "time"))
+    assert zsn_item.type.dims == ("time", "item")
+    assert zsn_time.type.dims == ("time", "item")
+    assert zsn_item_time.type.dims == ("item", "time")
+
+    zsn_item_draw, zsn_time_draw, zsn_item_time_draw = pm.draw(
+        [zsn_item, zsn_time, zsn_item_time], random_seed=1
+    )
+    assert zsn_item_draw.shape == (5, 3)
+    np.testing.assert_allclose(zsn_item_draw.mean(-1), 0, atol=1e-13)
+    assert not np.allclose(zsn_item_draw.mean(0), 0, atol=1e-13)
+
+    assert zsn_time_draw.shape == (5, 3)
+    np.testing.assert_allclose(zsn_time_draw.mean(0), 0, atol=1e-13)
+    assert not np.allclose(zsn_time_draw.mean(-1), 0, atol=1e-13)
+
+    assert zsn_item_time_draw.shape == (3, 5)
+    np.testing.assert_allclose(zsn_item_time_draw.mean(), 0, atol=1e-13)
+
+    with pm.Model(coords=coords) as ref_model:
+        # Check that the ZeroSumNormal can be used in a model
+        pm.ZeroSumNormal("zsn_item", dims=("time", "item"))
+        pm.ZeroSumNormal("zsn_time", dims=("item", "time"))
+        pm.ZeroSumNormal("zsn_item_time", n_zerosum_axes=2, dims=("item", "time"))
+
+    # Check initial_point and logp
+    ip = model.initial_point()
+    ref_ip = ref_model.initial_point()
+    assert ip.keys() == ref_ip.keys()
+    for i, (ip_value, ref_ip_value) in enumerate(zip(ip.values(), ref_ip.values())):
+        if i == 1:
+            # zsn_time is declared with transposed dims in the reference model
+            ip_value = ip_value.T
+        np.testing.assert_allclose(ip_value, ref_ip_value)
+
+    logp_fn = model.compile_logp()
+    ref_logp_fn = ref_model.compile_logp()
+    np.testing.assert_allclose(logp_fn(ip), ref_logp_fn(ref_ip))
+
+    # Test a new point
+    rng = np.random.default_rng(68)
+    new_ip = ip.copy()
+    for key in new_ip:
+        new_ip[key] += rng.uniform(size=new_ip[key].shape)
+    np.testing.assert_allclose(logp_fn(new_ip), ref_logp_fn(new_ip))