Implement Dim ZeroSumNormal

ricardoV94 · ricardoV94 · commit 7f76f2300b95 · 2025-06-30T00:12:28.000+02:00
diff --git a/pymc/dims/distributions/vector.py b/pymc/dims/distributions/vector.py
@@ -14,11 +14,15 @@
 import pytensor.xtensor as ptx
 import pytensor.xtensor.random as ptxr
 
+from pytensor.tensor import as_tensor
 from pytensor.tensor.random.utils import normalize_size_param
+from pytensor.xtensor import as_xtensor
 from pytensor.xtensor import random as pxr
 
 from pymc.dims.distributions.core import VectorDimDistribution
+from pymc.dims.transforms import ZeroSumTransform
 from pymc.distributions.multivariate import ZeroSumNormalRV
+from pymc.util import UNSET
 
 
 class Categorical(VectorDimDistribution):
@@ -100,3 +104,94 @@ def make_node(self, rng, size, sigma, support_shape):
             # We need to rebuild the graph with new size type
             return self.rv_op(sigma, support_shape, size=size, rng=rng).owner
         return super().make_node(rng, size, sigma, support_shape)
+
+
+class ZeroSumNormal(VectorDimDistribution):
+    """ZeroSumNormal whose zero-sum axes are picked by dim name through ``core_dims``.
+
+    ``core_dims`` may be a single dim name or a tuple of names; at least one is
+    required. ``sigma`` defaults to 1.0.
+    """
+
+    # NOTE: ``@classmethod`` binds ``cls`` implicitly, so the class that Python passes
+    # explicitly when it invokes ``__new__`` lands in ``args[0]``. ``*args`` is forwarded
+    # as-is below, which is how the parent ``__new__`` receives its class argument.
+    @classmethod
+    def __new__(
+        cls, *args, core_dims=None, dims=None, default_transform=UNSET, observed=None, **kwargs
+    ):
+        """Derive ``dims`` and ``default_transform`` from ``core_dims``, then defer to the parent.
+
+        A ``ZeroSumTransform`` over ``core_dims`` is installed only when there is no
+        ``observed`` data and the caller left ``default_transform`` unset.
+        """
+        if core_dims is not None:
+            if isinstance(core_dims, str):
+                core_dims = (core_dims,)
+
+            # Create default_transform
+            if observed is None and default_transform is UNSET:
+                default_transform = ZeroSumTransform(dims=core_dims)
+
+        # If the user didn't specify dims, take it from core_dims
+        # We need them to be forwarded to dist in the `dims_dict` argument
+        if dims is None and core_dims is not None:
+            dims = (..., *core_dims)
+
+        return super().__new__(
+            *args,
+            core_dims=core_dims,
+            dims=dims,
+            default_transform=default_transform,
+            observed=observed,
+            **kwargs,
+        )
+
+    @classmethod
+    def dist(cls, sigma=1.0, *, core_dims=None, dims_dict, **kwargs):
+        """Validate ``core_dims`` and build the distribution through the parent ``dist``.
+
+        The ``dims_dict`` entries of the core dims are stacked into a vector (on a
+        dummy ``"_"`` dim) that is passed along with ``sigma`` as the second parameter.
+
+        Raises
+        ------
+        ValueError
+            If ``core_dims`` is ``None`` or empty.
+        """
+        if isinstance(core_dims, str):
+            core_dims = (core_dims,)
+        if core_dims is None or len(core_dims) == 0:
+            raise ValueError("ZeroSumNormal requires atleast 1 core_dims")
+
+        support_dims = as_xtensor(
+            as_tensor([dims_dict[core_dim] for core_dim in core_dims]), dims=("_",)
+        )
+        sigma = cls._as_xtensor(sigma)
+
+        return super().dist(
+            [sigma, support_dims], core_dims=core_dims, dims_dict=dims_dict, **kwargs
+        )
+
+    @classmethod
+    def xrv_op(cls, sigma, support_dims, core_dims, extra_dims=None, rng=None):
+        """Wrap the core op of ``DimZeroSumNormalRV`` as an xtensor RV and apply it.
+
+        ``support_dims`` is a vector on the dummy ``"_"`` dim; its values are used as
+        the ``support_shape`` of the core RV.
+        """
+        sigma = as_xtensor(sigma)
+        support_dims = as_xtensor(support_dims, dims=("_",))
+        support_shape = support_dims.values
+        core_rv = DimZeroSumNormalRV.rv_op(sigma=sigma.values, support_shape=support_shape).owner.op
+        xop = pxr._as_xrv(
+            core_rv,
+            # NOTE(review): the indices presumably refer to the `core_dims` given at call
+            # time: sigma uses none, support_dims uses position 0 (the dummy "_") and the
+            # output uses the remaining positions -- confirm against `_as_xrv`.
+            core_inps_dims_map=[(), (0,)],
+            core_out_dims_map=tuple(range(1, len(core_dims) + 1)),
+        )
+        # Dummy "_" core dim to absorb the support_shape vector
+        # If ZeroSumNormal expected a scalar per support dim, this wouldn't be needed
+        return xop(sigma, support_dims, core_dims=("_", *core_dims), extra_dims=extra_dims, rng=rng)
diff --git a/pymc/dims/transforms.py b/pymc/dims/transforms.py
@@ -11,6 +11,7 @@
 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
+import pytensor.tensor as pt
 import pytensor.xtensor as ptx
 
 from pymc.logprob.transforms import Transform
@@ -51,3 +52,59 @@ def log_jac_det(self, value, *inputs):
 
 
 log_odds_transform = LogOddsTransform()
+
+
+class ZeroSumTransform(DimTransform):
+    """Map between an array and its zero-sum extension along the named ``dims``.
+
+    ``backward`` grows each dim in ``dims`` by one entry (see ``extend_dim``) and
+    ``forward`` drops that entry again (see ``reduce_dim``).
+    """
+
+    name = "zerosum"
+
+    def __init__(self, dims: "str | tuple[str, ...]"):
+        # A bare string would otherwise be iterated character by character in
+        # forward/backward, so treat it as a single dim name.
+        if isinstance(dims, str):
+            dims = (dims,)
+        self.dims = dims
+
+    @staticmethod
+    def extend_dim(array, dim):
+        """Append one entry along ``dim`` so that the result sums to zero over ``dim``."""
+        n = (array.sizes[dim] + 1).astype("floatX")
+        sum_vals = array.sum(dim)
+        norm = sum_vals / (pt.sqrt(n) + n)
+        fill_val = norm - sum_vals / pt.sqrt(n)
+
+        out = ptx.concat([array, fill_val], dim=dim)
+        return out - norm
+
+    @staticmethod
+    def reduce_dim(array, dim):
+        """Drop the last entry along ``dim`` and shift the rest, undoing ``extend_dim``."""
+        n = array.sizes[dim].astype("floatX")
+        last = array.isel({dim: -1})
+
+        sum_vals = -last * pt.sqrt(n)
+        norm = sum_vals / (pt.sqrt(n) + n)
+        return array.isel({dim: slice(None, -1)}) + norm
+
+    def forward(self, value, *rv_inputs):
+        """Apply ``reduce_dim`` once for every dim in ``self.dims``."""
+        for dim in self.dims:
+            value = self.reduce_dim(value, dim=dim)
+        return value
+
+    def backward(self, value, *rv_inputs):
+        """Apply ``extend_dim`` once for every dim in ``self.dims``."""
+        for dim in self.dims:
+            value = self.extend_dim(value, dim=dim)
+        return value
+
+    def log_jac_det(self, value, *rv_inputs):
+        """Return ``value`` summed over ``self.dims`` and multiplied by 0."""
+        # Use following once broadcast_like is implemented
+        # `as_xtensor(0).broadcast_like(value, exclude=self.dims)`
+        return value.sum(self.dims) * 0
diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py
@@ -367,6 +367,7 @@ def __init__(
 
         kwargs.setdefault("inline", True)
         kwargs.setdefault("strict", True)
+        kwargs.setdefault("on_unused_input", "ignore")
         super().__init__(*args, **kwargs)
 
     def update(self, node: Apply) -> dict[Variable, Variable]:
diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py
@@ -2664,6 +2664,7 @@ def logp(value, alpha, K):
 class ZeroSumNormalRV(SymbolicRandomVariable):
     """ZeroSumNormal random variable."""
 
+    name = "ZeroSumNormal"
     _print_name = ("ZeroSumNormal", "\\operatorname{ZeroSumNormal}")
 
     @classmethod
@@ -2687,12 +2688,12 @@ def rv_op(cls, sigma, support_shape, *, size=None, rng=None):
             zerosum_rv -= zerosum_rv.mean(axis=-axis - 1, keepdims=True)
 
         support_str = ",".join([f"d{i}" for i in range(n_zerosum_axes)])
-        extended_signature = f"[rng],(),(s),[size]->[rng],({support_str})"
-        return ZeroSumNormalRV(
-            inputs=[rng, sigma, support_shape, size],
+        extended_signature = f"[rng],[size],(),(s)->[rng],({support_str})"
+        return cls(
+            inputs=[rng, size, sigma, support_shape],
             outputs=[next_rng, zerosum_rv],
             extended_signature=extended_signature,
-        )(rng, sigma, support_shape, size)
+        )(rng, size, sigma, support_shape)
 
 
 class ZeroSumNormal(Distribution):
@@ -2828,7 +2829,7 @@ def zerosum_default_transform(op, rv):
 
 
 @_logprob.register(ZeroSumNormalRV)
-def zerosumnormal_logp(op, values, rng, sigma, support_shape, size, **kwargs):
+def zerosumnormal_logp(op, values, rng, size, sigma, support_shape, **kwargs):
     (value,) = values
     shape = value.shape
     n_zerosum_axes = op.ndim_supp
diff --git a/tests/dims/test_distributions.py b/tests/dims/test_distributions.py
@@ -0,0 +1,77 @@
+#   Copyright 2025 - present The PyMC Developers
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+import numpy as np
+
+from pymc import Model, draw
+from pymc.dims import ZeroSumNormal
+from pymc.distributions import ZeroSumNormal as RegularZeroSumNormal
+
+
+def test_zerosumnormal():
+    """Compare the dims ZeroSumNormal with the regular one: dims, draws, initial point, logp."""
+    coords = {"time": range(5), "item": range(3)}
+
+    with Model(coords=coords) as model:
+        zsn_item = ZeroSumNormal("zsn_item", core_dims="item", dims=("time", "item"))
+        zsn_time = ZeroSumNormal("zsn_time", core_dims="time", dims=("time", "item"))
+        zsn_item_time = ZeroSumNormal("zsn_item_time", core_dims=("item", "time"))
+    assert zsn_item.type.dims == ("time", "item")
+    assert zsn_time.type.dims == ("time", "item")
+    assert zsn_item_time.type.dims == ("item", "time")
+
+    zsn_item_draw, zsn_time_draw, zsn_item_time_draw = draw(
+        [zsn_item, zsn_time, zsn_item_time], random_seed=1
+    )
+    assert zsn_item_draw.shape == (5, 3)
+    np.testing.assert_allclose(zsn_item_draw.mean(-1), 0, atol=1e-13)
+    assert not np.allclose(zsn_item_draw.mean(0), 0, atol=1e-13)
+
+    assert zsn_time_draw.shape == (5, 3)
+    np.testing.assert_allclose(zsn_time_draw.mean(0), 0, atol=1e-13)
+    assert not np.allclose(zsn_time_draw.mean(-1), 0, atol=1e-13)
+
+    assert zsn_item_time_draw.shape == (3, 5)
+    np.testing.assert_allclose(zsn_item_time_draw.mean(), 0, atol=1e-13)
+
+    with Model(coords=coords) as ref_model:
+        # Check that the ZeroSumNormal can be used in a model
+        RegularZeroSumNormal("zsn_item", dims=("time", "item"))
+        RegularZeroSumNormal("zsn_time", dims=("item", "time"))
+        RegularZeroSumNormal("zsn_item_time", n_zerosum_axes=2, dims=("item", "time"))
+
+    # Check initial_point and logp
+    ip = model.initial_point()
+    ref_ip = ref_model.initial_point()
+    assert ip.keys() == ref_ip.keys()
+    for i, (ip_value, ref_ip_value) in enumerate(zip(ip.values(), ref_ip.values())):
+        if i == 1:
+            # zsn_time is actually transposed in the original model
+            ip_value = ip_value.T
+        np.testing.assert_allclose(ip_value, ref_ip_value)
+
+    logp_fn = model.compile_logp()
+    ref_logp_fn = ref_model.compile_logp()
+    np.testing.assert_allclose(logp_fn(ip), ref_logp_fn(ref_ip))
+
+    # Test a new point
+    rng = np.random.default_rng(68)
+    # Perturb copies of the arrays: `ip.copy()` is shallow, so `+=` would also modify `ip`
+    new_ip = {key: value.copy() for key, value in ip.items()}
+    for value in new_ip.values():
+        value += rng.uniform(size=value.shape)
+    # zsn_time (position 1) is laid out transposed in the reference model
+    new_ref_ip = {
+        key: (value.T if i == 1 else value) for i, (key, value) in enumerate(new_ip.items())
+    }
+    np.testing.assert_allclose(logp_fn(new_ip), ref_logp_fn(new_ref_ip))