.WIP changes needed to implement ZSN

ricardoV94 · ricardoV94 · commit d93c1c98359b · 2025-06-24T19:10:18.000+02:00
diff --git a/pymc/dims/distribution_core.py b/pymc/dims/distribution_core.py
@@ -14,19 +14,23 @@
 from collections.abc import Callable, Sequence
 from itertools import chain
 
+import numpy as np
+
 from pytensor.graph import node_rewriter
 from pytensor.tensor.elemwise import DimShuffle
+from pytensor.tensor.random.op import RandomVariable
 from pytensor.xtensor import as_xtensor
 from pytensor.xtensor.basic import XTensorFromTensor, xtensor_from_tensor
 from pytensor.xtensor.type import XTensorVariable
 
-from pymc import modelcontext
+from pymc import SymbolicRandomVariable, modelcontext
 from pymc.dims.model import with_dims
-from pymc.dims.transforms import log_odds_transform, log_transform
+from pymc.dims.transforms import DimTransform, log_odds_transform, log_transform
 from pymc.distributions.distribution import _support_point, support_point
 from pymc.distributions.shape_utils import DimsWithEllipsis, convert_dims
 from pymc.logprob.abstract import MeasurableOp, _logprob
 from pymc.logprob.rewriting import measurable_ir_rewrites_db
+from pymc.logprob.tensor import MeasurableDimShuffle
 from pymc.logprob.utils import filter_measurable_variables
 from pymc.util import UNSET
 
@@ -46,24 +50,67 @@ def xtensor_from_tensor_support_point(xtensor_op, _, rv):
 
 
 class MeasurableXTensorFromTensor(MeasurableOp, XTensorFromTensor):
-    pass
+    __props__ = ("dims", "core_dims")
+
+    def __init__(self, dims, core_dims):
+        super().__init__(dims=dims)
+        self.core_dims = tuple(core_dims) if core_dims is not None else None
 
 
 @node_rewriter([XTensorFromTensor])
 def find_measurable_xtensor_from_tensor(fgraph, node) -> list[XTensorVariable] | None:
     if isinstance(node.op, MeasurableXTensorFromTensor):
         return None
 
-    if not filter_measurable_variables(node.inputs):
-        return None
+    xs = filter_measurable_variables(node.inputs)
+
+    if not xs:
+        # Check if we have a transposition instead
+        # The rewrite that introduces measurable transposes refuses to apply to multivariate RVs
+        # So we have a chance of inferring the core dims!
+        [ds] = node.inputs
+        ds_node = ds.owner
+        if not (
+            ds_node is not None
+            and isinstance(ds_node.op, DimShuffle)
+            and ds_node.op.is_transpose
+            and filter_measurable_variables(ds_node.inputs)
+        ):
+            return None
+        [x] = ds_node.inputs
+        if not (
+            x.owner is not None and isinstance(x.owner.op, RandomVariable | SymbolicRandomVariable)
+        ):
+            return None
+
+        measurable_x = MeasurableDimShuffle(**ds_node.op._props_dict())(x)
+
+        ndim_supp = x.owner.op.ndim_supp
+        if ndim_supp:
+            inverse_transpose = np.argsort(ds_node.op.shuffle)
+            dims = node.op.dims
+            dims_before_transpose = [dims[i] for i in inverse_transpose]
+            core_dims = dims_before_transpose[-ndim_supp:]
+        else:
+            core_dims = ()
 
-    return [MeasurableXTensorFromTensor(dims=node.op.dims)(*node.inputs)]
+        return [MeasurableXTensorFromTensor(dims=node.op.dims, core_dims=core_dims)(measurable_x)]
+    else:
+        # If this happens we know there's no measurable transpose in between and we can
+        # safely infer the core_dims positionally when the inner logp is returned
+        return [MeasurableXTensorFromTensor(dims=node.op.dims, core_dims=None)(*node.inputs)]
 
 
 @_logprob.register(MeasurableXTensorFromTensor)
 def measurable_xtensor_from_tensor(op, values, rv, **kwargs):
     rv_logp = _logprob(rv.owner.op, tuple(v.values for v in values), *rv.owner.inputs, **kwargs)
-    return xtensor_from_tensor(rv_logp, dims=op.dims)
+    if op.core_dims is None:
+        # The core_dims of the inner rv are on the right
+        dims = op.dims[: rv_logp.ndim]
+    else:
+        # We inferred where the core_dims are!
+        dims = [d for d in op.dims if d not in op.core_dims]
+    return xtensor_from_tensor(rv_logp, dims=dims)
 
 
 measurable_ir_rewrites_db.register(
@@ -75,7 +122,7 @@ class DimDistribution:
     """Base class for PyMC distribution that wrap pytensor.xtensor.random operations, and follow xarray-like semantics."""
 
     xrv_op: Callable
-    default_transform: Callable | None = None
+    default_transform: DimTransform | None = None
 
     @staticmethod
     def _as_xtensor(x):
@@ -156,6 +203,18 @@ def __new__(
             # TODO: If this fails give a more informative error message
             observed = observed.transpose(*rv_dims)
 
+        # Check user didn't pass regular transforms
+        if transform not in (UNSET, None):
+            if not isinstance(transform, DimTransform):
+                raise TypeError(
+                    f"Transform must be a DimTransform, form pymc.dims.transforms, but got {type(transform)}."
+                )
+        if default_transform not in (UNSET, None):
+            if not isinstance(default_transform, DimTransform):
+                raise TypeError(
+                    f"default_transform must be a DimTransform, from pymc.dims.transforms, but got {type(default_transform)}."
+                )
+
         rv = model.register_rv(
             rv,
             name=name,
@@ -188,13 +247,16 @@ def dist(
         if dims_dict is None:
             extra_dims = None
         else:
-            parameter_implied_dims = set(
-                chain.from_iterable(param.type.dims for param in dist_params)
-            )
+            # Exclude dims that are implied by the parameters or core_dims
+            implied_dims = set(chain.from_iterable(param.type.dims for param in dist_params))
+            if core_dims is not None:
+                if isinstance(core_dims, str):
+                    implied_dims.add(core_dims)
+                else:
+                    implied_dims.update(core_dims)
+
             extra_dims = {
-                dim: length
-                for dim, length in dims_dict.items()
-                if dim not in parameter_implied_dims
+                dim: length for dim, length in dims_dict.items() if dim not in implied_dims
             }
         return cls.xrv_op(*dist_params, extra_dims=extra_dims, core_dims=core_dims, **kwargs)
 
diff --git a/pymc/dims/distributions.py b/pymc/dims/distributions.py
@@ -14,16 +14,23 @@
 import pytensor.xtensor as ptx
 import pytensor.xtensor.random as pxr
 
+from pytensor.tensor import as_tensor
+from pytensor.tensor.random.utils import normalize_size_param
+from pytensor.xtensor import as_xtensor
+
 from pymc.dims.distribution_core import (
     DimDistribution,
     MultivariateDimDistribution,
     PositiveDimDistribution,
     UnitDimDistribution,
 )
+from pymc.dims.transforms import ZeroSumTransform
 from pymc.distributions.continuous import Beta as RegularBeta
 from pymc.distributions.continuous import Gamma as RegularGamma
 from pymc.distributions.continuous import HalfStudentTRV, flat, halfflat
 from pymc.distributions.continuous import InverseGamma as RegularInverseGamma
+from pymc.distributions.multivariate import ZeroSumNormalRV
+from pymc.util import UNSET
 
 
 def _get_sigma_from_either_sigma_or_tau(*, sigma, tau):
@@ -221,3 +228,93 @@ def dist(cls, mu, cov=None, *, chol=None, lower=True, core_dims=None, **kwargs):
             cov = chol.dot(chol.rename({d0: safe_name}), dim=d1).rename({safe_name: d1})
 
         return super().dist([mu, cov], core_dims=core_dims, **kwargs)
+
+
+class DimZeroSumNormalRV(ZeroSumNormalRV):
+    def make_node(self, rng, size, sigma, support_shape):
+        if not self.input_types[1].in_same_class(normalize_size_param(size).type):
+            # We need to rebuild the graph with new size type
+            return self.rv_op(sigma, support_shape, size=size, rng=rng).owner
+        return super().make_node(rng, size, sigma, support_shape)
+
+
+class ZeroSumNormal(MultivariateDimDistribution):
+    @classmethod
+    def __new__(
+        cls, *args, core_dims=None, dims=None, default_transform=UNSET, observed=None, **kwargs
+    ):
+        if core_dims is not None:
+            if isinstance(core_dims, str):
+                core_dims = (core_dims,)
+
+            # Create default_transform
+            if observed is None and default_transform is UNSET:
+                default_transform = ZeroSumTransform(dims=core_dims)
+
+        # If the user didn't specify dims, take it from core_dims
+        # We need them to be forwarded to dist in the `dims_dict` argument
+        if dims is None and core_dims is not None:
+            dims = (..., *core_dims)
+
+        return super().__new__(
+            *args,
+            core_dims=core_dims,
+            dims=dims,
+            default_transform=default_transform,
+            observed=observed,
+            **kwargs,
+        )
+
+    @classmethod
+    def dist(cls, sigma=1.0, *, core_dims=None, dims_dict, **kwargs):
+        if isinstance(core_dims, str):
+            core_dims = (core_dims,)
+        if core_dims is None or len(core_dims) == 0:
+            raise ValueError("ZeroSumNormal requires atleast 1 core_dims")
+
+        support_dims = as_xtensor(
+            as_tensor([dims_dict[core_dim] for core_dim in core_dims]), dims=("_",)
+        )
+        sigma = cls._as_xtensor(sigma)
+
+        return super().dist(
+            [sigma, support_dims], core_dims=core_dims, dims_dict=dims_dict, **kwargs
+        )
+
+    # def multivariate_normal(
+    #         mean,
+    #         cov,
+    #         *,
+    #         core_dims: Sequence[str],
+    #         extra_dims=None,
+    #         rng=None,
+    #         method: Literal["cholesky", "svd", "eigh"] = "cholesky",
+    # ):
+    #     mean = as_xtensor(mean)
+    #     if len(core_dims) != 2:
+    #         raise ValueError(
+    #             f"multivariate_normal requires 2 core_dims, got {len(core_dims)}"
+    #         )
+    #
+    #     # Align core_dims, so that the dim that exists in mean comes before the one that only exists in cov
+    #     # This will be the core dimension of the output
+    #     if core_dims[0] not in mean.type.dims:
+    #         core_dims = core_dims[::-1]
+    #
+    #     xop = _as_xrv(ptr.MvNormalRV(method=method))
+    #     return xop(mean, cov, core_dims=core_dims, extra_dims=extra_dims, rng=rng)
+
+    @classmethod
+    def xrv_op(self, sigma, support_dims, core_dims, extra_dims=None, rng=None):
+        sigma = as_xtensor(sigma)
+        support_dims = as_xtensor(support_dims, dims=("_",))
+        support_shape = support_dims.values
+        core_rv = DimZeroSumNormalRV.rv_op(sigma=sigma.values, support_shape=support_shape).owner.op
+        xop = pxr._as_xrv(
+            core_rv,
+            core_inps_dims_map=[(), (0,)],
+            core_out_dims_map=tuple(range(1, len(core_dims) + 1)),
+        )
+        # Dummy "_" core dim to absorb the support_shape vector
+        # If ZeroSumNormal expected a scalar per support dim, this wouldn't be needed
+        return xop(sigma, support_dims, core_dims=("_", *core_dims), extra_dims=extra_dims, rng=rng)
diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py
@@ -367,6 +367,7 @@ def __init__(
 
         kwargs.setdefault("inline", True)
         kwargs.setdefault("strict", True)
+        kwargs.setdefault("on_unused_input", "ignore")
         super().__init__(*args, **kwargs)
 
     def update(self, node: Apply) -> dict[Variable, Variable]:
diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py
@@ -2664,6 +2664,7 @@ def logp(value, alpha, K):
 class ZeroSumNormalRV(SymbolicRandomVariable):
     """ZeroSumNormal random variable."""
 
+    name = "ZeroSumNormal"
     _print_name = ("ZeroSumNormal", "\\operatorname{ZeroSumNormal}")
 
     @classmethod
@@ -2687,12 +2688,12 @@ def rv_op(cls, sigma, support_shape, *, size=None, rng=None):
             zerosum_rv -= zerosum_rv.mean(axis=-axis - 1, keepdims=True)
 
         support_str = ",".join([f"d{i}" for i in range(n_zerosum_axes)])
-        extended_signature = f"[rng],(),(s),[size]->[rng],({support_str})"
-        return ZeroSumNormalRV(
-            inputs=[rng, sigma, support_shape, size],
+        extended_signature = f"[rng],[size],(),(s)->[rng],({support_str})"
+        return cls(
+            inputs=[rng, size, sigma, support_shape],
             outputs=[next_rng, zerosum_rv],
             extended_signature=extended_signature,
-        )(rng, sigma, support_shape, size)
+        )(rng, size, sigma, support_shape)
 
 
 class ZeroSumNormal(Distribution):
diff --git a/tests/dims/test_distributions.py b/tests/dims/test_distributions.py
@@ -0,0 +1,66 @@
+#   Copyright 2025 - present The PyMC Developers
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+import numpy as np
+
+from pymc import Model, draw
+from pymc.dims import ZeroSumNormal
+from pymc.distributions import ZeroSumNormal as RegularZeroSumNormal
+
+
+def test_zerosumnormal():
+    coords = {"time": range(5), "item": range(3)}
+
+    with Model(coords=coords) as model:
+        zsn_item = ZeroSumNormal("zsn_item", core_dims="item", dims=("time", "item"))
+        zsn_time = ZeroSumNormal("zsn_time", core_dims="time", dims=("time", "item"))
+        zsn_item_time = ZeroSumNormal("zsn_item_time", core_dims=("item", "time"))
+    assert zsn_item.type.dims == ("time", "item")
+    assert zsn_time.type.dims == ("time", "item")
+    assert zsn_item_time.type.dims == ("item", "time")
+
+    zsn_item_draw, zsn_time_draw, zsn_item_time_draw = draw(
+        [zsn_item, zsn_time, zsn_item_time], random_seed=1
+    )
+    assert zsn_item_draw.shape == (5, 3)
+    np.testing.assert_allclose(zsn_item_draw.mean(-1), 0, atol=1e-13)
+    assert not np.allclose(zsn_item_draw.mean(0), 0, atol=1e-13)
+
+    assert zsn_time_draw.shape == (5, 3)
+    np.testing.assert_allclose(zsn_time_draw.mean(0), 0, atol=1e-13)
+    assert not np.allclose(zsn_time_draw.mean(-1), 0, atol=1e-13)
+
+    assert zsn_item_time_draw.shape == (3, 5)
+    np.testing.assert_allclose(zsn_item_time_draw.mean(), 0, atol=1e-13)
+
+    with Model(coords=coords) as ref_model:
+        # Check that the ZeroSumNormal can be used in a model
+        RegularZeroSumNormal("zsn_item", dims=("time", "item"))
+        RegularZeroSumNormal("zsn_time", dims=("item", "time"))
+        RegularZeroSumNormal("zsn_item_time", n_zerosum_axes=2, dims=("item", "time"))
+
+    # Check initial_point and logp
+    ip = model.initial_point()
+    ref_ip = ref_model.initial_point()
+    assert ip.keys() == ref_ip.keys()
+    for i, (ip_value, ref_ip_value) in enumerate(zip(ip.values(), ref_ip.values())):
+        if i == 1:
+            # zsn_time is declared with transposed dims ("item", "time") in the reference model
+            ip_value = ip_value.T
+        np.testing.assert_allclose(ip_value, ref_ip_value)
+
+    logp_fn = model.compile_logp()
+    ref_logp_fn = ref_model.compile_logp()
+    logp_fn(ip)
+    # np.testing.assert_allclose(logp_fn(ip), ref_logp_fn(ref_ip))
+    # Test a new