Skip to content

Commit 4d8596b

Browse files
committed
Update student and mixture with multiple dims too
1 parent bc2bda8 commit 4d8596b

File tree

2 files changed

+15
-14
lines changed

2 files changed

+15
-14
lines changed

bayesflow/distributions/diagonal_student_t.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,19 +63,19 @@ def __init__(
6363

6464
self.seed_generator = seed_generator or keras.random.SeedGenerator()
6565

66-
self.dim = None
66+
self.dims = None
6767
self._loc = None
6868
self._scale = None
6969

7070
def build(self, input_shape: Shape) -> None:
7171
if self.built:
7272
return
7373

74-
self.dim = int(input_shape[-1])
74+
self.dims = tuple(input_shape[1:])
7575

7676
# convert to tensor and broadcast if necessary
77-
self.loc = ops.cast(ops.broadcast_to(self.loc, (self.dim,)), "float32")
78-
self.scale = ops.cast(ops.broadcast_to(self.scale, (self.dim,)), "float32")
77+
self.loc = ops.cast(ops.broadcast_to(self.loc, self.dims), "float32")
78+
self.scale = ops.cast(ops.broadcast_to(self.scale, self.dims), "float32")
7979

8080
if self.trainable_parameters:
8181
self._loc = self.add_weight(
@@ -96,14 +96,14 @@ def build(self, input_shape: Shape) -> None:
9696

9797
def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
9898
mahalanobis_term = ops.sum((samples - self._loc) ** 2 / self._scale**2, axis=-1)
99-
result = -0.5 * (self.df + self.dim) * ops.log1p(mahalanobis_term / self.df)
99+
result = -0.5 * (self.df + sum(self.dims)) * ops.log1p(mahalanobis_term / self.df)
100100

101101
if normalize:
102102
log_normalization_constant = (
103-
-0.5 * self.dim * math.log(self.df)
104-
- 0.5 * self.dim * math.log(math.pi)
103+
-0.5 * sum(self.dims) * math.log(self.df)
104+
- 0.5 * sum(self.dims) * math.log(math.pi)
105105
- math.lgamma(0.5 * self.df)
106-
+ math.lgamma(0.5 * (self.df + self.dim))
106+
+ math.lgamma(0.5 * (self.df + sum(self.dims)))
107107
- ops.sum(keras.ops.log(self._scale))
108108
)
109109
result += log_normalization_constant
@@ -119,9 +119,10 @@ def sample(self, batch_shape: Shape) -> Tensor:
119119

120120
# The chi-square samples need to be repeated across self.dims
121121
# since for each element of batch_shape only one sample is created.
122-
chi2_samples = expand_tile(chi2_samples, n=self.dim, axis=-1)
122+
chi2_samples = expand_tile(chi2_samples, n=sum(self.dims), axis=-1)
123+
chi2_samples = keras.ops.reshape(chi2_samples, batch_shape + self.dims)
123124

124-
normal_samples = keras.random.normal(batch_shape + (self.dim,), seed=self.seed_generator)
125+
normal_samples = keras.random.normal(batch_shape + self.dims, seed=self.seed_generator)
125126

126127
return self._loc + self._scale * normal_samples * ops.sqrt(self.df / chi2_samples)
127128

bayesflow/distributions/mixture.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def __init__(
5959

6060
self.trainable_mixture = trainable_mixture
6161

62-
self.dim = None
62+
self.dims = None
6363
self._mixture_logits = None
6464

6565
@allow_batch_size
@@ -78,7 +78,7 @@ def sample(self, batch_shape: Shape) -> Tensor:
7878
Returns
7979
-------
8080
samples: Tensor
81-
A tensor of shape `batch_shape + (dim,)` containing samples drawn
81+
A tensor of shape `batch_shape + dims` containing samples drawn
8282
from the mixture.
8383
"""
8484
# Will use numpy until keras adds support for N-D categorical sampling
@@ -87,7 +87,7 @@ def sample(self, batch_shape: Shape) -> Tensor:
8787
cat_samples = cat_samples.argmax(axis=-1)
8888

8989
# Prepare array to fill and dtype to infer
90-
samples = np.zeros(batch_shape + (self.dim,))
90+
samples = np.zeros(batch_shape + self.dims)
9191
dtype = None
9292

9393
# Fill in array with vectorized sampling per component
@@ -137,7 +137,7 @@ def build(self, input_shape: Shape) -> None:
137137
if self.built:
138138
return
139139

140-
self.dim = input_shape[-1]
140+
self.dims = tuple(input_shape[1:])
141141

142142
for distribution in self.distributions:
143143
distribution.build(input_shape)

0 commit comments

Comments
 (0)