
Commit 79cf26f

Finish mixture, add docs
1 parent a814aa5 commit 79cf26f

File tree

8 files changed: +218 −85 lines


bayesflow/distributions/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@
 from .distribution import Distribution
 from .diagonal_normal import DiagonalNormal
 from .diagonal_student_t import DiagonalStudentT
+from .mixture import Mixture
 
 from .find_distribution import find_distribution
 
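This exposes the new Mixture class at the package level alongside the existing distributions. A minimal sketch of the resulting import path:

# New public import path added by this commit
from bayesflow.distributions import DiagonalNormal, DiagonalStudentT, Mixture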

bayesflow/distributions/diagonal_normal.py

Lines changed: 30 additions & 19 deletions
@@ -1,16 +1,17 @@
-import keras
-from keras.saving import register_keras_serializable as serializable
-
 import math
+
 import numpy as np
 
+import keras
+
 from bayesflow.types import Shape, Tensor
 from bayesflow.utils.decorators import allow_batch_size
+from bayesflow.utils.serialization import serializable, serialize
 
 from .distribution import Distribution
 
 
-@serializable(package="bayesflow.distributions")
+@serializable
 class DiagonalNormal(Distribution):
     """Implements a backend-agnostic diagonal Gaussian distribution."""
 
@@ -65,10 +66,8 @@ def __init__(
     def build(self, input_shape: Shape) -> None:
         self.dim = int(input_shape[-1])
 
-        # convert to tensor and broadcast if necessary
         self.mean = keras.ops.broadcast_to(self.mean, (self.dim,))
         self.mean = keras.ops.cast(self.mean, "float32")
-
         self.std = keras.ops.broadcast_to(self.std, (self.dim,))
         self.std = keras.ops.cast(self.std, "float32")
 
@@ -77,24 +76,24 @@ def build(self, input_shape: Shape) -> None:
         )
 
         if self.use_learnable_parameters:
-            mean = self.mean
-            self.mean = self.add_weight(
-                shape=keras.ops.shape(mean),
-                initializer="zeros",
+            self._mean = self.add_weight(
+                shape=keras.ops.shape(self.mean),
+                # Initializing with const tensor https://github.com/keras-team/keras/pull/20457#discussion_r1832081248
+                initializer=keras.initializers.get(self.mean),
                 dtype="float32",
             )
-            self.mean.assign(mean)
-
-            std = self.std
-            self.std = self.add_weight(
-                shape=keras.ops.shape(std),
-                initializer="ones",
+            self._std = self.add_weight(
+                shape=keras.ops.shape(self.std),
+                # Initializing with const tensor https://github.com/keras-team/keras/pull/20457#discussion_r1832081248
+                initializer=keras.initializers.get(self.std),
                 dtype="float32",
             )
-            self.std.assign(std)
+        else:
+            self._mean = self.mean
+            self._std = self.std
 
     def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
-        result = -0.5 * keras.ops.sum((samples - self.mean) ** 2 / self.std**2, axis=-1)
+        result = -0.5 * keras.ops.sum((samples - self._mean) ** 2 / self._std**2, axis=-1)
 
         if normalize:
             result += self.log_normalization_constant
@@ -103,4 +102,16 @@ def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
 
     @allow_batch_size
     def sample(self, batch_shape: Shape) -> Tensor:
-        return self.mean + self.std * keras.random.normal(shape=batch_shape + (self.dim,), seed=self.seed_generator)
+        return self._mean + self._std * keras.random.normal(shape=batch_shape + (self.dim,), seed=self.seed_generator)
+
+    def get_config(self):
+        base_config = super().get_config()
+
+        config = {
+            "mean": self.mean,
+            "std": self.std,
+            "use_learnable_parameters": self.use_learnable_parameters,
+            "seed_generator": self.seed_generator,
+        }
+
+        return base_config | serialize(config)
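The refactor routes log_prob and sample through the built parameters (_mean, _std), while get_config serializes the original constructor arguments. A quick sketch checking the normalized log-density against the Gaussian closed form -0.5 * dim * log(2π) at the origin; it assumes the constructor accepts scalar mean and std, as the get_config keys above suggest:

import math

import keras
from bayesflow.distributions import DiagonalNormal

dist = DiagonalNormal(mean=0.0, std=1.0)  # assumed constructor kwargs
dist.build((None, 2))  # broadcasts mean/std and sets dim = 2

x = keras.ops.zeros((1, 2))
print(float(dist.log_prob(x)[0]))        # expected ≈ -1.8379
print(-0.5 * 2 * math.log(2 * math.pi))  # closed-form reference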

bayesflow/distributions/diagonal_student_t.py

Lines changed: 26 additions & 15 deletions
@@ -1,17 +1,17 @@
 import keras
-from keras.saving import register_keras_serializable as serializable
 
 import math
 import numpy as np
 
 from bayesflow.types import Shape, Tensor
 from bayesflow.utils import expand_tile
 from bayesflow.utils.decorators import allow_batch_size
+from bayesflow.utils.serialization import serializable, serialize
 
 from .distribution import Distribution
 
 
-@serializable(package="bayesflow.distributions")
+@serializable
 class DiagonalStudentT(Distribution):
     """Implements a backend-agnostic diagonal Student-t distribution."""
 

@@ -86,24 +86,22 @@ def build(self, input_shape: Shape) -> None:
         )
 
         if self.use_learnable_parameters:
-            loc = self.loc
-            self.loc = self.add_weight(
-                shape=keras.ops.shape(loc),
-                initializer="zeros",
+            self._loc = self.add_weight(
+                shape=keras.ops.shape(self.loc),
+                initializer=keras.initializers.get(self.loc),
                 dtype="float32",
             )
-            self.loc.assign(loc)
-
-            scale = self.scale
-            self.scale = self.add_weight(
-                shape=keras.ops.shape(scale),
-                initializer="ones",
+            self._scale = self.add_weight(
+                shape=keras.ops.shape(self.scale),
+                initializer=keras.initializers.get(self.scale),
                 dtype="float32",
             )
-            self.scale.assign(scale)
+        else:
+            self._loc = self.loc
+            self._scale = self.scale
 
     def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
-        mahalanobis_term = keras.ops.sum((samples - self.loc) ** 2 / self.scale**2, axis=-1)
+        mahalanobis_term = keras.ops.sum((samples - self._loc) ** 2 / self._scale**2, axis=-1)
         result = -0.5 * (self.df + self.dim) * keras.ops.log1p(mahalanobis_term / self.df)
 
         if normalize:
@@ -124,4 +122,17 @@ def sample(self, batch_shape: Shape) -> Tensor:
 
         normal_samples = keras.random.normal(batch_shape + (self.dim,), seed=self.seed_generator)
 
-        return self.loc + self.scale * normal_samples * keras.ops.sqrt(self.df / chi2_samples)
+        return self._loc + self._scale * normal_samples * keras.ops.sqrt(self.df / chi2_samples)
+
+    def get_config(self):
+        base_config = super().get_config()
+
+        config = {
+            "df": self.df,
+            "loc": self.loc,
+            "scale": self.scale,
+            "use_learnable_parameters": self.use_learnable_parameters,
+            "seed_generator": self.seed_generator,
+        }
+
+        return base_config | serialize(config)
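The sample method relies on the classic scale-mixture representation: if Z ~ N(0, 1) and V ~ chi-squared(df), then Z * sqrt(df / V) follows a Student-t distribution with df degrees of freedom. A self-contained NumPy check against the known variance df / (df - 2) for df > 2; the variable names here are illustrative only:

import numpy as np

rng = np.random.default_rng(0)
df, n = 5.0, 1_000_000

z = rng.standard_normal(n)       # Z ~ N(0, 1)
v = rng.chisquare(df, size=n)    # V ~ chi-squared(df)
t_samples = z * np.sqrt(df / v)  # Student-t with df degrees of freedom

print(t_samples.var())  # should be close to df / (df - 2) = 5 / 3
print(df / (df - 2.0))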

bayesflow/distributions/distribution.py

Lines changed: 6 additions & 0 deletions
@@ -2,8 +2,10 @@
 
 from bayesflow.types import Shape, Tensor
 from bayesflow.utils import layer_kwargs
+from bayesflow.utils.serialization import serializable, deserialize
 
 
+@serializable
 class Distribution(keras.Layer):
     def __init__(self, **kwargs):
         super().__init__(**layer_kwargs(kwargs))
@@ -19,3 +21,7 @@ def sample(self, batch_shape: Shape) -> Tensor:
 
     def compute_output_shape(self, input_shape: Shape) -> Shape:
         return keras.ops.shape(self.sample(input_shape[0:1]))
+
+    @classmethod
+    def from_config(cls, config, custom_objects=None):
+        return cls(**deserialize(config, custom_objects=custom_objects))
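Together with the get_config methods added above, this shared from_config lets the distributions round-trip through the standard Keras serialization machinery. A hedged sketch, again assuming DiagonalNormal's constructor matches its get_config keys:

import keras
from bayesflow.distributions import DiagonalNormal

dist = DiagonalNormal(mean=0.0, std=1.0)

# Serialize to a plain config dict, then rebuild via from_config.
config = keras.saving.serialize_keras_object(dist)
restored = keras.saving.deserialize_keras_object(config)
assert isinstance(restored, DiagonalNormal)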

bayesflow/distributions/mixture.py

Lines changed: 155 additions & 0 deletions
@@ -0,0 +1,155 @@
+from collections.abc import Sequence
+
+import numpy as np
+
+import keras
+from keras import ops
+
+from bayesflow.types import Shape, Tensor
+from bayesflow.utils.decorators import allow_batch_size
+from bayesflow.utils.serialization import serializable, serialize
+from bayesflow.distributions import Distribution
+
+
+@serializable
+class Mixture(Distribution):
+    """Utility class for a backend-agnostic mixture distribution."""
+
+    def __init__(
+        self,
+        distributions: Sequence[Distribution],
+        mixture_logits: Sequence[float] = None,
+        trainable_mixture: bool = False,
+        **kwargs,
+    ):
+        """
+        Initializes a mixture of distributions as a latent distribution.
+
+        Parameters
+        ----------
+        distributions : Sequence[Distribution]
+            A sequence of `Distribution` instances to form the mixture components.
+        mixture_logits : Sequence[float], optional
+            Initial unnormalized log-weights for each component. If `None`, all
+            components are assigned equal weight. Default is `None`.
+        trainable_mixture : bool, optional
+            Whether the mixture weights (`mixture_logits`) should be trainable.
+            Default is `False`.
+        **kwargs
+            Additional keyword arguments passed to the base `Distribution` class.
+
+        Attributes
+        ----------
+        distributions : Sequence[Distribution]
+            The list of component distributions.
+        mixture_logits : Tensor
+            Trainable or fixed logits representing the mixture weights.
+        dim : int or None
+            Dimensionality of the output samples; set when the layer is built.
+        """
+
+        super().__init__(**kwargs)
+
+        self.dim = None
+        self.distributions = distributions
+
+        if mixture_logits is None:
+            mixture_logits = keras.ops.ones(shape=len(distributions))
+
+        self.mixture_logits = mixture_logits
+        self._mixture_logits = self.add_weight(
+            shape=(len(distributions),),
+            initializer=keras.initializers.Constant(value=mixture_logits),
+            dtype="float32",
+            trainable=trainable_mixture,
+        )
+
+        self.trainable_mixture = trainable_mixture
+
+    @allow_batch_size
+    def sample(self, batch_shape: Shape) -> Tensor:
+        """
+        Draws samples from the mixture distribution by sampling a categorical index
+        for each entry in `batch_shape` according to the softmax of `mixture_logits`,
+        then drawing from the corresponding component distribution.
+
+        Parameters
+        ----------
+        batch_shape : Shape
+            The desired sample batch shape (tuple of ints), not including the
+            event dimension.
+
+        Returns
+        -------
+        samples : Tensor
+            A tensor of shape `batch_shape + (dim,)` containing samples drawn
+            from the mixture.
+        """
+        # Will use numpy until keras adds support for N-D categorical sampling
+        pvals = keras.ops.convert_to_numpy(keras.ops.softmax(self._mixture_logits))
+        cat_samples = np.random.multinomial(n=1, pvals=pvals, size=batch_shape)
+        cat_samples = cat_samples.argmax(axis=-1)
+
+        # Prepare array to fill and dtype to infer
+        samples = np.zeros(batch_shape + (self.dim,))
+        dtype = None
+
+        # Fill in array with vectorized sampling per component
+        for i in range(len(self.distributions)):
+            dist_mask = cat_samples == i
+            dist_indices = np.where(dist_mask)
+            num_dist_samples = np.sum(dist_mask)
+            dist_samples = keras.ops.convert_to_numpy(self.distributions[i].sample(num_dist_samples))
+
+            samples[dist_indices] = dist_samples
+
+            dtype = dtype or keras.ops.dtype(dist_samples)
+
+        # Convert to keras for compatibility
+        samples = keras.ops.convert_to_tensor(samples, dtype=dtype)
+
+        return samples
+
+    def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
+        """
+        Computes the log probability of the given samples under the mixture.
+
+        For each input sample, computes the log-sum-exp of the component
+        log-probabilities plus the mixture log-weights.
+
+        Parameters
+        ----------
+        samples : Tensor
+            A tensor of samples with shape `batch_shape + (dim,)`.
+        normalize : bool, optional
+            If `True`, returns normalized log-probabilities (i.e., includes the
+            log normalization constant). Default is `True`.
+
+        Returns
+        -------
+        Tensor
+            A tensor of shape `batch_shape` containing the log probability of
+            each sample under the mixture distribution.
+        """
+
+        log_prob = [distribution.log_prob(samples, normalize=normalize) for distribution in self.distributions]
+        log_prob = ops.stack(log_prob, axis=-1)
+        log_prob = ops.logsumexp(log_prob + ops.log_softmax(self._mixture_logits), axis=-1)
+        return log_prob
+
+    def build(self, input_shape: Shape) -> None:
+        for distribution in self.distributions:
+            distribution.build(input_shape)
+
+        self.dim = input_shape[-1]
+
+    def get_config(self):
+        base_config = super().get_config()
+
+        config = {
+            "distributions": self.distributions,
+            "mixture_logits": self.mixture_logits,
+            "trainable_mixture": self.trainable_mixture,
+        }
+
+        return base_config | serialize(config)
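A usage sketch for the new class; the constructor arguments come from the diff above, while the component parameters are illustrative. The mixture density is p(x) = Σ_k softmax(logits)_k · p_k(x), which log_prob evaluates stably in log space as a logsumexp over log p_k(x) + log_softmax(logits)_k:

import keras
from bayesflow.distributions import DiagonalNormal, Mixture

# Two well-separated Gaussian components (illustrative parameters)
mixture = Mixture(
    distributions=[
        DiagonalNormal(mean=-2.0, std=0.5),
        DiagonalNormal(mean=2.0, std=0.5),
    ],
    trainable_mixture=True,  # make the mixture logits learnable
)
mixture.build((None, 2))  # builds each component and sets dim = 2

samples = mixture.sample((64,))          # shape (64, 2)
log_density = mixture.log_prob(samples)  # shape (64,)
print(keras.ops.shape(samples), keras.ops.shape(log_density))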

bayesflow/distributions/mixture_distribution.py

Lines changed: 0 additions & 51 deletions
This file was deleted.

tests/test_distributions/test_diagonal_normal.py

Whitespace-only changes.

tests/test_distributions/test_diagonal_student_t.py

Whitespace-only changes.
