@@ -209,39 +209,53 @@ def create_config(cls, **kwargs):
209209
class EvidentialNetwork(tf.keras.Model):
    """Implements a network whose outputs are the concentration parameters of a Dirichlet density.

    Follows ideas from:

    [1] Radev, S. T., D'Alessandro, M., Mertens, U. K., Voss, A., Köthe, U., & Bürkner, P. C. (2021).
    Amortized Bayesian model comparison with evidential deep learning.
    IEEE Transactions on Neural Networks and Learning Systems.

    [2] Sensoy, M., Kaplan, L., & Kandemir, M. (2018).
    Evidential deep learning to quantify classification uncertainty.
    Advances in neural information processing systems, 31.
    """

    def __init__(self, num_models, dense_args=None, num_dense=3, output_activation='softplus', **kwargs):
        """Creates an instance of an evidential network for amortized model comparison.

        Parameters
        ----------
        num_models : int
            The number of candidate (competing models) for the comparison scenario.
        dense_args : dict or None, optional, default: None
            The arguments for a tf.keras.layers.Dense layer. If None, defaults will be used.
        num_dense : int, optional, default: 3
            The number of dense layers for the main network part.
        output_activation : str or callable, optional, default: 'softplus'
            The activation function to use for the network outputs.
            Important: needs to have positive outputs.
        **kwargs : dict, optional, default: {}
            Optional keyword arguments (e.g., name) passed to the tf.keras.Model __init__ method.
        """

        super().__init__(**kwargs)

        if dense_args is None:
            dense_args = default_settings.DEFAULT_SETTINGS_DENSE_EVIDENTIAL

        # A network to increase representation power
        self.dense = tf.keras.Sequential([
            tf.keras.layers.Dense(**dense_args)
            for _ in range(num_dense)
        ])

        # The layer to output model evidences: reuses dense_args except for
        # 'units' and 'activation', which are fixed by num_models / output_activation
        self.alpha_layer = tf.keras.layers.Dense(
            num_models, activation=output_activation,
            **{k: v for k, v in dense_args.items() if k != 'units' and k != 'activation'})

        self.num_models = num_models

    def call(self, condition, **kwargs):
        """Computes evidences for model comparison given a batch of data and optional concatenated context,
        typically passed through a summary network.

        Parameters
        ----------
        condition : tf.Tensor of shape (batch_size, ...)
            The input conditions (e.g., learned summary statistics of the data sets).

        Returns
        -------
        evidence : tf.Tensor of shape (batch_size, num_models) -- the learned model evidences
        """

        return self.evidence(condition, **kwargs)

    @tf.function
    def evidence(self, condition, **kwargs):
        """Computes the evidences (Dirichlet concentration parameters) for the given conditions.

        The raw network outputs (positive, due to the output activation) are shifted by +1
        so that the resulting concentration parameters are always > 1, i.e., the implied
        Dirichlet density is unimodal.

        Parameters
        ----------
        condition : tf.Tensor of shape (batch_size, ...)
            The input conditions (e.g., learned summary statistics of the data sets).

        Returns
        -------
        evidence : tf.Tensor of shape (batch_size, num_models) -- the learned model evidences
        """
        rep = self.dense(condition, **kwargs)
        alpha = self.alpha_layer(rep, **kwargs)
        evidence = alpha + 1.
        return evidence

    def sample(self, condition, n_samples, **kwargs):
        """Samples posterior model probabilities from the higher-order Dirichlet density.

        Parameters
        ----------
        condition : tf.Tensor
            The summary of the observed (or simulated) data, shape (n_data_sets, ...)
        n_samples : int
            Number of samples to obtain from the approximate posterior

        Returns
        -------
        pm_samples : tf.Tensor or np.array
            The posterior draws from the Dirichlet distribution, shape (num_samples, num_batch, num_models)
        """

        alpha = self.evidence(condition, **kwargs)
        n_datasets = alpha.shape[0]
        # Fix: np.default_rng does not exist -- the Generator constructor lives in
        # np.random. Also create the generator once instead of once per data set.
        rng = np.random.default_rng()
        pm_samples = np.stack(
            [rng.dirichlet(alpha[n, :], size=n_samples) for n in range(n_datasets)], axis=1)
        return pm_samples

    @classmethod
    def create_config(cls, **kwargs):
        """Used to create the settings dictionary for the internal networks of the evidential
        network. Will fill in missing entries with the corresponding default settings.

        Parameters
        ----------
        **kwargs : dict, optional, default: {}
            User-supplied settings that override the defaults.

        Returns
        -------
        settings : dict
            The merged settings dictionary.
        """

        settings = build_meta_dict(user_dict=kwargs,
                                   default_setting=default_settings.DEFAULT_SETTING_EVIDENTIAL_NET)
        return settings
0 commit comments