Skip to content

Commit 1dd023b

Browse files
dakshdaksh
authored and committed
Update n3fit from BayesianPDF repo
1 parent 7ec5496 commit 1dd023b

File tree

21 files changed

+1521
-589
lines changed

21 files changed

+1521
-589
lines changed

n3fit/src/n3fit/backends/keras_backend/MetaModel.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
def _default_loss(y_true, y_pred):  # pylint: disable=unused-argument
    """Default loss to be used when the model is compiled with loss = Null
    (for instance if the prediction of the model is already the loss)."""
    # y_true is ignored on purpose: the model output is already the loss value
    return ops.sum(y_pred)
5454

5555

5656
class MetaModel(Model):
@@ -219,7 +219,7 @@ def losses_fun():
219219
# If we only have one dataset the output changes
220220
if len(out_names) == 2:
221221
predictions = [predictions]
222-
total_loss = ops.nansum(predictions, axis=0)
222+
total_loss = ops.sum(predictions, axis=0)
223223
ret = [total_loss] + predictions
224224
return dict(zip(out_names, ret))
225225

n3fit/src/n3fit/backends/keras_backend/base_layers.py

Lines changed: 137 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,36 @@
11
"""
2-
This module defines custom base layers to be used by the n3fit
3-
Neural Network.
4-
These layers can use the keras standard set of activation function
5-
or implement their own.
2+
This module defines custom base layers to be used by the n3fit
3+
Neural Network.
4+
These layers can use the keras standard set of activation function
5+
or implement their own.
66
7-
For a layer to be used by n3fit it should be contained in the `layers` dictionary defined below.
8-
This dictionary has the following structure:
7+
For a layer to be used by n3fit it should be contained in the `layers` dictionary defined below.
8+
This dictionary has the following structure:
99
10-
'name of the layer' : ( Layer_class, {dictionary of arguments: defaults} )
10+
'name of the layer' : ( Layer_class, {dictionary of arguments: defaults} )
1111
12-
In order to add custom activation functions, they must be added to
13-
the `custom_activations` dictionary with the following structure:
12+
In order to add custom activation functions, they must be added to
13+
the `custom_activations` dictionary with the following structure:
1414
15-
'name of the activation' : function
15+
'name of the activation' : function
1616
17-
The names of the layer and the activation function are the ones to be used in the n3fit runcard.
17+
The names of the layer and the activation function are the ones to be used in the n3fit runcard.
1818
"""
19+
import numpy as np
20+
import keras.backend as K
21+
import tensorflow as tf
22+
import math
23+
from scipy.stats import norm
1924

2025
from keras.layers import Dense as KerasDense
21-
from keras.layers import Dropout, Lambda
26+
from keras.layers import Dropout, Lambda, Layer
2227
from keras.layers import Input # pylint: disable=unused-import
2328
from keras.layers import LSTM, Concatenate
2429
from keras.regularizers import l1_l2
2530

2631
from . import operations as ops
2732
from .MetaLayer import MetaLayer
28-
33+
from contextlib import contextmanager
2934

3035
# Custom activation functions
3136
def square_activation(x):
@@ -74,14 +79,116 @@ def ReshapedLSTM(input_tensor):
7479

7580
return ReshapedLSTM
7681

82+
class VBDense(Layer):
    """Variational (Bayesian) fully connected layer.

    The kernel is modelled as a factorised Gaussian posterior with mean
    ``mu_w`` and log-variance ``logsig2_w``; the bias is a point estimate.
    During training the forward pass uses the local reparameterisation
    trick (sampling the pre-activations). During inference either the
    posterior mean is used (``map=True``) or a weight matrix is sampled
    from the posterior.

    Parameters
    ----------
    out_features : int
        Number of output units.
    in_features : int
        Number of input units.
    prior_prec : float
        Precision of the zero-mean Gaussian prior over the weights,
        used by :meth:`kl_loss`.
    map : bool
        If True, inference uses the posterior mean (deterministic output).
        NOTE(review): shadows the builtin ``map``; kept for interface
        compatibility with existing callers.
    std_init : float
        Mean of the normal initialisation of ``logsig2_w``.
    lbound, ubound : float
        Clipping bounds for the log-variance, keeping ``exp()`` finite.
    training : bool
        Whether the layer starts in training mode.
    """

    def __init__(
        self,
        out_features: int,
        in_features: int,
        prior_prec: float = 0.001,
        map: bool = False,
        std_init: float = -9,
        lbound=-30,
        ubound=11,
        training=True,
    ):
        super().__init__()
        self.output_dim = out_features
        self.input_dim = in_features
        self.map = map
        self.prior_prec = tf.cast(prior_prec, tf.float64)
        # Optional cached weight sample, cleared by reset_random()
        self.random = None
        # Numerical floor added to the pre-activation variance
        self.eps = 1e-12 if K.floatx() == 'float64' else 1e-8
        self.std_init = tf.cast(std_init, tf.float64)
        self.lbound = lbound
        self.ubound = ubound
        self.training = training

    def build(self, input_shape):
        """Create the bias, posterior mean and posterior log-variance weights."""
        self.bias = self.add_weight(
            name='bias',
            shape=(self.output_dim,),
            initializer='glorot_normal',
            trainable=True,
            dtype=tf.float64,
        )

        self.mu_w = self.add_weight(
            name='mu_w',
            shape=(self.output_dim, self.input_dim),
            initializer='glorot_normal',
            trainable=True,
            dtype=tf.float64,
        )

        self.logsig2_w = self.add_weight(
            name='logsig2_w',
            shape=(self.output_dim, self.input_dim),
            initializer='glorot_normal',
            trainable=True,
            dtype=tf.float64,
        )

        # Overwrite the glorot initialisation with the scheme below
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise: zero bias, fan-in scaled mean, small log-variance."""
        stdv = 1.0 / tf.math.sqrt(tf.cast(self.input_dim, dtype=tf.float64))
        self.bias.assign(tf.zeros_like(self.bias))
        self.mu_w.assign(tf.random.normal(tf.shape(self.mu_w), mean=0, stddev=stdv, dtype=tf.float64))
        self.logsig2_w.assign(
            tf.random.normal(tf.shape(self.logsig2_w), mean=self.std_init, stddev=0.001, dtype=tf.float64)
        )

    def reset_random(self):
        """Discard any cached weight sample and disable MAP mode."""
        self.random = None
        self.map = False

    def train(self):
        """Switch the layer to training mode (stochastic pre-activations)."""
        self.training = True

    def eval(self):
        """Switch the layer to inference mode."""
        self.training = False

    def kl_loss(self) -> tf.Tensor:
        """KL divergence between the weight posterior and the Gaussian prior."""
        logsig2_w = tf.clip_by_value(self.logsig2_w, self.lbound, self.ubound)
        kl = 0.5 * tf.reduce_sum(
            (
                self.prior_prec * (tf.math.pow(self.mu_w, 2) + tf.math.exp(logsig2_w))
                - logsig2_w
                - tf.constant(1.0, dtype=tf.float64)
                - tf.math.log(self.prior_prec)
            )
        )
        return kl

    def call(self, input: tf.Tensor) -> tf.Tensor:
        """Forward pass; see the class docstring for the three modes."""
        # Ensure input is tf.float64
        input = tf.cast(input, tf.float64)

        if self.training:
            # Local reparameterisation: sample the pre-activations rather
            # than the weights (lower-variance gradient estimates)
            mu_out = tf.matmul(input, tf.cast(self.mu_w, input.dtype), transpose_b=True) + tf.cast(
                self.bias, input.dtype
            )
            logsig2_w = tf.clip_by_value(self.logsig2_w, self.lbound, self.ubound)
            s2_w = tf.math.exp(logsig2_w)
            input2 = tf.math.pow(input, 2)
            var_out = tf.matmul(input2, s2_w, transpose_b=True) + tf.cast(self.eps, input.dtype)

            return mu_out + tf.math.sqrt(var_out) * tf.random.normal(shape=tf.shape(mu_out), dtype=input.dtype)

        # During inference, use MAP estimation (posterior mean) for deterministic output
        if self.map:
            mu_out = tf.matmul(input, self.mu_w, transpose_b=True) + self.bias
            return mu_out

        # Was hard-coded to 11; use self.ubound for consistency with training
        logsig2_w = tf.clip_by_value(self.logsig2_w, self.lbound, self.ubound)
        s2_w = tf.math.exp(logsig2_w)
        # Draw a fresh weight sample every call. The previous version also
        # created an unused cached tf.Variable here; that dead allocation
        # has been removed (self.random is kept only for the
        # reset_random() interface).
        epsilon = tf.random.normal(shape=tf.shape(self.mu_w), dtype=tf.float64)
        weight = self.mu_w + tf.math.sqrt(s2_w) * epsilon

        return tf.matmul(input, weight, transpose_b=True) + self.bias
183+
77184

78-
class Dense(KerasDense, MetaLayer):
79185

80-
def __init__(self, *args, **kwargs):
81-
# In Keras == 3.13, np.int() is not accepted by Dense
82-
if "units" in kwargs:
83-
kwargs["units"] = int(kwargs["units"])
84-
super().__init__(*args, **kwargs)
186+
class Dense(KerasDense, MetaLayer):
    """Project wrapper around ``keras.layers.Dense``.

    Defaults the layer dtype to ``tf.float64`` (unless the caller passes an
    explicit ``dtype``) and coerces ``units`` to a plain ``int``, since
    Keras >= 3.13 rejects numpy integer types for that argument.
    """

    def __init__(self, *args, **kwargs):
        # In Keras == 3.13, np.int() is not accepted by Dense
        if "units" in kwargs:
            kwargs["units"] = int(kwargs["units"])
        # Set default dtype to tf.float64 if not provided
        kwargs.setdefault("dtype", tf.float64)
        super().__init__(*args, **kwargs)
85192

86193

87194
def dense_per_flavour(basis_size=8, kernel_initializer="glorot_normal", **dense_kwargs):
@@ -133,7 +240,6 @@ def apply_dense(xinput):
133240

134241
return apply_dense
135242

136-
137243
layers = {
138244
"dense": (
139245
Dense,
@@ -142,6 +248,7 @@ def apply_dense(xinput):
142248
"units": 5,
143249
"activation": "sigmoid",
144250
"kernel_regularizer": None,
251+
"dtype": tf.float64,
145252
},
146253
),
147254
"dense_per_flavour": (
@@ -151,12 +258,20 @@ def apply_dense(xinput):
151258
"units": 5,
152259
"activation": "sigmoid",
153260
"basis_size": 8,
261+
"dtype": tf.float64,
154262
},
155263
),
156264
"LSTM": (
157265
LSTM_modified,
158266
{"kernel_initializer": "glorot_normal", "units": 5, "activation": "sigmoid"},
159267
),
268+
"VBDense": (
269+
VBDense,
270+
{
271+
"in_features" : None,
272+
"out_features" : None,
273+
},
274+
),
160275
"dropout": (Dropout, {"rate": 0.0}),
161276
"concatenate": (Concatenate, {}),
162277
}
@@ -173,7 +288,7 @@ def base_layer_selector(layer_name, **kwargs):
173288
174289
Parameters
175290
----------
176-
`layer_name
291+
`layer_name`
177292
str with the name of the layer
178293
`**kwargs`
179294
extra optional arguments to pass to the layer (beyond their defaults)
@@ -232,4 +347,4 @@ def regularizer_selector(reg_name, **kwargs):
232347
if key in reg_args.keys():
233348
reg_args[key] = value
234349

235-
return reg_class(**reg_args)
350+
return reg_class(**reg_args)

n3fit/src/n3fit/backends/keras_backend/operations.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
expand_dims,
4747
leaky_relu,
4848
reshape,
49-
nan_to_num,
5049
repeat,
5150
split,
5251
sum,
@@ -291,8 +290,3 @@ def tensor_splitter(ishape, split_sizes, axis=2, name="splitter"):
291290
lambda x: Kops.split(x, indices, axis=axis), output_shape=oshapes, name=name
292291
)
293292
return sp_layer
294-
295-
296-
def nansum(x, *args, **kwargs):
297-
"""Like np.nansum, returns the sum treating NaN as 0.0 (and inf as a very large number)."""
298-
return sum(nan_to_num(x), *args, **kwargs)

n3fit/src/n3fit/bnn_wrapper.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
"""
2+
Wrapper for BNN (Bayesian Neural Network) inference
3+
4+
This module provides utilities for:
5+
1. Detecting if a model is a BNN (has VBDense layers)
6+
2. Generating pseudo-replicas from BNN weight samples for plotting/analysis
7+
"""
8+
9+
import numpy as np
10+
import tensorflow as tf
11+
from n3fit.backends.keras_backend.base_layers import VBDense
12+
13+
14+
15+
def is_bayesian_model(pdf_model):
    """
    Determine whether ``pdf_model`` is a Bayesian neural network.

    A model counts as a BNN when at least one ``VBDense`` layer is found
    anywhere in its layer hierarchy.

    Parameters
    ----------
    pdf_model : MetaModel
        The PDF model to inspect

    Returns
    -------
    bool
        True when the model contains VBDense layers
    """
    return bool(get_vb_layers(pdf_model))
31+
32+
def _get_all_layers_recursively(container):
33+
"""
34+
Recursively get all layers at any depth
35+
Ques.: At what layer depth of network would this hit RecursionError?
36+
"""
37+
layers = [container]
38+
if hasattr(container, 'layers'):
39+
for sub_layer in container.layers:
40+
layers.extend(_get_all_layers_recursively(sub_layer))
41+
return layers
42+
43+
def get_vb_layers(pdf_model):
    """
    Collect every ``VBDense`` layer contained in a PDF model.

    The full layer hierarchy is walked recursively, so layers nested
    inside sub-models at any depth are found as well.
    """
    return [
        layer
        for layer in _get_all_layers_recursively(pdf_model)
        if isinstance(layer, VBDense)
    ]
59+
60+
class BNNPredictor:
    """
    Sampling-based predictor for Bayesian PDF models.

    Repeatedly draws from the posterior encoded in the model's VBDense
    layers to produce predictions with uncertainty estimates.
    """

    def __init__(self, pdf_model, n_samples=3):
        """
        Initialize the BNN predictor

        Parameters
        ----------
        pdf_model : MetaModel
            The trained PDF model with VBDense layers
        n_samples : int
            Number of posterior samples to draw
        """
        self.pdf_model = pdf_model
        self.n_samples = n_samples
        self.vb_layers = get_vb_layers(pdf_model)

    def reset_random(self):
        """Clear the cached random state of every VBDense layer."""
        for layer in self.vb_layers:
            layer.reset_random()

    def eval(self):
        """Put every VBDense layer in inference mode (training=False)."""
        for layer in self.vb_layers:
            layer.eval()

    def train(self):
        """Put every VBDense layer in training mode (training=True)."""
        for layer in self.vb_layers:
            layer.train()

    def generate_bnn_replica(self):
        """Build one single-replica model per posterior sample.

        Each iteration resets the layers' random state and switches to
        inference mode before generating the replica, so the replicas
        correspond to distinct weight samples.
        """
        replica_models = []
        for _ in range(self.n_samples):
            self.reset_random()
            self.eval()
            replica_models.append(self.pdf_model.single_replica_generator(0))
        return replica_models

    def generate_predictions(self, xinput):
        """
        Generate predictions by sampling from the posterior

        Parameters
        ----------
        xinput : InputInfo.input.tensor_content
            an array containing the input values

        Returns
        -------
        predictions : list
            one model output per posterior sample
        """
        predictions = []
        for _ in range(self.n_samples):
            # Fresh weight sample for every prediction
            self.reset_random()
            self.eval()
            predictions.append(self.pdf_model.predict({"pdf_input": xinput}))
        return predictions

0 commit comments

Comments
 (0)