Skip to content

Commit 3e378a3

Browse files
committed
Fix inference sampling and switch to training mode
1 parent 3cbc8d4 commit 3e378a3

File tree

3 files changed

+169
-140
lines changed

3 files changed

+169
-140
lines changed

n3fit/src/n3fit/backends/keras_backend/base_layers.py

Lines changed: 83 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
11
"""
2-
This module defines custom base layers to be used by the n3fit
3-
Neural Network.
4-
These layers can use the keras standard set of activation functions
5-
or implement their own.
2+
This module defines custom base layers to be used by the n3fit
3+
Neural Network.
4+
These layers can use the keras standard set of activation functions
5+
or implement their own.
66
7-
For a layer to be used by n3fit it should be contained in the `layers` dictionary defined below.
8-
This dictionary has the following structure:
7+
For a layer to be used by n3fit it should be contained in the `layers` dictionary defined below.
8+
This dictionary has the following structure:
99
10-
'name of the layer' : ( Layer_class, {dictionary of arguments: defaults} )
10+
'name of the layer' : ( Layer_class, {dictionary of arguments: defaults} )
1111
12-
In order to add custom activation functions, they must be added to
13-
the `custom_activations` dictionary with the following structure:
12+
In order to add custom activation functions, they must be added to
13+
the `custom_activations` dictionary with the following structure:
1414
15-
'name of the activation' : function
15+
'name of the activation' : function
1616
17-
The names of the layer and the activation function are the ones to be used in the n3fit runcard.
17+
The names of the layer and the activation function are the ones to be used in the n3fit runcard.
1818
"""
19+
1920
import numpy as np
2021
import keras.backend as K
2122
import tensorflow as tf
@@ -32,6 +33,7 @@
3233
from .MetaLayer import MetaLayer
3334
from contextlib import contextmanager
3435

36+
3537
# Custom activation functions
3638
def square_activation(x):
3739
"""Squares the input"""
@@ -79,17 +81,18 @@ def ReshapedLSTM(input_tensor):
7981

8082
return ReshapedLSTM
8183

84+
8285
class VBDense(Layer):
8386
def __init__(
84-
self,
85-
out_features: int,
86-
in_features: int,
87-
prior_prec: float = 0.001,
88-
map: bool = False,
89-
std_init: float = -9,
90-
lbound=-30,
91-
ubound=11,
92-
training = True
87+
self,
88+
out_features: int,
89+
in_features: int,
90+
prior_prec: float = 0.001,
91+
map: bool = False,
92+
std_init: float = -9,
93+
lbound=-30,
94+
ubound=11,
95+
training=True,
9396
):
9497
super().__init__()
9598
self.output_dim = out_features
@@ -105,79 +108,99 @@ def __init__(
105108

106109
def build(self, input_shape):
107110
self.bias = self.add_weight(
108-
name='bias',
109-
shape=(self.output_dim,),
110-
initializer='glorot_normal',
111-
trainable=True,
112-
dtype=tf.float64
113-
)
114-
111+
name='bias',
112+
shape=(self.output_dim,),
113+
initializer='glorot_normal',
114+
trainable=True,
115+
dtype=tf.float64,
116+
)
117+
115118
self.mu_w = self.add_weight(
116-
name='mu_w',
117-
shape=(self.output_dim, self.input_dim),
118-
initializer='glorot_normal',
119-
trainable=True,
120-
dtype=tf.float64
121-
)
122-
119+
name='mu_w',
120+
shape=(self.output_dim, self.input_dim),
121+
initializer='glorot_normal',
122+
trainable=True,
123+
dtype=tf.float64,
124+
)
125+
123126
self.logsig2_w = self.add_weight(
124-
name='logsig2_w',
125-
shape=(self.output_dim, self.input_dim),
126-
initializer='glorot_normal',
127+
name='logsig2_w',
128+
shape=(self.output_dim, self.input_dim),
129+
initializer='glorot_normal',
127130
trainable=True,
128131
dtype=tf.float64,
129-
)
130-
132+
)
133+
131134
self.reset_parameters()
132135

133136
def reset_parameters(self):
134137
stdv = 1.0 / tf.math.sqrt(tf.cast(self.input_dim, dtype=tf.float64))
135138
self.bias.assign(tf.zeros_like(self.bias))
136-
self.mu_w.assign(tf.random.normal(tf.shape(self.mu_w), mean=0, stddev=stdv, dtype=tf.float64))
137-
self.logsig2_w.assign(tf.random.normal(tf.shape(self.logsig2_w), mean=self.std_init, stddev=0.001, dtype=tf.float64))
138-
139-
#initial_logsig2 = tf.constant(self.std_init, dtype=tf.float64)
140-
#self.logsig2_w.assign(tf.fill(tf.shape(self.logsig2_w), initial_logsig2))
139+
self.mu_w.assign(
140+
tf.random.normal(tf.shape(self.mu_w), mean=0, stddev=stdv, dtype=tf.float64)
141+
)
142+
self.logsig2_w.assign(
143+
tf.random.normal(
144+
tf.shape(self.logsig2_w), mean=self.std_init, stddev=0.001, dtype=tf.float64
145+
)
146+
)
141147

142148
def reset_random(self):
143149
self.random = None
144150
self.map = False
145151

146152
def kl_loss(self) -> tf.Tensor:
147153
logsig2_w = tf.clip_by_value(self.logsig2_w, self.lbound, self.ubound)
148-
kl = 0.5 * tf.reduce_sum((self.prior_prec*(tf.math.pow(self.mu_w,2)+tf.math.exp(logsig2_w))
149-
- logsig2_w - tf.constant(1.0, dtype=tf.float64) - tf.math.log(self.prior_prec)))
154+
kl = 0.5 * tf.reduce_sum(
155+
(
156+
self.prior_prec * (tf.math.pow(self.mu_w, 2) + tf.math.exp(logsig2_w))
157+
- logsig2_w
158+
- tf.constant(1.0, dtype=tf.float64)
159+
- tf.math.log(self.prior_prec)
160+
)
161+
)
150162
return kl
151-
163+
164+
def train(self):
165+
self.training = True
166+
167+
def eval(self):
168+
self.training = False
169+
152170
def call(self, input: tf.Tensor) -> tf.Tensor:
153171
# Ensure input is tf.float64
154172
input = tf.cast(input, tf.float64)
155-
173+
156174
if self.training:
157-
mu_out = tf.matmul(input, tf.cast(self.mu_w, input.dtype), transpose_b=True) + tf.cast(self.bias, input.dtype)
175+
mu_out = tf.matmul(input, tf.cast(self.mu_w, input.dtype), transpose_b=True) + tf.cast(
176+
self.bias, input.dtype
177+
)
158178
logsig2_w = tf.clip_by_value(self.logsig2_w, self.lbound, self.ubound)
159179
s2_w = tf.math.exp(logsig2_w)
160180
input2 = tf.math.pow(input, 2)
161181
var_out = tf.matmul(input2, s2_w, transpose_b=True) + tf.cast(self.eps, input.dtype)
162-
163-
return mu_out + tf.math.sqrt(var_out) * tf.random.normal(shape=tf.shape(mu_out), dtype=input.dtype)
164-
182+
183+
return mu_out + tf.math.sqrt(var_out) * tf.random.normal(
184+
shape=tf.shape(mu_out), dtype=input.dtype
185+
)
186+
165187
else:
166188
# During inference, use MAP estimation (posterior mean) for deterministic output
167189
if self.map:
168190
mu_out = tf.matmul(input, self.mu_w, transpose_b=True) + self.bias
169191
return mu_out
170-
192+
171193
logsig2_w = tf.clip_by_value(self.logsig2_w, self.lbound, 11)
172194
if self.random is None:
173-
self.random = tf.Variable(tf.random.normal(shape=tf.shape(self.mu_w), dtype=tf.float64))
195+
self.random = tf.Variable(
196+
tf.random.normal(shape=tf.shape(self.mu_w), dtype=tf.float64)
197+
)
174198
s2_w = tf.math.exp(logsig2_w)
175199
# draw fresh samples instead of caching
176200
epsilon = tf.random.normal(shape=tf.shape(self.mu_w), dtype=tf.float64)
177-
weight = self.mu_w + tf.math.sqrt(s2_w) * epsilon #self.random #
178-
179-
return tf.matmul(input, weight, transpose_b=True) + self.bias
201+
weight = self.mu_w + tf.math.sqrt(s2_w) * epsilon # self.random #
180202

203+
return tf.matmul(input, weight, transpose_b=True) + self.bias
181204

182205

183206
class Dense(KerasDense, MetaLayer):
@@ -237,6 +260,7 @@ def apply_dense(xinput):
237260

238261
return apply_dense
239262

263+
240264
layers = {
241265
"dense": (
242266
Dense,
@@ -262,13 +286,7 @@ def apply_dense(xinput):
262286
LSTM_modified,
263287
{"kernel_initializer": "glorot_normal", "units": 5, "activation": "sigmoid"},
264288
),
265-
"VBDense": (
266-
VBDense,
267-
{
268-
"in_features" : 10, #hardcoded for now
269-
"out_features" : 8,
270-
},
271-
),
289+
"VBDense": (VBDense, {"in_features": 10, "out_features": 8}), # hardcoded for now
272290
"dropout": (Dropout, {"rate": 0.0}),
273291
"concatenate": (Concatenate, {}),
274292
}
@@ -344,4 +362,4 @@ def regularizer_selector(reg_name, **kwargs):
344362
if key in reg_args.keys():
345363
reg_args[key] = value
346364

347-
return reg_class(**reg_args)
365+
return reg_class(**reg_args)

0 commit comments

Comments
 (0)