Skip to content

Commit 8238497

Browse files
Integrating bayesian layer into n3fit
1 parent a470562 commit 8238497

39 files changed

+1634
-62
lines changed

n3fit/runcards/examples/Basic_runcard_qed.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ trvlseed: 1551864071
2727
nnseed: 676150632
2828
mcseed: 619859729
2929
save: false
30-
genrep: true # true = generate MC replicas, false = use real data
30+
genrep: false # true = generate MC replicas, false = use real data
3131

3232
parameters: # This defines the parameter dictionary that is passed to the Model Trainer
33-
nodes_per_layer: [25, 20, 8]
34-
activation_per_layer: [tanh, tanh, linear]
33+
nodes_per_layer: [15, 10, 8]
34+
activation_per_layer: ['gelu', 'gelu', 'gelu']
3535
initializer: glorot_normal
3636
optimizer:
3737
clipnorm: 6.073e-6
@@ -45,7 +45,7 @@ parameters: # This defines the parameter dictionary that is passed to the Model
4545
initial: 10
4646
multiplier:
4747
stopping_patience: 0.1
48-
layer_type: dense
48+
layer_type: ["dense", "dense", "VBDense"]
4949
dropout: 0.0
5050
threshold_chi2: 3.5
5151

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#
2+
# Configuration file for n3fit
3+
#
4+
############################################################
5+
description: Basic runcard with extra options such as debugging and tensorboard
6+
7+
############################################################
8+
# frac: training fraction
9+
# ewk: apply ewk k-factors
10+
# sys: systematics treatment (see systypes)
11+
dataset_inputs:
12+
- {dataset: NMC_NC_NOTFIXED_EM-F2, variant: legacy_dw}
13+
#- {dataset: SLAC_NC_NOTFIXED_D_EM-F2, variant: legacy_dw}
14+
#- {dataset: ATLAS_Z0J_8TEV_PT-M }
15+
16+
diagonal_frac: 0.75
17+
18+
############################################################
19+
datacuts:
20+
t0pdfset : NNPDF40_nnlo_as_01180 # PDF set to generate t0 covmat
21+
q2min : 3.49 # Q2 minimum
22+
w2min : 12.5 # W2 minimum
23+
24+
############################################################
25+
theory:
26+
theoryid: 41_000_000 # database id
27+
28+
parameters: # This defines the parameter dictionary that is passed to the Model Trainer
29+
nodes_per_layer: [15, 10, 8]
30+
activation_per_layer: ['gelu', 'gelu', 'gelu']
31+
initializer: 'glorot_normal'
32+
optimizer:
33+
optimizer_name: 'RMSprop'
34+
learning_rate: 0.01
35+
clipnorm: 1.0
36+
epochs: 900
37+
positivity:
38+
multiplier: 1.05 # When any of the multiplier and/or the initial is not set
39+
initial: # the maxlambda will be used instead to compute these values per dataset
40+
threshold: 1e-5
41+
stopping_patience: 0.30 # percentage of the number of epochs
42+
layer_type: ['dense','dense','VBDense']
43+
dropout: 0.0
44+
threshold_chi2: 5.0
45+
46+
############################################################
47+
trvlseed: 1
48+
nnseed: 2
49+
mcseed: 3
50+
genrep: True # true = generate MC replicas, false = use real data
51+
52+
fitting:
53+
# NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7)
54+
# EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7)
55+
# EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7)
56+
# FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7)
57+
fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc.
58+
basis:
59+
# remeber to change the name of PDF accordingly with fitbasis
60+
- { fl: sng, smallx: [1.05,1.19], largex: [1.47,2.70], trainable: False }
61+
- { fl: g, smallx: [0.94,1.25], largex: [0.11,5.87], trainable: False }
62+
- { fl: v, smallx: [0.54,0.75], largex: [1.15,2.76], trainable: False }
63+
- { fl: v3, smallx: [0.21,0.57], largex: [1.35,3.08] }
64+
- { fl: v8, smallx: [0.52,0.76], largex: [0.77,3.56], trainable: True }
65+
- { fl: t3, smallx: [-0.37,1.52], largex: [1.74,3.39] }
66+
- { fl: t8, smallx: [0.56,1.29], largex: [1.45,3.03] }
67+
- { fl: cp, smallx: [0.12,1.19], largex: [1.83,6.70] }
68+
69+
############################################################
70+
positivity:
71+
posdatasets:
72+
#- {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6} # Positivity Lagrange Multiplier
73+
#- {dataset: NNPDF_POS_2P24GEV_FLL, maxlambda: 1e6}
74+
#- {dataset: NNPDF_POS_2P24GEV_XGL, maxlambda: 1e6}
75+
76+
############################################################
77+
integrability:
78+
integdatasets:
79+
- {dataset: NNPDF_INTEG_3GEV_XT3, maxlambda: 1e2}
80+
81+
# Apply a cut to a dataset or process type, in this case to the positivity dataset `NNPDF_POS_2P24GEV_XGL` which has process type: POS_XPDF
82+
added_filter_rules:
83+
- process_type: POS_XPDF
84+
rule: "x > 0.1"
85+
86+
############################################################
87+
debug: True
88+
maxcores: 8
89+
parallel_models: false
90+
91+
tensorboard:
92+
weight_freq: 100
93+
profiling: False
94+
95+
save: 'weights.h5'
96+
# load: '/path/to/weights.h5/file'
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#
2+
# Configuration file for n3fit
3+
#
4+
############################################################
5+
description: Basic runcard with extra options such as debugging and tensorboard
6+
7+
############################################################
8+
# frac: training fraction
9+
# ewk: apply ewk k-factors
10+
# sys: systematics treatment (see systypes)
11+
dataset_inputs:
12+
- {dataset: NMC_NC_NOTFIXED_EM-F2, variant: legacy_dw}
13+
#- {dataset: SLAC_NC_NOTFIXED_D_EM-F2, variant: legacy_dw}
14+
#- {dataset: ATLAS_Z0J_8TEV_PT-M }
15+
16+
diagonal_frac: 0.75
17+
18+
############################################################
19+
datacuts:
20+
t0pdfset : NNPDF40_nnlo_as_01180 # PDF set to generate t0 covmat
21+
q2min : 3.49 # Q2 minimum
22+
w2min : 12.5 # W2 minimum
23+
24+
############################################################
25+
theory:
26+
theoryid: 41_000_000 # database id
27+
28+
parameters: # This defines the parameter dictionary that is passed to the Model Trainer
29+
nodes_per_layer: [15, 10, 8]
30+
activation_per_layer: ['gelu', 'gelu', 'gelu']
31+
initializer: 'glorot_normal'
32+
optimizer:
33+
optimizer_name: 'RMSprop'
34+
learning_rate: 0.01
35+
clipnorm: 1.0
36+
epochs: 900
37+
positivity:
38+
multiplier: 1.05 # When any of the multiplier and/or the initial is not set
39+
initial: # the maxlambda will be used instead to compute these values per dataset
40+
threshold: 1e-5
41+
stopping_patience: 0.30 # percentage of the number of epochs
42+
layer_type: ['dense','dense','dense']
43+
dropout: 0.0
44+
threshold_chi2: 5.0
45+
46+
############################################################
47+
trvlseed: 1
48+
nnseed: 2
49+
mcseed: 3
50+
genrep: True # true = generate MC replicas, false = use real data
51+
52+
fitting:
53+
# NN23(QED) = sng=0,g=1,v=2,t3=3,ds=4,sp=5,sm=6,(pht=7)
54+
# EVOL(QED) = sng=0,g=1,v=2,v3=3,v8=4,t3=5,t8=6,(pht=7)
55+
# EVOLS(QED)= sng=0,g=1,v=2,v8=4,t3=4,t8=5,ds=6,(pht=7)
56+
# FLVR(QED) = g=0, u=1, ubar=2, d=3, dbar=4, s=5, sbar=6, (pht=7)
57+
fitbasis: NN31IC # EVOL (7), EVOLQED (8), etc.
58+
basis:
59+
# remeber to change the name of PDF accordingly with fitbasis
60+
- { fl: sng, smallx: [1.05,1.19], largex: [1.47,2.70], trainable: False }
61+
- { fl: g, smallx: [0.94,1.25], largex: [0.11,5.87], trainable: False }
62+
- { fl: v, smallx: [0.54,0.75], largex: [1.15,2.76], trainable: False }
63+
- { fl: v3, smallx: [0.21,0.57], largex: [1.35,3.08] }
64+
- { fl: v8, smallx: [0.52,0.76], largex: [0.77,3.56], trainable: True }
65+
- { fl: t3, smallx: [-0.37,1.52], largex: [1.74,3.39] }
66+
- { fl: t8, smallx: [0.56,1.29], largex: [1.45,3.03] }
67+
- { fl: cp, smallx: [0.12,1.19], largex: [1.83,6.70] }
68+
69+
############################################################
70+
positivity:
71+
posdatasets:
72+
#- {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6} # Positivity Lagrange Multiplier
73+
#- {dataset: NNPDF_POS_2P24GEV_FLL, maxlambda: 1e6}
74+
#- {dataset: NNPDF_POS_2P24GEV_XGL, maxlambda: 1e6}
75+
76+
############################################################
77+
integrability:
78+
integdatasets:
79+
- {dataset: NNPDF_INTEG_3GEV_XT3, maxlambda: 1e2}
80+
81+
# Apply a cut to a dataset or process type, in this case to the positivity dataset `NNPDF_POS_2P24GEV_XGL` which has process type: POS_XPDF
82+
added_filter_rules:
83+
- process_type: POS_XPDF
84+
rule: "x > 0.1"
85+
86+
############################################################
87+
debug: True
88+
maxcores: 8
89+
parallel_models: false
90+
91+
tensorboard:
92+
weight_freq: 100
93+
profiling: False
94+
95+
save: 'weights.h5'
96+
# load: '/path/to/weights.h5/file'

n3fit/src/n3fit/backends/keras_backend/base_layers.py

Lines changed: 106 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,21 @@
1616
1717
The names of the layer and the activation function are the ones to be used in the n3fit runcard.
1818
"""
19+
import numpy as np
20+
import keras.backend as K
21+
import tensorflow as tf
22+
import math
23+
from scipy.stats import norm
1924

2025
from keras.layers import Dense as KerasDense
21-
from keras.layers import Dropout, Lambda
26+
from keras.layers import Dropout, Lambda, Layer
2227
from keras.layers import Input # pylint: disable=unused-import
2328
from keras.layers import LSTM, Concatenate
2429
from keras.regularizers import l1_l2
2530

2631
from . import operations as ops
2732
from .MetaLayer import MetaLayer
33+
from contextlib import contextmanager
2834

2935

3036
# Custom activation functions
@@ -74,14 +80,101 @@ def ReshapedLSTM(input_tensor):
7480

7581
return ReshapedLSTM
7682

83+
class VBDense(Layer):
    """Variational Bayesian dense layer.

    Keeps a factorized Gaussian posterior over the kernel: a per-weight mean
    ``mu_w`` and log-variance ``logsig2_w``, plus a deterministic bias.
    In training mode the forward pass uses the local reparameterization
    trick: it propagates the mean and variance of the pre-activation and adds
    Gaussian noise scaled by the resulting standard deviation.  In inference
    mode it returns the deterministic mean output (posterior mean / MAP).

    Parameters
    ----------
    out_features: int
        number of output units
    in_features: int
        number of input features
    prior_prec: float
        precision of the Gaussian prior over the weights, used by ``kl_loss``
    map: bool
        MAP-estimation flag (kept for interface compatibility; ``call`` only
        branches on the training flag)
    std_init: float
        initial value for every entry of ``logsig2_w``
    lbound, ubound: float
        clipping bounds applied to ``logsig2_w`` before exponentiation
    training: bool
        default mode used when ``call`` does not receive a ``training`` flag
    """

    def __init__(self, out_features: int, in_features: int, prior_prec: float = 0.01,
                 map: bool = False, std_init: float = -9, lbound=-30, ubound=11, training=True):
        super().__init__()
        self.output_dim = out_features
        self.input_dim = in_features
        self.map = map
        self.prior_prec = tf.cast(prior_prec, tf.float64)
        self.random = None
        # Numerical floor added to the output variance before the sqrt
        self.eps = 1e-12 if K.floatx() == 'float64' else 1e-8
        # Keep as a plain float: it is only used to fill logsig2_w at reset time
        self.std_init = float(std_init)
        self.lbound = lbound
        self.ubound = ubound
        # The runcard defaults may pass this flag as a string (e.g. "False"),
        # which is truthy and would silently keep the layer stochastic;
        # coerce it to a real boolean.
        if isinstance(training, str):
            training = training.strip().lower() not in ("false", "0", "no", "")
        self.training = bool(training)

    def build(self, input_shape):
        """Create the variational parameters (all in double precision)."""
        # Deterministic bias (no posterior over the bias)
        self.bias = self.add_weight(
            name='bias',
            shape=(self.output_dim,),
            initializer='glorot_normal',
            trainable=True,
            dtype=tf.float64,
        )

        # Posterior mean of the kernel; stored transposed (out, in), hence the
        # transpose_b=True in call()
        self.mu_w = self.add_weight(
            name='mu_w',
            shape=(self.output_dim, self.input_dim),
            initializer='glorot_normal',
            trainable=True,
            dtype=tf.float64,
        )

        # Posterior log-variance of the kernel
        self.logsig2_w = self.add_weight(
            name='logsig2_w',
            shape=(self.output_dim, self.input_dim),
            initializer='glorot_normal',
            trainable=True,
            dtype=tf.float64,
        )

        # Overwrite the generic initializers with the layer's own scheme
        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize bias to zero, mu_w ~ N(0, 1/sqrt(in)), logsig2_w to std_init."""
        stdv = 1.0 / tf.math.sqrt(tf.cast(self.input_dim, dtype=tf.float64))
        self.bias.assign(tf.zeros_like(self.bias))
        self.mu_w.assign(tf.random.normal(tf.shape(self.mu_w), mean=0, stddev=stdv, dtype=tf.float64))
        # Constant fill: every weight starts with the same (small) variance
        initial_logsig2 = tf.constant(self.std_init, dtype=tf.float64)
        self.logsig2_w.assign(tf.fill(tf.shape(self.logsig2_w), initial_logsig2))

    def reset_random(self):
        """Clear the cached noise and leave MAP mode."""
        # NOTE(review): this also flips self.map off permanently — confirm intended
        self.random = None
        self.map = False

    def kl_loss(self) -> tf.Tensor:
        """Return the KL divergence between the factorized Gaussian posterior
        N(mu_w, exp(logsig2_w)) and the zero-mean Gaussian prior with
        precision ``prior_prec``, summed over all kernel weights."""
        logsig2_w = tf.clip_by_value(self.logsig2_w, self.lbound, self.ubound)
        kl = 0.5 * tf.reduce_sum((self.prior_prec*(tf.math.pow(self.mu_w,2)+tf.math.exp(logsig2_w))
                - logsig2_w - tf.constant(1.0, dtype=tf.float64) - tf.math.log(self.prior_prec)))
        return kl

    def call(self, input: tf.Tensor, training=None) -> tf.Tensor:
        """Forward pass: stochastic in training mode, deterministic otherwise.

        ``training`` follows the Keras convention (passed by the framework);
        when it is None the layer falls back to the flag given at construction.
        """
        # The whole layer works in double precision
        input = tf.cast(input, tf.float64)
        if training is None:
            training = self.training

        # Mean of the pre-activation — also the deterministic inference output
        mu_out = tf.matmul(input, tf.cast(self.mu_w, input.dtype), transpose_b=True) + tf.cast(self.bias, input.dtype)
        if not training:
            return mu_out

        # Local reparameterization: sample the pre-activation directly,
        # var(out) = (input^2) @ sigma^2_w + eps
        logsig2_w = tf.clip_by_value(self.logsig2_w, self.lbound, self.ubound)
        s2_w = tf.math.exp(logsig2_w)
        input2 = tf.math.pow(input, 2)
        var_out = tf.matmul(input2, s2_w, transpose_b=True) + tf.cast(self.eps, input.dtype)

        return mu_out + tf.math.sqrt(var_out) * tf.random.normal(shape=tf.shape(mu_out), dtype=input.dtype)
77162

78163
class Dense(KerasDense, MetaLayer):
    """Thin wrapper around Keras' Dense.

    Resolves the merge conflict that was left in this block: keeps BOTH the
    upstream int-cast of ``units`` and the stashed float64 default dtype.
    """

    def __init__(self, *args, **kwargs):
        # In Keras == 3.13, np.int() is not accepted by Dense
        if "units" in kwargs:
            kwargs["units"] = int(kwargs["units"])
        # Default to double precision so this layer matches VBDense,
        # which works entirely in tf.float64
        if "dtype" not in kwargs:
            kwargs["dtype"] = tf.float64
        super().__init__(*args, **kwargs)
85178

86179

87180
def dense_per_flavour(basis_size=8, kernel_initializer="glorot_normal", **dense_kwargs):
@@ -133,7 +226,6 @@ def apply_dense(xinput):
133226

134227
return apply_dense
135228

136-
137229
layers = {
138230
"dense": (
139231
Dense,
@@ -142,6 +234,7 @@ def apply_dense(xinput):
142234
"units": 5,
143235
"activation": "sigmoid",
144236
"kernel_regularizer": None,
237+
"dtype": tf.float64,
145238
},
146239
),
147240
"dense_per_flavour": (
@@ -151,12 +244,21 @@ def apply_dense(xinput):
151244
"units": 5,
152245
"activation": "sigmoid",
153246
"basis_size": 8,
247+
"dtype": tf.float64,
154248
},
155249
),
156250
"LSTM": (
157251
LSTM_modified,
158252
{"kernel_initializer": "glorot_normal", "units": 5, "activation": "sigmoid"},
159253
),
254+
"VBDense": (
255+
VBDense,
256+
{
257+
"in_features" : 10,
258+
"out_features" : 8,
259+
"training": "False",
260+
},
261+
),
160262
"dropout": (Dropout, {"rate": 0.0}),
161263
"concatenate": (Concatenate, {}),
162264
}
@@ -173,7 +275,7 @@ def base_layer_selector(layer_name, **kwargs):
173275
174276
Parameters
175277
----------
176-
`layer_name
278+
`layer_name`
177279
str with the name of the layer
178280
`**kwargs`
179281
extra optional arguments to pass to the layer (beyond their defaults)
@@ -232,4 +334,4 @@ def regularizer_selector(reg_name, **kwargs):
232334
if key in reg_args.keys():
233335
reg_args[key] = value
234336

235-
return reg_class(**reg_args)
337+
return reg_class(**reg_args)

0 commit comments

Comments
 (0)