
Commit 6537de9

Actually use layer norm epsilon in encoder/decoder (#133)
We forgot to pass it to the sublayers.
1 parent: a6a3656

File tree

2 files changed: +15 −5 lines changed


keras_nlp/layers/transformer_decoder.py

Lines changed: 9 additions & 3 deletions

@@ -117,9 +117,15 @@ def _build(self, input_shape):
             bias_initializer=self.bias_initializer,
         )

-        self._decoder_attention_layernorm = keras.layers.LayerNormalization()
-        self._enc_dec_attention_layernorm = keras.layers.LayerNormalization()
-        self._feedforward_layernorm = keras.layers.LayerNormalization()
+        self._decoder_attention_layernorm = keras.layers.LayerNormalization(
+            epsilon=self.layer_norm_epsilon,
+        )
+        self._enc_dec_attention_layernorm = keras.layers.LayerNormalization(
+            epsilon=self.layer_norm_epsilon,
+        )
+        self._feedforward_layernorm = keras.layers.LayerNormalization(
+            epsilon=self.layer_norm_epsilon,
+        )

         self._self_attention_dropout = keras.layers.Dropout(rate=self.dropout)
         self._enc_dec_attentiondropout = keras.layers.Dropout(

keras_nlp/layers/transformer_encoder.py

Lines changed: 6 additions & 2 deletions

@@ -104,8 +104,12 @@ def _build(self, input_shape):
             bias_initializer=self.bias_initializer,
         )

-        self._attention_layernorm = keras.layers.LayerNormalization()
-        self._feedforward_layernorm = keras.layers.LayerNormalization()
+        self._attention_layernorm = keras.layers.LayerNormalization(
+            epsilon=self.layer_norm_epsilon,
+        )
+        self._feedforward_layernorm = keras.layers.LayerNormalization(
+            epsilon=self.layer_norm_epsilon,
+        )

         self._attention_dropout = keras.layers.Dropout(rate=self.dropout)
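
For context, a minimal sketch of what this fix changes (hedged: the layer names and constructor arguments follow the keras_nlp API shown in this diff; the dimensions and epsilon value are illustrative). Before this commit, a custom layer_norm_epsilon was accepted by the encoder/decoder but never passed through to the LayerNormalization sublayers, which silently used Keras's default of 0.001:

import tensorflow as tf
import keras_nlp

# Illustrative hyperparameters; layer_norm_epsilon is the value under test.
encoder = keras_nlp.layers.TransformerEncoder(
    intermediate_dim=64,
    num_heads=2,
    layer_norm_epsilon=1e-6,
)

# The sublayers are created lazily in _build, so run one dummy forward pass.
encoder(tf.zeros((2, 10, 16)))

# After this commit the configured epsilon reaches the norm sublayers;
# before it, this printed Keras's LayerNormalization default (0.001).
print(encoder._attention_layernorm.epsilon)  # 1e-06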