@@ -68,7 +68,7 @@ def test_layer_invocation(self, transformer_cls):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     _ = model.predict(input_data)
 
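Context for the `10 *` removals repeated throughout this commit: `np.random.random_sample` draws uniformly from the half-open interval [0.0, 1.0), so dropping the factor keeps the test inputs at unit scale instead of [0.0, 10.0). A minimal standalone sketch (sizes are illustrative, not taken from the test file):

```python
import numpy as np

batch_size, sequence_length, width = 6, 21, 80  # illustrative sizes

# New behavior: samples drawn uniformly from [0.0, 1.0).
unit_scale = np.random.random_sample((batch_size, sequence_length, width))
assert 0.0 <= unit_scale.min() and unit_scale.max() < 1.0

# Old behavior: the extra factor pushed inputs into [0.0, 10.0).
old_scale = 10 * np.random.random_sample((batch_size, sequence_length, width))
assert old_scale.max() < 10.0
```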
@@ -89,7 +89,7 @@ def test_layer_invocation_with_mask(self, transformer_cls):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
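The comments above describe the dense mask layout these tests feed to the layer. As a hedged sketch, one common way to derive such a (batch, from_seq_len, to_seq_len) mask from per-token padding flags (this construction is illustrative; the tests themselves just draw random 0/1 values):

```python
import numpy as np

batch_size, sequence_length = 6, 21  # illustrative sizes

# Per-position flags: 1 = real token, 0 = padding.
token_mask = np.random.randint(2, size=(batch_size, sequence_length))

# Broadcast so position i may attend to position j only when token j is real.
attention_mask = np.broadcast_to(
    token_mask[:, None, :], (batch_size, sequence_length, sequence_length))
assert attention_mask.shape == (batch_size, sequence_length, sequence_length)
```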
@@ -104,7 +104,7 @@ def test_layer_output_range(self, transformer_cls):
     width = 80
 
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))
@@ -121,7 +121,7 @@ def test_layer_output_range(self, transformer_cls):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.25)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)
 
   def test_layer_output_range_with_relative_pe(self, transformer_cls):
     test_layer = transformer_cls(
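On the tolerance edits in this commit: `assertAllClose` accepts elementwise differences up to roughly atol + rtol * |expected| (the numpy-style criterion). With inputs now in [0.0, 1.0) rather than [0.0, 10.0), a loose rtol of 0.25 is no longer needed, and the tightened rtol=0.01 still passes. A quick illustration of the criterion using numpy directly (values chosen for the example, not taken from the tests):

```python
import numpy as np

expected = np.array([0.50, 0.80])
actual = np.array([0.5049, 0.8079])

# Criterion: |actual - expected| <= atol + rtol * |expected|.
np.testing.assert_allclose(actual, expected, atol=0.002, rtol=0.01)  # passes

# With rtol halved, the same deviations exceed the bound and would raise:
# np.testing.assert_allclose(actual, expected, atol=0.002, rtol=0.005)
```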
@@ -131,7 +131,7 @@ def test_layer_output_range_with_relative_pe(self, transformer_cls):
     width = 80
 
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))
@@ -149,7 +149,7 @@ def test_layer_output_range_with_relative_pe(self, transformer_cls):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)
 
   def test_layer_output_range_without_mask(self, transformer_cls):
     test_layer = transformer_cls(
@@ -159,7 +159,7 @@ def test_layer_output_range_without_mask(self, transformer_cls):
     width = 80
 
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     output_tensor, _ = test_layer(input_data)
 
@@ -175,7 +175,7 @@ def test_layer_output_range_without_mask(self, transformer_cls):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer(input_data)
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)
 
   def test_layer_output_range_with_pre_norm(self, transformer_cls):
     test_layer = transformer_cls(
@@ -185,7 +185,7 @@ def test_layer_output_range_with_pre_norm(self, transformer_cls):
     width = 80
 
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))
@@ -203,7 +203,7 @@ def test_layer_output_range_with_pre_norm(self, transformer_cls):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)
 
   def test_layer_invocation_with_float16_dtype(self, transformer_cls):
     tf.keras.mixed_precision.set_global_policy('mixed_float16')
@@ -223,7 +223,7 @@ def test_layer_invocation_with_float16_dtype(self, transformer_cls):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = (10 * np.random.random_sample(
+    input_data = (np.random.random_sample(
         (batch_size, sequence_length, width)))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
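For readers unfamiliar with the mixed-precision setup used by this test: under the 'mixed_float16' policy, Keras layers compute in float16 while keeping their variables in float32. A minimal standalone sketch of the policy's effect (not the test's own code):

```python
import numpy as np
import tensorflow as tf

tf.keras.mixed_precision.set_global_policy('mixed_float16')

layer = tf.keras.layers.Dense(4)
_ = layer(np.random.random_sample((2, 8)).astype(np.float32))

print(layer.compute_dtype)       # float16: the math runs in half precision
print(layer.variables[0].dtype)  # float32: the weights stay in full precision

# Reset the global policy afterwards (good hygiene, since it is process-wide).
tf.keras.mixed_precision.set_global_policy('float32')
```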
@@ -368,7 +368,7 @@ def test_layer_invocation_with_mask(self, reuse_attention,
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
@@ -404,7 +404,7 @@ def test_layer_invocation_with_float16_with_relative_pe(
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = (10 * np.random.random_sample(
+    input_data = (np.random.random_sample(
         (batch_size, sequence_length, width)))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)