
Commit 9f3443f

[reuse] Fix order-dependent test. The root cause is that large input data also increases variance.
PiperOrigin-RevId: 419617435
1 parent 6ce292d commit 9f3443f
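
Why the "10 *" factor caused flakiness, as a minimal numpy sketch (illustrative only, not part of the commit; the sequence length below is a stand-in, since the hunks only show batch_size = 6 and width = 80): scaling uniform samples by 10 multiplies their standard deviation by 10 and their variance by 100, so a fixed tolerance budget is far easier for an unlucky draw to exceed, and which draw a test sees depends on test execution order.

import numpy as np

rng = np.random.default_rng(0)
small = rng.random((6, 64, 80))   # (batch_size, sequence_length, width); 64 is a stand-in
large = 10 * small                # the pattern this commit removes

print(small.std())  # ~0.289, the standard deviation of U(0, 1)
print(large.std())  # ~2.89: 10x the spread, 100x the variance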

File tree

1 file changed

+13 -13 lines changed


official/nlp/modeling/layers/reuse_transformer_test.py

Lines changed: 13 additions & 13 deletions
@@ -68,7 +68,7 @@ def test_layer_invocation(self, transformer_cls):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     _ = model.predict(input_data)
 
@@ -89,7 +89,7 @@ def test_layer_invocation_with_mask(self, transformer_cls):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
@@ -104,7 +104,7 @@ def test_layer_output_range(self, transformer_cls):
     width = 80
 
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))
@@ -121,7 +121,7 @@ def test_layer_output_range(self, transformer_cls):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.25)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)
 
   def test_layer_output_range_with_relative_pe(self, transformer_cls):
     test_layer = transformer_cls(
@@ -131,7 +131,7 @@ def test_layer_output_range_with_relative_pe(self, transformer_cls):
     width = 80
 
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))
@@ -149,7 +149,7 @@ def test_layer_output_range_with_relative_pe(self, transformer_cls):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)
 
   def test_layer_output_range_without_mask(self, transformer_cls):
     test_layer = transformer_cls(
@@ -159,7 +159,7 @@ def test_layer_output_range_without_mask(self, transformer_cls):
     width = 80
 
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     output_tensor, _ = test_layer(input_data)
 
@@ -175,7 +175,7 @@ def test_layer_output_range_without_mask(self, transformer_cls):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer(input_data)
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)
 
   def test_layer_output_range_with_pre_norm(self, transformer_cls):
     test_layer = transformer_cls(
@@ -185,7 +185,7 @@ def test_layer_output_range_with_pre_norm(self, transformer_cls):
     width = 80
 
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))
@@ -203,7 +203,7 @@ def test_layer_output_range_with_pre_norm(self, transformer_cls):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)
 
   def test_layer_invocation_with_float16_dtype(self, transformer_cls):
     tf.keras.mixed_precision.set_global_policy('mixed_float16')
@@ -223,7 +223,7 @@ def test_layer_invocation_with_float16_dtype(self, transformer_cls):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = (10 * np.random.random_sample(
+    input_data = (np.random.random_sample(
         (batch_size, sequence_length, width)))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
@@ -368,7 +368,7 @@ def test_layer_invocation_with_mask(self, reuse_attention,
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
@@ -404,7 +404,7 @@ def test_layer_invocation_with_float16_with_relative_pe(
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = (10 * np.random.random_sample(
+    input_data = (np.random.random_sample(
         (batch_size, sequence_length, width)))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
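
For reference, a rough sketch of the elementwise rule assertAllClose applies when comparing tensors (it follows numpy's assert_allclose convention; within_tolerance below is a hypothetical illustration, not a real API):

def within_tolerance(actual, expected, atol=0.002, rtol=0.01):
  # Hypothetical helper mirroring the rule |actual - expected| <= atol + rtol * |expected|.
  return abs(actual - expected) <= atol + rtol * abs(expected)

print(within_tolerance(1.012, 1.0))  # True: budget is 0.002 + 0.01 * 1.0 = 0.012
print(within_tolerance(1.013, 1.0))  # False: 0.013 exceeds that budget

With inputs now drawn from [0, 1) instead of [0, 10), the layer outputs are roughly an order of magnitude smaller, which is what allows replacing the hunk-specific tolerances (rtol=0.25 in one test, atol=5e-5/rtol=0.003 in the others) with the uniform atol=0.002, rtol=0.01.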
