@@ -302,8 +302,11 @@ def make_all_inputs(input_fields):
302
302
# if we run sync mode
303
303
sync = False
304
304
305
- # how many batches we use
306
- batch_num = 2
305
+ if not core .is_compiled_with_cuda ():
306
+ # how many batches we use
307
+ batch_num = 50
308
+ else :
309
+ batch_num = 5
307
310
308
311
np .random .seed = 1
309
312
src_word_np = np .random .randint (
@@ -335,24 +338,6 @@ def make_all_inputs(input_fields):
335
338
dtype = 'int64' )
336
339
lbl_weight_np = np .random .randn (batch_size * seq_len , 1 ).astype ('float32' )
337
340
338
- # np.random.seed = 1
339
- # src_word_np = np.arange(0, 10).reshape([batch_size, seq_len, 1]).astype('int64')
340
- # src_pos_np = np.random.randint(
341
- # 1, seq_len, size=(batch_size, seq_len, 1), dtype='int64')
342
- # src_slf_attn_bias_np = np.random.randn(batch_size, ModelHyperParams.n_head,
343
- # seq_len, seq_len).astype('float32')
344
- #
345
- # trg_word_np = np.arange(0, 10).reshape([batch_size, seq_len, 1]).astype('int64')
346
- # trg_pos_np = np.random.randint(
347
- # 1, seq_len, size=(batch_size, seq_len, 1), dtype='int64')
348
- # trg_slf_attn_bias_np = np.random.randn(batch_size, ModelHyperParams.n_head,
349
- # seq_len, seq_len).astype('float32')
350
- # trg_src_attn_bias_np = np.random.randn(batch_size, ModelHyperParams.n_head,
351
- # seq_len, seq_len).astype('float32')
352
- #
353
- # lbl_word_np = np.arange(0, 10).reshape([batch_size * seq_len, 1]).astype('int64')
354
- # lbl_weight_np = np.random.randn(batch_size * seq_len, 1).astype('float32')
355
- #
356
341
pos_inp1 = position_encoding_init (ModelHyperParams .max_length ,
357
342
ModelHyperParams .d_model )
358
343
pos_inp2 = position_encoding_init (ModelHyperParams .max_length ,
@@ -739,7 +724,7 @@ def forward(self, dec_input, enc_output, slf_attn_bias, dec_enc_attn_bias):
739
724
enc_attn_output_pp = self ._multihead_attention_layer2 (
740
725
pre_process_rlt2 , enc_output , enc_output , dec_enc_attn_bias )
741
726
enc_attn_output = self ._post_process_layer2 (
742
- slf_attn_output , enc_attn_output_pp , self ._postprocess_cmd ,
727
+ slf_attn_output_pp , enc_attn_output_pp , self ._postprocess_cmd ,
743
728
self ._prepostprcess_dropout )
744
729
pre_process_rlt3 = self ._pre_process_layer3 (None , enc_attn_output ,
745
730
self ._preprocess_cmd ,
@@ -1076,20 +1061,17 @@ def test_transformer_float32(self):
1076
1061
4 ]] = out [k ]
1077
1062
1078
1063
self .assertTrue (
1079
- np .allclose (static_avg_cost_value , dy_avg_cost ._numpy ()))
1064
+ np .array_equal (static_avg_cost_value , dy_avg_cost ._numpy ()))
1080
1065
self .assertTrue (
1081
- np .allclose (static_sum_cost_value , dy_sum_cost ._numpy ()))
1066
+ np .array_equal (static_sum_cost_value , dy_sum_cost ._numpy ()))
1082
1067
self .assertTrue (
1083
- np .allclose (
1084
- static_predict_value , dy_predict ._numpy (), atol = 1e-5 ))
1068
+ np .array_equal (static_predict_value , dy_predict ._numpy ()))
1085
1069
self .assertTrue (
1086
- np .allclose (static_token_num_value , dy_token_num ._numpy ()))
1070
+ np .array_equal (static_token_num_value , dy_token_num ._numpy ()))
1087
1071
for key , value in six .iteritems (static_param_init ):
1088
- self .assertTrue (np .allclose (value , dy_param_init [key ]))
1072
+ self .assertTrue (np .array_equal (value , dy_param_init [key ]))
1089
1073
for key , value in six .iteritems (static_param_updated ):
1090
- self .assertTrue (
1091
- np .allclose (
1092
- value , dy_param_updated [key ], atol = 1e-4 ))
1074
+ self .assertTrue (np .array_equal (value , dy_param_updated [key ]))
1093
1075
1094
1076
1095
1077
if __name__ == '__main__' :
0 commit comments