import paddle.fluid.layers as layers
from paddle.fluid import core
from test_dist_base import TestDistRunnerBase, runtime_main
+import paddle.compat as cpt
from paddle.compat import long_type

import hashlib
@@ -315,8 +316,9 @@ def pad_batch_data(insts,
    """
    return_list = []
    max_len = max(len(inst) for inst in insts)
-    num_token = reduce(lambda x, y: x + y,
-                       [len(inst) for inst in insts]) if return_num_token else 0
+    num_token = six.moves.reduce(
+        lambda x, y: x + y,
+        [len(inst) for inst in insts]) if return_num_token else 0
    # Any token included in dict can be used to pad, since the paddings' loss
    # will be masked out by weights and make no effect on parameter gradients.
    inst_data = np.array(
@@ -328,7 +330,7 @@ def pad_batch_data(insts,
        return_list += [inst_weight.astype("float32").reshape([-1, 1])]
    else:  # position data
        inst_pos = np.array([
-            range(1, len(inst) + 1) + [0] * (max_len - len(inst))
+            list(range(1, len(inst) + 1)) + [0] * (max_len - len(inst))
            for inst in insts
        ])
        return_list += [inst_pos.astype("int64").reshape([-1, 1])]
@@ -385,10 +387,11 @@ def prepare_batch_input(insts, data_input_names, src_pad_idx, trg_pad_idx,
        return_num_token=True)

    data_input_dict = dict(
-        zip(data_input_names, [
-            src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
-            trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight
-        ]))
+        list(
+            zip(data_input_names, [
+                src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
+                trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight
+            ])))

    return data_input_dict, np.asarray([num_token], dtype="float32")

@@ -561,7 +564,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
        np.log(TrainTaskConfig.label_smooth_eps / (
            ModelHyperParams.trg_vocab_size - 1) + 1e-20))
    init = False
-    for pass_id in xrange(TrainTaskConfig.pass_num):
+    for pass_id in six.moves.xrange(TrainTaskConfig.pass_num):
        pass_start_time = time.time()
        for batch_id, data in enumerate(train_data()):
            if batch_id >= 5:
@@ -587,11 +590,11 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
                    ModelHyperParams.eos_idx, ModelHyperParams.n_head,
                    ModelHyperParams.d_model)
                total_num_token += num_token
-                feed_kv_pairs = data_input_dict.items()
+                feed_kv_pairs = list(data_input_dict.items())
                if TrainTaskConfig.local:
-                    feed_kv_pairs += {
+                    feed_kv_pairs += list({
                        lr_scheduler.learning_rate.name: lr_rate
-                    }.items()
+                    }.items())
                feed_list.append(dict(feed_kv_pairs))

                if not init:
@@ -873,6 +876,7 @@ def _load_lines(self, fpattern, tar_fname):

            f = tarfile.open(fpaths[0], "r")
            for line in f.extractfile(tar_fname):
+                line = cpt.to_text(line)
                fields = line.strip("\n").split(self._field_delimiter)
                if (not self._only_src and len(fields) == 2) or (
                        self._only_src and len(fields) == 1):
@@ -882,8 +886,9 @@ def _load_lines(self, fpattern, tar_fname):
                if not os.path.isfile(fpath):
                    raise IOError("Invalid file: %s" % fpath)

-                with open(fpath, "r") as f:
+                with open(fpath, "rb") as f:
                    for line in f:
+                        line = cpt.to_text(line)
                        fields = line.strip("\n").split(self._field_delimiter)
                        if (not self._only_src and len(fields) == 2) or (
                                self._only_src and len(fields) == 1):
@@ -892,8 +897,9 @@ def _load_lines(self, fpattern, tar_fname):
    @staticmethod
    def load_dict(dict_path, reverse=False):
        word_dict = {}
-        with open(dict_path, "r") as fdict:
+        with open(dict_path, "rb") as fdict:
            for idx, line in enumerate(fdict):
+                line = cpt.to_text(line)
                if reverse:
                    word_dict[idx] = line.strip("\n")
                else:
@@ -1034,7 +1040,7 @@ def __combine_heads(x):
        # size of the input as the output dimension size.
        return layers.reshape(
            x=trans_x,
-            shape=map(int, [0, 0, trans_x.shape[2] * trans_x.shape[3]]))
+            shape=list(map(int, [0, 0, trans_x.shape[2] * trans_x.shape[3]])))

    def scaled_dot_product_attention(q, k, v, attn_bias, d_model, dropout_rate):
        """