Skip to content

Commit a24db02

Browse files
committed
Fix data download of wmt14
test=develop
1 parent 9d7c3b1 commit a24db02

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

python/paddle/dataset/wmt16.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -71,15 +71,16 @@ def __build_dict(tar_file, dict_size, save_path, lang):
7171
for w in sen.split():
7272
word_dict[w] += 1
7373

74-
with open(save_path, "w") as fout:
75-
fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))
74+
with open(save_path, "wb") as fout:
75+
fout.write(
76+
cpt.to_bytes("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK)))
7677
for idx, word in enumerate(
7778
sorted(
7879
six.iteritems(word_dict), key=lambda x: x[1],
7980
reverse=True)):
8081
if idx + 3 == dict_size: break
81-
fout.write(word[0].encode('utf-8'))
82-
fout.write('\n')
82+
fout.write(cpt.to_bytes(word[0]))
83+
fout.write(cpt.to_bytes('\n'))
8384

8485

8586
def __load_dict(tar_file, dict_size, lang, reverse=False):

0 commit comments

Comments (0)