Skip to content

Commit cac5ad6

Browse files
committed
Merge branch 'develop' of github.com:baidu/Paddle into feature/add_list_type_of_feeding
2 parents dc02bfd + 4b5a432 commit cac5ad6

File tree

3 files changed

+9
-15
lines changed

3 files changed

+9
-15
lines changed

python/paddle/v2/dataset/common.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,6 @@ def download(url, module_name, md5sum):
     return filename
 
 
-def dict_add(a_dict, ele):
-    if ele in a_dict:
-        a_dict[ele] += 1
-    else:
-        a_dict[ele] = 1
-
-
 def fetch_all():
     for module_name in filter(lambda x: not x.startswith("__"),
                               dir(paddle.v2.dataset)):

python/paddle/v2/dataset/imdb.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
 """
 
 import paddle.v2.dataset.common
+import collections
 import tarfile
 import Queue
 import re
@@ -48,10 +49,10 @@ def tokenize(pattern):
 
 
 def build_dict(pattern, cutoff):
-    word_freq = {}
+    word_freq = collections.defaultdict(int)
     for doc in tokenize(pattern):
         for word in doc:
-            paddle.v2.dataset.common.dict_add(word_freq, word)
+            word_freq[word] += 1
 
     # Not sure if we should prune less-frequent words here.
     word_freq = filter(lambda x: x[1] > cutoff, word_freq.items())

python/paddle/v2/dataset/imikolov.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
 Complete comments.
 """
 import paddle.v2.dataset.common
+import collections
 import tarfile
 
 __all__ = ['train', 'test', 'build_dict']
@@ -26,15 +27,14 @@
 
 
 def word_count(f, word_freq=None):
-    add = paddle.v2.dataset.common.dict_add
-    if word_freq == None:
-        word_freq = {}
+    if word_freq is None:
+        word_freq = collections.defaultdict(int)
 
     for l in f:
         for w in l.strip().split():
-            add(word_freq, w)
-        add(word_freq, '<s>')
-        add(word_freq, '<e>')
+            word_freq[w] += 1
+        word_freq['<s>'] += 1
+        word_freq['<e>'] += 1
 
     return word_freq
 
0 commit comments

Comments
 (0)