Skip to content

Commit a2cec42

Browse files
committed
In response to comments from Wen-bo
1 parent da6af59 commit a2cec42

File tree

1 file changed

+1
-4
lines changed

1 file changed

+1
-4
lines changed

python/paddle/v2/dataset/imikolov.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
 """
 import paddle.v2.dataset.common
 import tarfile
-import collections
 
 __all__ = ['train', 'test']
 
@@ -40,10 +39,8 @@ def build_dict(train_filename, test_filename):
 testf = tf.extractfile(test_filename)
 word_freq = word_count(testf, word_count(trainf))
 
-STOPWORD_FREQ = 3000
 TYPO_FREQ = 50
-word_freq = filter(lambda x: x[1] > TYPO_FREQ and x[1] < STOPWORD_FREQ,
-word_freq.items())
+word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())
 
 dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0]))
 words, _ = list(zip(*dictionary))

0 commit comments

Comments
 (0)