We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent da6af59, commit a2cec42
python/paddle/v2/dataset/imikolov.py
@@ -3,7 +3,6 @@
3
"""
4
import paddle.v2.dataset.common
5
import tarfile
6
-import collections
7
8
__all__ = ['train', 'test']
9
@@ -40,10 +39,8 @@ def build_dict(train_filename, test_filename):
40
39
testf = tf.extractfile(test_filename)
41
word_freq = word_count(testf, word_count(trainf))
42
43
- STOPWORD_FREQ = 3000
44
TYPO_FREQ = 50
45
- word_freq = filter(lambda x: x[1] > TYPO_FREQ and x[1] < STOPWORD_FREQ,
46
- word_freq.items())
+ word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())
47
48
dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0]))
49
words, _ = list(zip(*dictionary))
0 commit comments