Skip to content

Commit b7a809a

Browse files
committed
add download api for dataset
1 parent c1feccd commit b7a809a

File tree

9 files changed

+46
-4
lines changed

9 files changed

+46
-4
lines changed

python/paddle/v2/dataset/cifar.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,8 @@ def test10():
7575
return reader_creator(
7676
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
7777
'test_batch')
78+
79+
80+
def download():
    """Invoke the shared dataset downloader for both CIFAR archives.

    Calls ``paddle.v2.dataset.common.download`` once for the CIFAR-10
    URL/MD5 pair and once for the CIFAR-100 pair, both under the
    ``'cifar'`` module name. Returns nothing.
    """
    archives = (
        (CIFAR10_URL, CIFAR10_MD5),
        (CIFAR100_URL, CIFAR100_MD5),
    )
    for archive_url, archive_md5 in archives:
        paddle.v2.dataset.common.download(archive_url, 'cifar', archive_md5)

python/paddle/v2/dataset/conll05.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,3 +196,11 @@ def test():
196196
words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
197197
props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
198198
return reader_creator(reader, word_dict, verb_dict, label_dict)
199+
200+
201+
def download():
    """Invoke the shared dataset downloader for every CoNLL-05 resource.

    Each (URL, MD5) pair below is passed to
    ``paddle.v2.dataset.common.download`` under the ``'conll05st'``
    module name, in the order listed. Returns nothing.
    """
    resources = (
        (WORDDICT_URL, WORDDICT_MD5),
        (VERBDICT_URL, VERBDICT_MD5),
        (TRGDICT_URL, TRGDICT_MD5),
        (EMB_URL, EMB_MD5),
        (DATA_URL, DATA_MD5),
    )
    for resource_url, resource_md5 in resources:
        paddle.v2.dataset.common.download(resource_url, 'conll05st',
                                          resource_md5)

python/paddle/v2/dataset/imdb.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,7 @@ def test(word_idx):
123123
def word_dict():
    """Build the word dictionary from the IMDB review text files.

    Passes ``build_dict`` a compiled pattern matching ``.txt`` files under
    ``aclImdb/{train,test}/{pos,neg}/`` together with the value 150.

    NOTE(review): 150 is presumably a word-frequency cutoff; confirm
    against ``build_dict``.
    """
    # Raw string so that `\.` reaches the regex engine as written instead of
    # relying on Python passing an unrecognised string escape through.
    review_files = re.compile(
        r"aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$")
    return build_dict(review_files, 150)
126+
127+
128+
def download():
    """Invoke the shared dataset downloader for the IMDB archive.

    Passes the module-level ``URL`` and ``MD5`` to
    ``paddle.v2.dataset.common.download`` under the ``'imdb'`` module
    name. Returns nothing.
    """
    archive_url, archive_md5 = URL, MD5
    paddle.v2.dataset.common.download(archive_url, 'imdb', archive_md5)

python/paddle/v2/dataset/imikolov.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,7 @@ def train(word_idx, n):
8989

9090
def test(word_idx, n):
    """Return the reader built by ``reader_creator`` for the PTB validation file.

    ``word_idx`` and ``n`` are forwarded unchanged to ``reader_creator``.
    """
    validation_path = './simple-examples/data/ptb.valid.txt'
    return reader_creator(validation_path, word_idx, n)
92+
93+
94+
def download():
    """Invoke the shared dataset downloader for the imikolov archive.

    Passes the module-level ``URL`` and ``MD5`` to
    ``paddle.v2.dataset.common.download`` under the ``"imikolov"`` module
    name. Returns nothing.
    """
    archive_url, archive_md5 = URL, MD5
    paddle.v2.dataset.common.download(archive_url, "imikolov", archive_md5)

python/paddle/v2/dataset/mnist.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,8 @@ def test():
106106
TEST_IMAGE_MD5),
107107
paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist',
108108
TEST_LABEL_MD5), 100)
109+
110+
111+
def download():
    """Invoke the shared dataset downloader for all four MNIST files.

    Requests the training images and labels and also the test images and
    labels, each under the ``'mnist'`` module name, so that everything the
    readers in this module ask the downloader for is requested here too.
    Returns nothing.
    """
    # The test files were previously omitted even though test() reads them.
    files = (
        (TRAIN_IMAGE_URL, TRAIN_IMAGE_MD5),
        (TRAIN_LABEL_URL, TRAIN_LABEL_MD5),
        (TEST_IMAGE_URL, TEST_IMAGE_MD5),
        (TEST_LABEL_URL, TEST_LABEL_MD5),
    )
    for file_url, file_md5 in files:
        paddle.v2.dataset.common.download(file_url, 'mnist', file_md5)

python/paddle/v2/dataset/movielens.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030

3131
age_table = [1, 18, 25, 35, 45, 50, 56]
3232

33+
URL = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
34+
MD5 = 'c4d9eecfca2ab87c1945afe126590906'
35+
3336

3437
class MovieInfo(object):
3538
def __init__(self, index, categories, title):
@@ -77,10 +80,7 @@ def __repr__(self):
7780

7881

7982
def __initialize_meta_info__():
80-
fn = download(
81-
url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
82-
module_name='movielens',
83-
md5sum='c4d9eecfca2ab87c1945afe126590906')
83+
fn = download(URL, "movielens", MD5)
8484
global MOVIE_INFO
8585
if MOVIE_INFO is None:
8686
pattern = re.compile(r'^(.*)\((\d+)\)$')
@@ -205,5 +205,9 @@ def unittest():
205205
print train_count, test_count
206206

207207

208+
def download(url=URL, module_name="movielens", md5sum=MD5):
    """Invoke the shared dataset downloader for the MovieLens archive.

    This module-level name is also what ``__initialize_meta_info__`` calls
    as ``download(URL, "movielens", MD5)`` and whose result it uses, so the
    function accepts those three arguments and returns the downloader's
    result. Called with no arguments it requests the default ml-1m archive.

    Args:
        url: archive URL; defaults to the module-level ``URL``.
        module_name: name passed to the downloader; defaults to
            ``"movielens"``.
        md5sum: expected checksum; defaults to the module-level ``MD5``.

    Returns:
        Whatever ``paddle.v2.dataset.common.download`` returns.
    """
    # Imported locally and called by its fully qualified name so this
    # function never resolves back to itself through the bare `download`.
    import paddle.v2.dataset.common
    return paddle.v2.dataset.common.download(url, module_name, md5sum)
210+
211+
208212
if __name__ == '__main__':
209213
unittest()

python/paddle/v2/dataset/sentiment.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,7 @@ def test():
125125
"""
126126
data_set = load_sentiment_data()
127127
return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
128+
129+
130+
def download():
    """Call ``nltk.download`` for the ``'movie_reviews'`` corpus.

    The download directory is set to ``common.DATA_HOME``. Returns nothing.
    """
    corpus_name = 'movie_reviews'
    nltk.download(corpus_name, download_dir=common.DATA_HOME)

python/paddle/v2/dataset/uci_housing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,7 @@ def reader():
8989
yield d[:-1], d[-1:]
9090

9191
return reader
92+
93+
94+
def download():
    """Invoke the shared dataset downloader for the UCI housing data.

    Passes the module-level ``URL`` and ``MD5`` to
    ``paddle.v2.dataset.common.download`` under the ``'uci_housing'``
    module name. Returns nothing.
    """
    data_url, data_md5 = URL, MD5
    paddle.v2.dataset.common.download(data_url, 'uci_housing', data_md5)

python/paddle/v2/dataset/wmt14.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,7 @@ def test(dict_size):
103103
return reader_creator(
104104
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
105105
'test/test', dict_size)
106+
107+
108+
def download():
    """Invoke the shared dataset downloader for the WMT14 archive.

    Passes ``URL_TRAIN`` and ``MD5_TRAIN`` to
    ``paddle.v2.dataset.common.download`` under the ``'wmt14'`` module
    name. Returns nothing.
    """
    archive_url, archive_md5 = URL_TRAIN, MD5_TRAIN
    paddle.v2.dataset.common.download(archive_url, 'wmt14', archive_md5)

0 commit comments

Comments
 (0)