Skip to content

Commit e6a5c91

Browse files
authored
Merge pull request #9583 from helinwang/v2_comp
Add paddle.v2.reader,dataset back for backward compatibility
2 parents 991b582 + f801f74 commit e6a5c91

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+4718
-3
lines changed

python/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ if (WITH_TESTING)
8181
# enable v2 API unittest only when paddle swig api is compiled
8282
add_subdirectory(paddle/v2/tests)
8383
add_subdirectory(paddle/v2/plot/tests)
84+
add_subdirectory(paddle/v2/reader/tests)
8485
endif()
8586
endif()
8687
add_subdirectory(paddle/fluid/tests)

python/paddle/dataset/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
'cifar',
3838
'movielens',
3939
'conll05',
40-
'sentiment'
40+
'sentiment',
4141
'uci_housing',
4242
'wmt14',
4343
'wmt16',

python/paddle/v2/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,17 @@
2222
import topology
2323
import networks
2424
import evaluator
25+
from . import dataset
26+
from . import reader
2527
from . import plot
2628
import attr
2729
import op
2830
import pooling
2931
import inference
3032
import networks
33+
import minibatch
3134
import plot
35+
import image
3236
import paddle.trainer.config_parser as cp
3337

3438
__all__ = [
@@ -44,11 +48,14 @@
4448
'data_type',
4549
'attr',
4650
'pooling',
51+
'dataset',
52+
'reader',
4753
'topology',
4854
'networks',
4955
'infer',
5056
'plot',
5157
'evaluator',
58+
'image',
5259
'master',
5360
]
5461

@@ -146,3 +153,4 @@ def init(**kwargs):
146153

147154

148155
infer = inference.infer
156+
batch = minibatch.batch

python/paddle/v2/dataset/__init__.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""
15+
Dataset package.
16+
"""
17+
18+
import mnist
19+
import imikolov
20+
import imdb
21+
import cifar
22+
import movielens
23+
import conll05
24+
import uci_housing
25+
import sentiment
26+
import wmt14
27+
import wmt16
28+
import mq2007
29+
import flowers
30+
import voc2012
31+
32+
# Public dataset submodules re-exported by paddle.v2.dataset; each entry
# matches one of the `import` statements above.
__all__ = [
    'mnist',
    'imikolov',
    'imdb',
    'cifar',
    'movielens',
    'conll05',
    'sentiment',
    'uci_housing',
    'wmt14',
    'wmt16',
    'mq2007',
    'flowers',
    'voc2012',
]

python/paddle/v2/dataset/cifar.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""
15+
CIFAR dataset.
16+
17+
This module will download dataset from
18+
https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into
19+
paddle reader creators.
20+
21+
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
22+
with 6000 images per class. There are 50000 training images and 10000 test
23+
images.
24+
25+
The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes
26+
containing 600 images each. There are 500 training images and 100 testing
27+
images per class.
28+
29+
"""
30+
31+
import cPickle
32+
import itertools
33+
import numpy
34+
import paddle.v2.dataset.common
35+
import tarfile
36+
37+
__all__ = ['train100', 'test100', 'train10', 'test10', 'convert']
38+
39+
URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
40+
CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
41+
CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a'
42+
CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz'
43+
CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'
44+
45+
46+
def reader_creator(filename, sub_name):
    """
    Build a reader over the pickled batches stored in a CIFAR tar archive.

    Every archive member whose name contains *sub_name* is un-pickled; each
    batch dict carries a 'data' array plus either 'labels' (CIFAR-10) or
    'fine_labels' (CIFAR-100).

    :param filename: path to the downloaded .tar.gz archive
    :param sub_name: substring selecting the members to read
    :return: reader yielding (float32 pixels in [0, 1], int label) pairs
    :rtype: callable
    """

    def samples_of(batch):
        pixels = batch['data']
        labels = batch.get('labels', batch.get('fine_labels', None))
        assert labels is not None
        for image, category in itertools.izip(pixels, labels):
            # Scale raw byte pixels from [0, 255] down to [0, 1].
            yield (image / 255.0).astype(numpy.float32), int(category)

    def reader():
        with tarfile.open(filename, mode='r') as archive:
            selected = (member.name for member in archive
                        if sub_name in member.name)
            for member_name in selected:
                batch = cPickle.load(archive.extractfile(member_name))
                for sample in samples_of(batch):
                    yield sample

    return reader
65+
66+
67+
def train100():
    """
    CIFAR-100 training set creator.

    Each sample produced by the returned reader is a pair of image pixels
    in [0, 1] and an integer label in [0, 99].

    :return: Training reader creator
    :rtype: callable
    """
    # download() caches the archive locally and verifies its MD5.
    archive = paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar',
                                                CIFAR100_MD5)
    return reader_creator(archive, 'train')
80+
81+
82+
def test100():
    """
    CIFAR-100 test set creator.

    It returns a reader creator, each sample in the reader is image pixels in
    [0, 1] and label in [0, 99].

    :return: Test reader creator.
    :rtype: callable
    """
    return reader_creator(
        paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
        'test')
95+
96+
97+
def train10():
    """
    CIFAR-10 training set creator.

    Each sample produced by the returned reader is a pair of image pixels
    in [0, 1] and an integer label in [0, 9].

    :return: Training reader creator
    :rtype: callable
    """
    # download() caches the archive locally and verifies its MD5.
    archive = paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar',
                                                CIFAR10_MD5)
    return reader_creator(archive, 'data_batch')
110+
111+
112+
def test10():
    """
    CIFAR-10 test set creator.

    Each sample produced by the returned reader is a pair of image pixels
    in [0, 1] and an integer label in [0, 9].

    :return: Test reader creator.
    :rtype: callable
    """
    # download() caches the archive locally and verifies its MD5.
    archive = paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar',
                                                CIFAR10_MD5)
    return reader_creator(archive, 'test_batch')
125+
126+
127+
def fetch():
    # Pre-download both CIFAR archives into the local dataset cache so
    # later reader creation does not hit the network.
    paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5)
    paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5)
130+
131+
132+
def convert(path):
    """
    Converts dataset to recordio format
    """
    # Same four conversions as before, driven by a table instead of
    # repeated statements; order and batch size (1000) are unchanged.
    jobs = [
        (train100(), "cifar_train100"),
        (test100(), "cifar_test100"),
        (train10(), "cifar_train10"),
        (test10(), "cifar_test10"),
    ]
    for reader, name in jobs:
        paddle.v2.dataset.common.convert(path, reader, 1000, name)

0 commit comments

Comments
 (0)