Skip to content

Commit 9f417f1

Browse files
committed
add some dataset docs
1 parent 0690a9f commit 9f417f1

File tree

5 files changed

+66
-11
lines changed

5 files changed

+66
-11
lines changed

doc/api/v2/data.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
========
2-
Datasets
3-
========
1+
==================================
2+
Data Reader Interface and DataSets
3+
==================================
44

55

66
DataTypes

doc/api/v2/run_logic.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ Event
2626
Inference
2727
=========
2828

29-
.. autofunction:: paddle.v2.infer
29+
.. automodule:: paddle.v2.inference
3030
:members: Inference
3131
:noindex:
32+
33+
.. autofunction:: paddle.v2.infer
34+
:members:
35+
:noindex:
36+
37+

python/paddle/v2/dataset/cifar.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,17 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
"""
15-
CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
15+
CIFAR dataset.
16+
17+
This module will download the dataset from https://www.cs.toronto.edu/~kriz/cifar.html and
18+
parse train set and test set into paddle reader creators.
19+
20+
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000
21+
images per class. There are 50000 training images and 10000 test images.
22+
23+
The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes containing
24+
600 images each. There are 500 training images and 100 testing images per class.
1625
17-
TODO(yuyang18): Complete the comments.
1826
"""
1927

2028
import cPickle
@@ -54,20 +62,56 @@ def reader():
5462

5563

5664
def train100():
65+
"""
66+
CIFAR-100 train set creator.
67+
68+
It returns a reader creator, each sample in the reader is image pixels in
69+
[0, 1] and label in [0, 99].
70+
71+
:return: Train reader creator
72+
:rtype: callable
73+
"""
5774
return reader_creator(
5875
download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train')
5976

6077

6178
def test100():
79+
"""
80+
CIFAR-100 test set creator.
81+
82+
It returns a reader creator, each sample in the reader is image pixels in
83+
[0, 1] and label in [0, 99].
84+
85+
:return: Test reader creator.
86+
:rtype: callable
87+
"""
6288
return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test')
6389

6490

6591
def train10():
92+
"""
93+
CIFAR-10 train set creator.
94+
95+
It returns a reader creator, each sample in the reader is image pixels in
96+
[0, 1] and label in [0, 9].
97+
98+
:return: Train reader creator
99+
:rtype: callable
100+
"""
66101
return reader_creator(
67102
download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch')
68103

69104

70105
def test10():
106+
"""
107+
CIFAR-10 test set creator.
108+
109+
It returns a reader creator, each sample in the reader is image pixels in
110+
[0, 1] and label in [0, 9].
111+
112+
:return: Test reader creator.
113+
:rtype: callable
114+
"""
71115
return reader_creator(
72116
download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch')
73117

python/paddle/v2/dataset/conll05.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,6 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
15-
import tarfile
16-
import gzip
17-
import itertools
18-
from common import download
1914
"""
2015
Conll 2005 dataset. Paddle semantic role labeling Book and demo use this
2116
dataset as an example. Because Conll 2005 is not freely available to the public, the default
@@ -25,6 +20,12 @@
2520
TODO(yuyang18): Complete comments.
2621
"""
2722

23+
import tarfile
24+
import gzip
25+
import itertools
26+
from common import download
27+
28+
2829
__all__ = ['test, get_dict', 'get_embedding']
2930

3031
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'

python/paddle/v2/dataset/movielens.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
"""
1515
Movielens 1-M dataset.
1616
17+
GroupLens Research collected and made available rating data sets from the
18+
MovieLens web site (http://movielens.org). The MovieLens 1-M dataset contains 1 million
19+
ratings from 6000 users on 4000 movies.
20+
1721
TODO(yuyang18): Complete comments.
1822
"""
1923

0 commit comments

Comments
 (0)