PaddlePaddle
diff --git a/‎doc/api/v2/config/optimizer.rst‎
Lines changed: 0 additions & 2 deletions b/‎doc/api/v2/config/optimizer.rst‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎doc/api/v2/data.rst‎
Lines changed: 11 additions & 6 deletions b/‎doc/api/v2/data.rst‎
Lines changed: 11 additions & 6 deletions
diff --git a/‎doc/api/v2/run_logic.rst‎
Lines changed: 4 additions & 0 deletions b/‎doc/api/v2/run_logic.rst‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎python/paddle/v2/data_feeder.py‎
Lines changed: 1 addition & 1 deletion b/‎python/paddle/v2/data_feeder.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎python/paddle/v2/dataset/cifar.py‎
Lines changed: 49 additions & 2 deletions b/‎python/paddle/v2/dataset/cifar.py‎
Lines changed: 49 additions & 2 deletions
diff --git a/‎python/paddle/v2/dataset/conll05.py‎
Lines changed: 25 additions & 8 deletions b/‎python/paddle/v2/dataset/conll05.py‎
Lines changed: 25 additions & 8 deletions
diff --git a/‎python/paddle/v2/dataset/imdb.py‎
Lines changed: 41 additions & 3 deletions b/‎python/paddle/v2/dataset/imdb.py‎
Lines changed: 41 additions & 3 deletions
diff --git a/‎python/paddle/v2/dataset/imikolov.py‎
Lines changed: 34 additions & 2 deletions b/‎python/paddle/v2/dataset/imikolov.py‎
Lines changed: 34 additions & 2 deletions
@@ -1,5 +1,3 @@
-..  _api_v2.optimizer:
-
 ==========
 Optimizer
 ==========
 
@@ -1,6 +1,6 @@
-========
-Datasets
-========
+==================================
+Data Reader Interface and DataSets
+==================================
 
 
 DataTypes
@@ -49,7 +49,6 @@ mnist
     :members:
     :noindex:
 
-
 cifar
 +++++
 
@@ -61,7 +60,7 @@ conll05
 +++++++
 
 ..  automodule:: paddle.v2.dataset.conll05
-    :members:
+    :members: get_dict,get_embedding,test
     :noindex:
 
 imdb
@@ -85,6 +84,12 @@ movielens
     :members:
     :noindex:
 
+..  autoclass:: paddle.v2.dataset.movielens.MovieInfo
+    :noindex:
+    
+..  autoclass:: paddle.v2.dataset.movielens.UserInfo
+    :noindex:
+
 sentiment
 +++++++++
 
@@ -102,7 +107,7 @@ uci_housing
 wmt14
 +++++
 
-..  automodule:: paddle.v2.dataset.uci_housing
+..  automodule:: paddle.v2.dataset.wmt14
     :members:
     :noindex:
 
@@ -6,22 +6,26 @@ Parameters
 ==========
 
 ..  automodule:: paddle.v2.parameters
+    :members: Parameters
     :noindex:
 
 Trainer
 =======
 
 ..  automodule:: paddle.v2.trainer
+    :members: SGD
     :noindex:
 
 Event
 =====
 
 ..  automodule:: paddle.v2.event
+    :members:
     :noindex:
 
 Inference
 =========
 
 ..  autofunction:: paddle.v2.infer
     :noindex:
+    
@@ -67,7 +67,7 @@ class DataFeeder(DataProviderConverter):
         #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ],  # first sample
         #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]   # second sample
         #                     ]
-        arg = feeder(minibatch_data)
+        arg = feeder.convert(minibatch_data)
 
     ..  note::
 
 
@@ -12,9 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
+CIFAR dataset.
+
+This module will download dataset from
+https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into
+paddle reader creators.
+
+The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
+with 6000 images per class. There are 50000 training images and 10000 test
+images.
+
+The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes
+containing 600 images each. There are 500 training images and 100 testing
+images per class.
 
-TODO(yuyang18): Complete the comments.
 """
 
 import cPickle
@@ -54,20 +65,56 @@ def reader():
 
 
 def train100():
+    """
+    CIFAR-100 training set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+
+    :return: Training reader creator
+    :rtype: callable
+    """
     return reader_creator(
         download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train')
 
 
 def test100():
+    """
+    CIFAR-100 test set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
     return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test')
 
 
 def train10():
+    """
+    CIFAR-10 training set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Training reader creator
+    :rtype: callable
+    """
     return reader_creator(
         download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch')
 
 
 def test10():
+    """
+    CIFAR-10 test set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch')
 
 
@@ -11,19 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Conll05 dataset.
+Paddle semantic role labeling Book and demo use this dataset as an example.
+Because Conll05 is not freely available, the default download URL points to
+the test set of Conll05 (which is public). Users can change URL and MD5 to
+their own Conll dataset. A pre-trained word vector model based on a Wikipedia
+corpus is used to initialize the SRL model.
+"""
 
 import tarfile
 import gzip
 import itertools
 from common import download
-"""
-Conll 2005 dataset.  Paddle semantic role labeling Book and demo use this
-dataset as an example. Because Conll 2005 is not free in public, the default
-downloaded URL is test set of Conll 2005 (which is public). Users can change
-URL and MD5 to their Conll dataset.
-
-TODO(yuyang18): Complete comments.
-"""
 
 __all__ = ['test, get_dict', 'get_embedding']
 
@@ -179,17 +179,34 @@ def reader():
 
 
 def get_dict():
+    """
+    Get the word, verb and label dictionary of Wikipedia corpus.
+    """
     word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
     verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
     label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
     return word_dict, verb_dict, label_dict
 
 
 def get_embedding():
+    """
+    Get the trained word vector based on Wikipedia corpus.
+    """
     return download(EMB_URL, 'conll05st', EMB_MD5)
 
 
 def test():
+    """
+    Conll05 test set creator.
+
+    Because the training dataset is not free, the test dataset is used for
+    training. It returns a reader creator, each sample in the reader is nine
+    features, including sentence sequence, predicate, predicate context,
+    predicate context flag and tagged sequence.
+
+    :return: Training reader creator
+    :rtype: callable
+    """
     word_dict, verb_dict, label_dict = get_dict()
     reader = corpus_reader(
         download(DATA_URL, 'conll05st', DATA_MD5),
 
@@ -12,9 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
+IMDB dataset.
 
-TODO(yuyang18): Complete comments.
+This module downloads IMDB dataset from
+http://ai.stanford.edu/%7Eamaas/data/sentiment/. This dataset contains a set
+of 25,000 highly polar movie reviews for training, and 25,000 for testing.
+Besides, this module also provides API for building dictionary.
 """
 
 import paddle.v2.dataset.common
@@ -31,8 +34,11 @@
 MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
 
 
-# Read files that match pattern.  Tokenize and yield each file.
 def tokenize(pattern):
+    """
+    Read files that match the given pattern.  Tokenize and yield each file.
+    """
+
     with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb',
                                                         MD5)) as tarf:
         # Note that we should use tarfile.next(), which does
@@ -49,6 +55,10 @@ def tokenize(pattern):
 
 
 def build_dict(pattern, cutoff):
+    """
+    Build a word dictionary from the corpus. Keys of the dictionary are words,
+    and values are zero-based IDs of these words.
+    """
     word_freq = collections.defaultdict(int)
     for doc in tokenize(pattern):
         for word in doc:
@@ -110,18 +120,46 @@ def reader():
 
 
 def train(word_idx):
+    """
+    IMDB training set creator.
+
+    It returns a reader creator, each sample in the reader is a zero-based ID
+    sequence and label in [0, 1].
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Training reader creator
+    :rtype: callable
+    """
     return reader_creator(
         re.compile("aclImdb/train/pos/.*\.txt$"),
         re.compile("aclImdb/train/neg/.*\.txt$"), word_idx, 1000)
 
 
 def test(word_idx):
+    """
+    IMDB test set creator.
+
+    It returns a reader creator, each sample in the reader is a zero-based ID
+    sequence and label in [0, 1].
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Test reader creator
+    :rtype: callable
+    """
     return reader_creator(
         re.compile("aclImdb/test/pos/.*\.txt$"),
         re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000)
 
 
 def word_dict():
+    """
+    Build a word dictionary from the corpus.
+
+    :return: Word dictionary
+    :rtype: dict
+    """
     return build_dict(
         re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
 
 
@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
+imikolov's simple dataset.
 
-Complete comments.
+This module will download dataset from
+http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
+into paddle reader creators.
 """
 import paddle.v2.dataset.common
 import collections
@@ -40,6 +42,10 @@ def word_count(f, word_freq=None):
 
 
 def build_dict():
+    """
+    Build a word dictionary from the corpus. Keys of the dictionary are words,
+    and values are zero-based IDs of these words.
+    """
     train_filename = './simple-examples/data/ptb.train.txt'
     test_filename = './simple-examples/data/ptb.valid.txt'
     with tarfile.open(
@@ -84,10 +90,36 @@ def reader():
 
 
 def train(word_idx, n):
+    """
+    imikolov training set creator.
+
+    It returns a reader creator, each sample in the reader is a word ID
+    tuple.
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :param n: sliding window size
+    :type n: int
+    :return: Training reader creator
+    :rtype: callable
+    """
     return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n)
 
 
 def test(word_idx, n):
+    """
+    imikolov test set creator.
+
+    It returns a reader creator, each sample in the reader is a word ID
+    tuple.
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :param n: sliding window size
+    :type n: int
+    :return: Test reader creator
+    :rtype: callable
+    """
     return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,3 @@`
`1`		`-.. _api_v2.optimizer:`
`2`		`-`
`3`	`1`	`==========`
`4`	`2`	`Optimizer`
`5`	`3`	`==========`