PaddlePaddle
diff --git a/‎doc/api/v2/data.rst‎
Lines changed: 9 additions & 4 deletions b/‎doc/api/v2/data.rst‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎doc/api/v2/run_logic.rst‎
Lines changed: 3 additions & 10 deletions b/‎doc/api/v2/run_logic.rst‎
Lines changed: 3 additions & 10 deletions
diff --git a/‎python/paddle/v2/data_feeder.py‎
Lines changed: 1 addition & 1 deletion b/‎python/paddle/v2/data_feeder.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎python/paddle/v2/dataset/cifar.py‎
Lines changed: 1 addition & 1 deletion b/‎python/paddle/v2/dataset/cifar.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎python/paddle/v2/dataset/conll05.py‎
Lines changed: 21 additions & 6 deletions b/‎python/paddle/v2/dataset/conll05.py‎
Lines changed: 21 additions & 6 deletions
diff --git a/‎python/paddle/v2/dataset/imdb.py‎
Lines changed: 41 additions & 3 deletions b/‎python/paddle/v2/dataset/imdb.py‎
Lines changed: 41 additions & 3 deletions
diff --git a/‎python/paddle/v2/dataset/imikolov.py‎
Lines changed: 32 additions & 2 deletions b/‎python/paddle/v2/dataset/imikolov.py‎
Lines changed: 32 additions & 2 deletions
diff --git a/‎python/paddle/v2/dataset/mnist.py‎
Lines changed: 1 addition & 1 deletion b/‎python/paddle/v2/dataset/mnist.py‎
Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,6 @@ mnist
     :members:
     :noindex:
 
-
 cifar
 +++++
 
@@ -61,7 +60,7 @@ conll05
 +++++++
 
 ..  automodule:: paddle.v2.dataset.conll05
-    :members:
+    :members: get_dict,get_embedding,test
     :noindex:
 
 imdb
@@ -79,12 +78,18 @@ imikolov
     :noindex:
 
 movielens
-+++++++++
++++++++++    
 
 ..  automodule:: paddle.v2.dataset.movielens
     :members:
     :noindex:
 
+..  autoclass:: paddle.v2.dataset.movielens.MovieInfo
+    :noindex:
+    
+..  autoclass:: paddle.v2.dataset.movielens.UserInfo
+    :noindex:
+
 sentiment
 +++++++++
 
@@ -102,7 +107,7 @@ uci_housing
 wmt14
 +++++
 
-..  automodule:: paddle.v2.dataset.uci_housing
+..  automodule:: paddle.v2.dataset.wmt14
     :members:
     :noindex:
 
@@ -13,25 +13,18 @@ Trainer
 =======
 
 ..  automodule:: paddle.v2.trainer
-    :members: Trainer
+    :members: SGD
     :noindex:
 
 Event
 =====
 
 ..  automodule:: paddle.v2.event
-    :members: Event
+    :members: 
     :noindex:
 
 Inference
 =========
 
-..  automodule:: paddle.v2.inference
-    :members: Inference
-    :noindex:
-
 ..  autofunction:: paddle.v2.infer
-    :members:
-    :noindex:
-
-
+    :noindex:
@@ -52,7 +52,7 @@ class DataFeeder(DataProviderConverter):
         #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ],  # first sample
         #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]   # second sample
         #                     ]
-        arg = feeder(minibatch_data)
+        arg = feeder.convert(minibatch_data)
 
     ..  note::
 
 
@@ -15,7 +15,7 @@
 CIFAR dataset.
 
 This module will download dataset from https://www.cs.toronto.edu/~kriz/cifar.html and
-parse train set and test set into paddle reader creators.
+parse train/test set into paddle reader creators.
 
 The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 
 images per class. There are 50000 training images and 10000 test images.
 
@@ -12,12 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-Conll 2005 dataset.  Paddle semantic role labeling Book and demo use this
-dataset as an example. Because Conll 2005 is not free in public, the default
-downloaded URL is test set of Conll 2005 (which is public). Users can change
-URL and MD5 to their Conll dataset.
-
-TODO(yuyang18): Complete comments.
+Conll05 dataset.
+The Paddle semantic role labeling Book and demo use this dataset as an example. Because
+Conll05 is not freely available to the public, the default download URL points to the test set of
+Conll05 (which is public). Users can change the URL and MD5 to point to their own Conll dataset.
+A pre-trained word vector model based on a Wikipedia corpus is used to initialize the SRL model.
 """
 
 import tarfile
@@ -180,17 +179,33 @@ def reader():
 
 
 def get_dict():
+    """
+    Get the word, verb and label dictionaries of the Wikipedia corpus.
+    """
     word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
     verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
     label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
     return word_dict, verb_dict, label_dict
 
 
 def get_embedding():
+    """
+    Get the trained word vector based on Wikipedia corpus.
+    """
     return download(EMB_URL, 'conll05st', EMB_MD5)
 
 
 def test():
+    """
+    Conll05 test set creator.
+
+    Because the train dataset is not free, the test dataset is used for training.
+    It returns a reader creator; each sample in the reader has nine features, including the sentence
+    sequence, predicate, predicate context, predicate context flag and tagged sequence.
+
+    :return: Train reader creator
+    :rtype: callable
+    """
     word_dict, verb_dict, label_dict = get_dict()
     reader = corpus_reader(
         download(DATA_URL, 'conll05st', DATA_MD5),
 
@@ -12,9 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
+IMDB dataset.
 
-TODO(yuyang18): Complete comments.
+This module downloads the IMDB dataset from
+http://ai.stanford.edu/%7Eamaas/data/sentiment/, which contains a set of 25,000
+highly polar movie reviews for training, and 25,000 for testing. In addition, this
+module provides an API for building the dictionary and for parsing the train and
+test sets into paddle reader creators.
 """
 
 import paddle.v2.dataset.common
@@ -30,8 +34,11 @@
 MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
 
 
-# Read files that match pattern.  Tokenize and yield each file.
 def tokenize(pattern):
+    """
+    Read files that match pattern.  Tokenize and yield each file.
+    """
+
     with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb',
                                                         MD5)) as tarf:
         # Note that we should use tarfile.next(), which does
@@ -48,6 +55,9 @@ def tokenize(pattern):
 
 
 def build_dict(pattern, cutoff):
+    """
+    Build a word dictionary whose keys are words and whose values are indices.
+    """
     word_freq = {}
     for doc in tokenize(pattern):
         for word in doc:
@@ -109,18 +119,46 @@ def reader():
 
 
 def train(word_idx):
+    """
+    IMDB train set creator.
+
+    It returns a reader creator, each sample in the reader is an index 
+    sequence and label in [0, 1].
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Train reader creator
+    :rtype: callable
+    """
     return reader_creator(
         re.compile("aclImdb/train/pos/.*\.txt$"),
         re.compile("aclImdb/train/neg/.*\.txt$"), word_idx, 1000)
 
 
 def test(word_idx):
+    """
+    IMDB test set creator.
+
+    It returns a reader creator, each sample in the reader is an index 
+    sequence and label in [0, 1].
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Test reader creator
+    :rtype: callable
+    """
     return reader_creator(
         re.compile("aclImdb/test/pos/.*\.txt$"),
         re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000)
 
 
 def word_dict():
+    """
+    Build word dictionary.
+
+    :return: Word dictionary
+    :rtype: dict
+    """
     return build_dict(
         re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
 
 
@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
+imikolov's simple dataset.
 
-Complete comments.
+This module will download dataset from http://www.fit.vutbr.cz/~imikolov/rnnlm/ and
+parse train/test set into paddle reader creators.
 """
 import paddle.v2.dataset.common
 import tarfile
@@ -40,6 +41,9 @@ def word_count(f, word_freq=None):
 
 
 def build_dict():
+    """
+    Build a word dictionary whose keys are words and whose values are indices.
+    """
     train_filename = './simple-examples/data/ptb.train.txt'
     test_filename = './simple-examples/data/ptb.valid.txt'
     with tarfile.open(
@@ -84,10 +88,36 @@ def reader():
 
 
 def train(word_idx, n):
+    """
+    imikolov train set creator.
+
+    It returns a reader creator, each sample in the reader is an index 
+    tuple.
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :param n: sliding window size
+    :type n: int
+    :return: Train reader creator
+    :rtype: callable
+    """
     return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n)
 
 
 def test(word_idx, n):
+    """
+    imikolov test set creator.
+
+    It returns a reader creator, each sample in the reader is an index 
+    tuple.
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :param n: sliding window size
+    :type n: int
+    :return: Test reader creator
+    :rtype: callable
+    """
     return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)
 
 
 
@@ -15,7 +15,7 @@
 MNIST dataset.
 
 This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
-parse train set and test set into paddle reader creators.
+parse train/test set into paddle reader creators.
 """
 import paddle.v2.dataset.common
 import subprocess