
Commit 8c9041f

Refine LinearCRF
1 parent 0d29e65 commit 8c9041f

3 files changed (+28, -20 lines)

paddle/fluid/operators/linear_chain_crf_op.cc

Lines changed: 0 additions & 2 deletions
@@ -67,8 +67,6 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
         "mini-batch. Note: S is equal to the sequence number in a mini-batch. "
         "The output is no longer a LoDTensor.");
     AddComment(R"DOC(
-LinearChainCRF Operator.
-
 Conditional Random Field defines an undirected probabilistic graph with nodes
 denoting random variables and edges denoting dependencies between these
 variables. CRF learns the conditional probability $P(Y|X)$, where
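
As background for the DOC string being edited here, and not part of the commit: a linear-chain CRF scores a tag sequence with per-step emission scores plus pairwise transition scores, and normalizes by a partition function computed with the forward algorithm. A minimal numpy sketch under those assumptions, with all names hypothetical:

```python
import numpy as np

def linear_chain_crf_log_prob(emissions, transitions, tags):
    """Log of P(tags | emissions) for one sequence.

    emissions:   (seq_len, num_tags) per-step unary scores.
    transitions: (num_tags, num_tags); transitions[i, j] scores
                 moving from tag i to tag j.
    tags:        (seq_len,) integer tag path.
    """
    seq_len, _ = emissions.shape

    # Unnormalized score of the given path: emissions plus transitions.
    score = emissions[np.arange(seq_len), tags].sum()
    score += transitions[tags[:-1], tags[1:]].sum()

    # log Z via the forward algorithm (log-sum-exp over all paths).
    alpha = emissions[0]
    for t in range(1, seq_len):
        m = alpha[:, None] + transitions          # m[i, j] = alpha[i] + T[i, j]
        alpha = emissions[t] + m.max(0) + np.log(np.exp(m - m.max(0)).sum(0))
    log_z = alpha.max() + np.log(np.exp(alpha - alpha.max()).sum())

    return score - log_z

# Tiny smoke test with random scores.
rng = np.random.default_rng(0)
print(linear_chain_crf_log_prob(rng.normal(size=(5, 4)),
                                rng.normal(size=(4, 4)),
                                np.array([0, 1, 3, 2, 1])))
```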

python/paddle/fluid/layers/layer_function_generator.py

Lines changed: 9 additions & 1 deletion
@@ -224,6 +224,9 @@ def __impl__(func):
     return __impl__
 
 
+_inline_math_single_dollar = re.compile(r"\$([^\$]+)\$")
+
+
 def templatedoc(op_type=None):
     """
     Decorator of layer function. It will use the docstring from the layer
@@ -241,6 +244,9 @@ def templatedoc(op_type=None):
     def trim_ending_dot(msg):
         return msg.rstrip('.')
 
+    def escape_inline_math(msg):
+        return _inline_math_single_dollar.sub(repl=r':math:`\1`', string=msg)
+
     def __impl__(func):
         if op_type is None:
             op_type_name = func.__name__
@@ -254,8 +260,10 @@ def __impl__(func):
         for line in comment_lines:
             line = line.strip()
             if len(line) != 0:
-                comment += line
+                comment += escape_inline_math(line)
                 comment += " "
+            elif len(comment) != 0:
+                comment += "\n \n "
 
         args = {"comment": trim_ending_dot(comment)}
         for each_input in op_proto.inputs:
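
The new escape_inline_math hook rewrites single-dollar inline math in an operator's C++ DOC string (for example the $P(Y|X)$ visible in the first file above) into the reST :math: role that Sphinx renders, while the new elif branch keeps blank DOC lines as paragraph breaks instead of collapsing them. A quick standalone check of the committed regex; only the print driver is added here:

```python
import re

# Same pattern and substitution as the commit.
_inline_math_single_dollar = re.compile(r"\$([^\$]+)\$")

def escape_inline_math(msg):
    # $...$  ->  :math:`...`
    return _inline_math_single_dollar.sub(repl=r':math:`\1`', string=msg)

print(escape_inline_math("CRF learns the conditional probability $P(Y|X)$."))
# CRF learns the conditional probability :math:`P(Y|X)`.
```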

python/paddle/fluid/layers/learning_rate_scheduler.py

Lines changed: 19 additions & 17 deletions
@@ -11,6 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+When training a model, it's often useful to decay the
+learning rate during training process, this is called
+learning_rate_decay. There are many strategies to do
+this, this module will provide some classical method.
+User can also implement their own learning_rate_decay
+strategy according to this module.
+"""
 
 import control_flow
 import nn
@@ -22,14 +30,6 @@
     'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
     'polynomial_decay', 'piecewise_decay', 'noam_decay'
 ]
-"""
-When training a model, it's often useful to decay the
-learning rate during training process, this is called
-learning_rate_decay. There are many strategies to do
-this, this module will provide some classical method.
-User can also implement their own learning_rate_decay
-strategy according to this module.
-"""
 
 
 def _decay_step_counter(begin=0):
@@ -41,18 +41,20 @@ def _decay_step_counter(begin=0):
 
 
 def noam_decay(d_model, warmup_steps):
-    """Apply decay to learning rate.
-    ```python
-    lr_value = np.power(d_model, -0.5) * np.min([
-        np.power(current_steps, -0.5),
-        np.power(warmup_steps, -1.5) * current_steps
-    ])
-    ```
+    """
+    Noam decay method. The numpy implementation of noam decay as follows.
+
+    >>> import numpy as np
+    >>> lr_value = np.power(d_model, -0.5) * np.min([
+    >>>     np.power(current_steps, -0.5),
+    >>>     np.power(warmup_steps, -1.5) * current_steps])
+
+    Please reference `attention is all you need
+    <https://arxiv.org/pdf/1706.03762.pdf>`_.
 
     Args:
         d_model(Variable): The dimensionality of input and output of model.
-        Reference: attention is all you need
-        https://arxiv.org/pdf/1706.03762.pdf
+
         warmup_steps(Variable): A super parameter.
 
     Returns:
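
For reference, the doctest in the new docstring corresponds to this standalone numpy sketch of the schedule (the noam_lr wrapper and the loop are illustrative, not part of the commit): the learning rate grows linearly for the first warmup_steps steps, then decays with the inverse square root of the step count, peaking exactly at step == warmup_steps.

```python
import numpy as np

def noam_lr(d_model, warmup_steps, current_steps):
    # min() switches from the linear-warmup branch to the
    # 1/sqrt(step) decay branch at current_steps == warmup_steps.
    return np.power(d_model, -0.5) * np.min([
        np.power(current_steps, -0.5),
        np.power(warmup_steps, -1.5) * current_steps,
    ])

# Typical Transformer settings: d_model=512, warmup_steps=4000.
for step in (100, 4000, 100000):
    print(step, noam_lr(512, 4000, step))
```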
