     'GradientClipByValue',
     'GradientClipByNorm',
     'GradientClipByGlobalNorm',
-    'append_gradient_clip_ops',
-    'error_clip_callback',
 ]


@@ -38,6 +36,25 @@ def append_clip_op(self, block, grad_name):


 class ErrorClipByValue(BaseErrorClipAttr):
+    """
+    Clips tensor values to the range [min, max].
+
+    Given a tensor t, this operation clips its values into the range [min, max] in place.
+
+    - Any values less than min are set to min.
+    - Any values greater than max are set to max.
+
+    Args:
+        max (float): The maximum value to clip by.
+        min (float, optional): The minimum value to clip by. If not set by the user, \
+            it will be set to -max by the framework.
+
+    Examples:
+        .. code-block:: python
+
+            var = fluid.framework.Variable(..., error_clip=ErrorClipByValue(max=5.0), ...)
+    """
+
     def __init__(self, max, min=None):
         max = float(max)
         if min is None:
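The [min, max] semantics described in the new ErrorClipByValue docstring can be pictured with a tiny standalone sketch. This is illustration only, not part of the diff, and numpy is assumed purely for the demo.

# Illustration only: elementwise clipping to [min, max], with min defaulting
# to -max as the docstring above describes. numpy is an assumption for the demo.
import numpy as np

t = np.array([-7.0, -0.5, 0.0, 3.0, 9.0])
max_v, min_v = 5.0, None
if min_v is None:                    # mirrors the framework default of min = -max
    min_v = -max_v
clipped = np.clip(t, min_v, max_v)   # values below min become min, values above max become max
print(clipped)                       # [-5.  -0.5  0.   3.   5. ]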
@@ -99,6 +116,31 @@ def create_operators(self, param, grad):


 class GradientClipByValue(BaseGradientClipAttr):
+    """
+    Clips gradient values to the range [min, max].
+
+    Given a tensor t, this operation clips its values into the range [min, max] in place.
+
+    - Any values less than min are set to min.
+    - Any values greater than max are set to max.
+
+    Args:
+        max (float): The maximum value to clip by.
+        min (float, optional): The minimum value to clip by. If not set by the user, \
+            it will be set to -max by the framework.
+
+    Examples:
+        .. code-block:: python
+
+            w_param_attrs = ParamAttr(name=None,
+                                      initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+                                      learning_rate=1.0,
+                                      regularizer=L1Decay(1.0),
+                                      trainable=True,
+                                      clip=GradientClipByValue(min=-1.0, max=1.0))
+            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
+    """
+
     def __init__(self, max, min=None):
         max = float(max)
         if min is None:
@@ -120,6 +162,37 @@ def create_operators(self, param, grad):


 class GradientClipByNorm(BaseGradientClipAttr):
+    """
+    Clips tensor values to a maximum L2-norm.
+
+    This operator limits the L2 norm of the input :math:`X` within :math:`max\_norm`.
+    If the L2 norm of :math:`X` is less than or equal to :math:`max\_norm`, :math:`Out`
+    will be the same as :math:`X`. If the L2 norm of :math:`X` is greater than
+    :math:`max\_norm`, :math:`X` will be linearly scaled to make the L2 norm of
+    :math:`Out` equal to :math:`max\_norm`, as shown in the following formula:
+
+    .. math::
+
+        Out = \\frac{max\_norm * X}{norm(X)},
+
+    where :math:`norm(X)` represents the L2 norm of :math:`X`.
+
+    Args:
+        clip_norm (float): The maximum norm value.
+
+    Examples:
+        .. code-block:: python
+
+            w_param_attrs = ParamAttr(name=None,
+                                      initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+                                      learning_rate=1.0,
+                                      regularizer=L1Decay(1.0),
+                                      trainable=True,
+                                      clip=GradientClipByNorm(clip_norm=2.0))
+            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
+
+    """
+
     def __init__(self, clip_norm):
         self.clip_norm = clip_norm

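A minimal sketch of the per-tensor L2-norm formula documented above, for illustration only (not part of the diff, and not the operator implementation); numpy is assumed.

# Illustration only: rescale X when its L2 norm exceeds clip_norm, so that
# Out = clip_norm * X / norm(X); otherwise X is returned unchanged.
import numpy as np

def clip_by_norm(x, clip_norm):
    norm = np.sqrt(np.sum(np.square(x)))
    if norm <= clip_norm:
        return x                      # small enough, leave as is
    return x * (clip_norm / norm)     # now the L2 norm equals clip_norm

g = np.array([3.0, 4.0])              # L2 norm = 5.0
print(clip_by_norm(g, 2.0))           # [1.2 1.6], L2 norm = 2.0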
@@ -135,6 +208,44 @@ def create_operators(self, param, grad):


 class GradientClipByGlobalNorm(BaseGradientClipAttr):
+    """
+    Clips values of multiple tensors by the ratio of the sum of their norms.
+
+    Given a list of tensors t_list, and a clipping threshold clip_norm, this
+    operation returns a list of clipped tensors list_clipped and the global
+    norm (global_norm) of all tensors in t_list.
+
+    To perform the clipping, the values :math:`t\_list[i]` are set to:
+
+    .. math::
+
+        t\_list[i] = t\_list[i] * \\frac{clip\_norm}{\max(global\_norm, clip\_norm)}
+
+    where:
+
+    .. math::
+
+        global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2}
+
+    If :math:`clip\_norm > global\_norm` then the entries in t_list remain as they are,
+    otherwise they are all shrunk by the global ratio.
+
+    Args:
+        clip_norm (float): The maximum norm value.
+        group_name (str, optional): The group name for this clip.
+
+    Examples:
+        .. code-block:: python
+
+            p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
+
+            with fluid.program_guard(main_program=prog_clip):
+                fluid.clip.set_gradient_clip(
+                    fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
+            p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
+
+    """
+
     def __init__(self, clip_norm, group_name="default_group"):
         if not isinstance(group_name, basestring):
             raise TypeError("'group_name' must be a basestring.")
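The global-norm formula above can be sketched in a few lines as well; again this is illustration only, assumes numpy, and is not the operator implementation.

# Illustration only: scale every tensor by clip_norm / max(global_norm, clip_norm),
# where global_norm is the L2 norm of the concatenation of all tensors.
import numpy as np

def clip_by_global_norm(t_list, clip_norm):
    global_norm = np.sqrt(sum(np.sum(np.square(t)) for t in t_list))
    scale = clip_norm / max(global_norm, clip_norm)   # <= 1.0, shared by all tensors
    return [t * scale for t in t_list], global_norm

grads = [np.array([3.0, 4.0]), np.array([0.0, 12.0])]  # per-tensor norms 5 and 12
clipped, g_norm = clip_by_global_norm(grads, clip_norm=2.0)
print(g_norm)                                          # 13.0 = sqrt(5**2 + 12**2)
print([c.tolist() for c in clipped])                   # every entry scaled by 2/13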
@@ -183,15 +294,16 @@ def create_operators(self, param, grad):

 def set_gradient_clip(clip, param_list=None, program=None):
     """
-    To specify parameters that require gradient clip.
-    Args:
-        clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
-                which describes the type and detailed attributes of required gradient clip.
-        param_list(list, None by default): Parameters that require gradient clip.
-                It can be a list of parameter or a list of parameter's name.
-                When it's None, all parameters in the program will be included.
-        program(Program, None by default): The program where parameters are.
-                Will be the default main program when assigned with None.
+    To specify parameters that require gradient clip.
+
+    Args:
+        clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
+                which describes the type and detailed attributes of the required gradient clip.
+        param_list(list(Variable), optional): Parameters that require gradient clip.
+                It can be a list of parameters or a list of parameter names.
+                When it is None, all parameters in the program will be included.
+        program(Program, optional): The program where the parameters are located.
+                Defaults to the default main program when set to None.
     """
     if not isinstance(clip, BaseGradientClipAttr):
         raise TypeError(
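As a usage note for the signature documented in this hunk, here is a hedged sketch (not from the diff) of selecting one clip strategy for a whole program; it assumes the paddle.fluid import style already used in the docstring examples above.

# Hedged usage sketch: apply value clipping to all parameters of the default
# main program, following the set_gradient_clip signature documented above.
import paddle.fluid as fluid

fluid.clip.set_gradient_clip(
    fluid.clip.GradientClipByValue(max=1.0, min=-1.0),
    param_list=None,   # None means every parameter in the program
    program=None)      # None means the default main program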