Skip to content

Commit c0d2ca5

Browse files
kexinzhaowangkuiyi
authored and committed
polish_g_to_l (#5367)
1 parent af760ea commit c0d2ca5

File tree

11 files changed

+187
-135
lines changed

11 files changed

+187
-135
lines changed

paddle/operators/gather_op.cc

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,28 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
6767
: OpProtoAndCheckerMaker(proto, op_checker) {
6868
AddInput("X", "The source input of gather op");
6969
AddInput("Index", "The index input of gather op");
70-
AddOutput("Out", "The output of add op");
70+
AddOutput("Out", "The output of gather op");
7171
AddComment(R"DOC(
72-
Gather Operator by selecting from the first axis,
72+
Gather Operator.
73+
74+
$Out = X[Index]$
75+
76+
Out is obtained by gathering entries of the outer-most dimension
77+
of X indexed by Index and concatenate them together.
78+
79+
Example:
80+
81+
X = [[1, 2],
82+
[3, 4],
83+
[5, 6]]
84+
85+
Index = [[1, 2]]
86+
87+
Then:
88+
89+
Out = [[3, 4],
90+
[5, 6]]
7391
74-
Out = X[Index]
7592
)DOC");
7693
}
7794
};

paddle/operators/gaussian_random_op.cc

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -68,21 +68,35 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
6868
GaussianRandomOpMaker(framework::OpProto* proto,
6969
framework::OpAttrChecker* op_checker)
7070
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
71-
AddOutput("Out", "output matrix of random op");
72-
AddComment(R"DOC(
73-
GaussianRandom operator.
74-
Use to initialize tensor with gaussian random generator.
75-
)DOC");
71+
AddOutput("Out", "Output matrix of gaussian random op");
7672

77-
AddAttr<std::vector<int>>("shape", "The dimension of random tensor.");
78-
AddAttr<float>("mean", "mean of random tensor.").SetDefault(.0f);
79-
AddAttr<float>("std", "std of random tensor.").SetDefault(1.0f);
73+
AddAttr<std::vector<int>>("shape",
74+
"(vector<int>) "
75+
"The dimension of random tensor.");
76+
AddAttr<float>("mean",
77+
"(float, default 0.0) "
78+
"mean of random tensor.")
79+
.SetDefault(.0f);
80+
AddAttr<float>("std",
81+
"(float, default 1.0) "
82+
"std of random tensor.")
83+
.SetDefault(1.0f);
8084
AddAttr<int>("seed",
85+
"(int, default 0) "
8186
"Random seed of generator."
82-
"0 means use system wide seed")
87+
"0 means use system wide seed.")
8388
.SetDefault(0);
84-
AddAttr<int>("data_type", "output data type")
89+
AddAttr<int>("data_type",
90+
"(int, default 5(FP32)) "
91+
"Output data type.")
8592
.SetDefault(framework::DataType::FP32);
93+
94+
AddComment(R"DOC(
95+
GaussianRandom Operator.
96+
97+
Used to initialize tensors with gaussian random generator.
98+
99+
)DOC");
86100
}
87101
};
88102

paddle/operators/gru_unit_op.cc

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -80,19 +80,21 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
8080
AddInput("HiddenPrev",
8181
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
8282
"states of previous time step.");
83-
AddInput("Weight",
84-
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
85-
"The elements continuous in memory can be divided into two parts. "
86-
"The first part are weights of the update gate and reset gate "
87-
"with shape [frame_size, frame_size * 2], and the second part are "
88-
"weights of output candidate with shape [frame_size, frame_size]");
89-
AddInput("Bias",
90-
"(Tensor) Bias vector with shape [1, frame_size * 3] concating "
91-
"bias of the update gate, reset gate and output candidate.")
83+
AddInput(
84+
"Weight",
85+
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
86+
"The elements continuous in memory can be divided into two parts. "
87+
"The first part are weights of the update gate and reset gate "
88+
"with shape [frame_size, frame_size * 2], and the second part are "
89+
"weights of output candidate with shape [frame_size, frame_size].");
90+
AddInput(
91+
"Bias",
92+
"(Tensor) Bias vector with shape [1, frame_size * 3] concatenating "
93+
"bias of the update gate, reset gate and output candidate.")
9294
.AsDispensable();
9395
AddOutput("Gate",
9496
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
95-
"output of update gate, reset gate and output candidate")
97+
"output of update gate, reset gate and output candidate.")
9698
.AsIntermediate();
9799
AddOutput("ResetHiddenPrev",
98100
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
@@ -112,16 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
112114
.SetDefault(sigmoid)
113115
.InEnum({identity, sigmoid, tanh, relu});
114116
AddComment(R"DOC(
115-
GRUUnitOp implements part calculations of the GRU unit as following:
117+
GRUUnit Operator.
116118
117-
\f[
118-
update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
119-
reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
120-
output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
121-
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_prev)
122-
\f]
119+
This operator implements partial calculations of the GRU unit as follows:
120+
121+
$$
122+
update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\
123+
reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\
124+
output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\
125+
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev})
126+
$$
123127
124128
The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
129+
125130
)DOC");
126131
}
127132
};

paddle/operators/huber_loss_op.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,12 @@ class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
5959
"The shape is same as Input(X) and will be reused in backward.")
6060
.AsIntermediate();
6161
AddOutput("Out",
62-
"The output tensor with shape [batch_size, 1] which represents "
63-
"the huber loss.");
62+
"The output tensor with shape [batch_size, 1] "
63+
"which represents the huber loss.");
6464
AddAttr<AttrType>("delta", "Hyper parameter in huber loss.");
6565
AddComment(R"DOC(
66+
HuberLoss Operator.
67+
6668
Huber loss is a loss function used in robust regression. We define X as the
6769
input value and Y as the target value. Huber loss can evaluate the fitness of
6870
X to Y. Different from MSE loss, Huber loss is more robust for outliers. The

paddle/operators/increment_op.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,18 @@ class IncrementOpMaker : public framework::OpProtoAndCheckerMaker {
3939
: OpProtoAndCheckerMaker(proto, op_checker) {
4040
AddInput("X", "(Tensor) The input tensor of increment operator");
4141
AddOutput("Out", "(Tensor) The output tensor of increment operator.");
42-
AddComment(R"DOC(Increment operator
43-
44-
The equation is: Out = X + step
45-
)DOC");
4642
AddAttr<AttrType>("step",
43+
"(float, default 1.0) "
4744
"The step size by which the "
4845
"input tensor will be incremented.")
4946
.SetDefault(1.0);
47+
AddComment(R"DOC(
48+
Increment Operator.
49+
50+
The equation is:
51+
$$Out = X + step$$
52+
53+
)DOC");
5054
}
5155
};
5256

paddle/operators/l1_norm_op.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ L1 Norm Operator.
5757
5858
Computes the L1 norm of a tensor.
5959
60-
Out = sum (abs(X))
60+
$$Out = \sum{|X|}$$
6161
6262
)DOC");
6363
}

paddle/operators/load_op.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,14 +115,18 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
115115
LoadOpProtoMaker(framework::OpProto *proto,
116116
framework::OpAttrChecker *op_checker)
117117
: OpProtoAndCheckerMaker(proto, op_checker) {
118-
AddOutput("Out", "The tensor need to be loaded");
119-
AddComment(R"DOC(Load Operator
120-
Load operator will load a tensor variable from disk file.
121-
)DOC");
118+
AddOutput("Out", "(Tensor) The tensor need to be loaded");
122119
AddAttr<std::string>("file_path",
120+
"(string) "
123121
"Variable will be loaded from \"file_path\".")
124122
.AddCustomChecker(
125123
[](const std::string &path) { return !path.empty(); });
124+
AddComment(R"DOC(
125+
Load Operator.
126+
127+
Load operator will load a tensor variable from disk file.
128+
129+
)DOC");
126130
}
127131
};
128132
} // namespace operators

paddle/operators/lookup_table_op.cc

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,21 +53,27 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
5353
framework::OpAttrChecker* op_checker)
5454
: OpProtoAndCheckerMaker(proto, op_checker) {
5555
AddInput("W",
56-
"An input represents embedding tensors,"
57-
" which is a learnable parameter.");
56+
"An input represents embedding tensors, "
57+
"which is a learnable parameter.");
5858
AddInput("Ids",
59-
"An input with type int32 or int64"
60-
"contains the ids to be looked up in W."
61-
"Ids must be a column vector with rank = 2."
62-
"The 2nd dimension size must be 1");
63-
AddOutput("Out", "The lookup results, which have the same type with W.");
64-
AddAttr<bool>("is_sparse", "Sparse update").SetDefault(false);
59+
"An input with type int32 or int64 "
60+
"contains the ids to be looked up in W. "
61+
"Ids must be a column vector with rank = 2. "
62+
"The 2nd dimension size must be 1.");
63+
AddOutput("Out", "The lookup results, which have the same type as W.");
64+
AddAttr<bool>("is_sparse",
65+
"(boolean, default false) "
66+
"Sparse update")
67+
.SetDefault(false);
6568
AddComment(R"DOC(
69+
Lookup Table Operator.
70+
6671
This operator is used to perform lookups on the parameter W,
6772
then concatenated into a dense tensor.
6873
69-
The input `Ids` can carry the LoD (Level of Details) information,
70-
or not. And the output only shares the LoD with input `Ids`.
74+
The input Ids can carry the LoD (Level of Details) information,
75+
or not. And the output only shares the LoD information with input Ids.
76+
7177
)DOC");
7278
}
7379
};

paddle/operators/lrn_op.cc

Lines changed: 41 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -45,72 +45,70 @@ class LRNOpMaker : public framework::OpProtoAndCheckerMaker {
4545
public:
4646
LRNOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
4747
: OpProtoAndCheckerMaker(proto, op_checker) {
48-
AddInput("X", R"DOC(
49-
(Tensor) The input of LRN operator. It must be a 4D tenor with NCHW format.
50-
)DOC");
51-
48+
AddInput("X",
49+
"(Tensor) The input of LRN operator. "
50+
"It must be a 4D tenor with NCHW format.");
5251
AddOutput("Out",
5352
"(Tensor) The output of LRN operator, which is also the 4D "
5453
"tensor with NCHW format.");
55-
AddOutput("MidOut", R"Doc(
56-
(Tensor)Middle result of lrn op.It's computed in forward process
57-
and also used in backward process.
58-
)Doc");
59-
60-
AddAttr<int>("n", R"DOC(
61-
(int, default 5)n is “adjacent” kernel maps at the same spatial position.
62-
)DOC")
54+
AddOutput("MidOut",
55+
"(Tensor) Middle result of LRN operator. It's computed in "
56+
"forward process and also used in backward process.");
57+
58+
AddAttr<int>("n",
59+
"(int default 5) "
60+
"n is the \"adjacent\" kernel that maps "
61+
"at the same spatial position.")
6362
.SetDefault(5)
6463
.GreaterThan(0);
6564

66-
AddAttr<T>("k", R"DOC(
67-
(float, default 2.0)k is the bias.
68-
)DOC")
65+
AddAttr<T>("k",
66+
"(float, default 2.0) "
67+
"k is the bias.")
6968
.SetDefault(2.0)
7069
.GreaterThan(0.0);
7170

72-
AddAttr<T>("alpha", R"DOC(
73-
(float, default 0.0001)alpha is the scale number.
74-
)DOC")
71+
AddAttr<T>("alpha",
72+
"(float, default 0.0001) "
73+
"alpha is the scale number.")
7574
.SetDefault(0.0001)
7675
.GreaterThan(0.0);
7776

78-
AddAttr<T>("beta", R"DOC(
79-
(float, default 0.75)beta is the power number.
80-
)DOC")
77+
AddAttr<T>("beta",
78+
"(float, default 0.75) "
79+
"beta is the power number.")
8180
.SetDefault(0.75)
8281
.GreaterThan(0.0);
8382

8483
AddComment(R"DOC(
85-
Local Response Normalization.
86-
87-
This Function comes from the paper
88-
"ImageNet Classification with Deep Convolutional Neural Networks".
84+
Local Response Normalization Operator.
8985
90-
The original formula is:
86+
This operator comes from the paper
87+
"ImageNet Classification with Deep Convolutional Neural Networks".
9188
92-
Input(i, x, y)
93-
Output(i, x, y) = ----------------------------------------------
94-
-- upper
95-
(k + alpha * > (Input(j, x, y))^2) ^ (beta)
96-
-- j = lower
89+
The original formula is:
9790
98-
upper is `min(C, c + n/2)`
99-
lower if `max(0, c - n/2)`
91+
$$
92+
Output(i, x, y) = Input(i, x, y) / \left(
93+
k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)}
94+
(Input(j, x, y))^2
95+
\right)^{\beta}
96+
$$
10097
101-
Function implementation:
98+
Function implementation:
10299
103-
inputs and outpus is NCHW format, while input.shape.ndims() is equal 4.
104-
And the meaning of each dimension(0-3) is respectively batch size,
105-
feature maps, rows and columns.
100+
Inputs and outpus are in NCHW format, while input.shape.ndims() equals 4.
101+
And dimensions 0 ~ 3 represent batch size, feature maps, rows,
102+
and columns, respectively.
106103
107-
Input and Output in the above formula is for each map(i) of one image, and
108-
Input(i, x, y), Output(i, x, y) represents an element in an image.
104+
Input and Output in the formula above is for each map(i) of one image, and
105+
Input(i, x, y), Output(i, x, y) represents an element in an image.
109106
110-
C is the number of feature maps of one image, and n is a hyper-parameters
111-
is configured when Function is initialized. The sum in the denominator
112-
is the sum of the same position in the neighboring maps.
113-
)DOC");
107+
C is the number of feature maps of one image. n is a hyper-parameter
108+
configured when operator is initialized. The sum in the denominator
109+
is the sum of the same positions in the neighboring maps.
110+
111+
)DOC");
114112
}
115113
};
116114

0 commit comments

Comments (0)