
Commit 7408a4c

Merge pull request #5354 from kexinzhao/cos_sim_to_dynamic_recur_op
polish operator doc
2 parents: 906e256 + 6a07af0

8 files changed: 78 additions & 56 deletions


paddle/operators/accuracy_op.cc

Lines changed: 13 additions & 9 deletions
@@ -33,7 +33,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
 
     auto inference_dim = ctx->GetInputDim("Out");
     auto label_dim = ctx->GetInputDim("Label");
-    // Assume indices has same shape with infernece, because
+    // Assume indices has same shape as inference, because
     // it's the output of topk.
 
     PADDLE_ENFORCE_EQ(label_dim.size(), 2, "label's rank must be 2.");
@@ -60,20 +60,24 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
                  framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     // TODO(typhoonzero): support both inference value and indices.
-    AddInput("Out", "topk (inferences) the network output");
-    AddInput("Indices", "topk (indices) the network output");
+    AddInput("Out", "The network output of topk (inferences)");
+    AddInput("Indices", "The network output of topk (indices)");
     AddInput("Label", "Label of the training data");
     // TODO(typhoonzero): AddInput("Weight", ...
     AddOutput("Accuracy", "The accuracy of current batch");
 
     AddComment(R"DOC(
-Accuracy. It will print accuracy rate for classification.
-The accuracy is:
-.. math::
-accuracy = \\frac{NumOfCorrectPredicts}{NumOfAllSamples})
+Accuracy Operator.
+
+It will print accuracy rate for classification.
+The accuracy is calculated as follows:
+
+$$accuracy = \frac{NumOfCorrectPredicts}{NumOfAllSamples}$$
+
+Both the input Out and Label can carry the LoD (Level of Details)
+information, or not. But the output only shares the LoD information
+with the input Out(Inference).
 
-Both the input `Out` and `Label` can carry the LoD (Level of Details)
-information, or not. But the output only shares the LoD with input `Inference`.
 )DOC");
   }
 };
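
Side note: the polished DOC formula is just correct top-k predictions over batch size. A minimal standalone sketch of that computation (the helper name `compute_accuracy` and its container-based signature are invented for illustration; this is not the operator's actual kernel):

```cpp
#include <cstddef>
#include <vector>

// Fraction of samples whose top-k indices contain the ground-truth label,
// i.e. accuracy = NumOfCorrectPredicts / NumOfAllSamples.
float compute_accuracy(const std::vector<std::vector<int>>& topk_indices,
                       const std::vector<int>& labels) {
  std::size_t correct = 0;
  for (std::size_t i = 0; i < labels.size(); ++i) {
    for (int idx : topk_indices[i]) {
      if (idx == labels[i]) {
        ++correct;
        break;
      }
    }
  }
  return labels.empty() ? 0.0f
                        : static_cast<float>(correct) / labels.size();
}
```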

paddle/operators/conv_cudnn_op.cc

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
                  "workspace is a section of GPU memory which will be "
                  "allocated/freed each time the operator runs, larger "
                  "workspace size can increase performance but also requires "
-                 "better hardward. This size should be carefully setted.")
+                 "better hardware. This size should be chosen carefully.")
         .SetDefault(4096);
   }
 };

paddle/operators/cos_sim_op.cc

Lines changed: 7 additions & 6 deletions
@@ -79,15 +79,16 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 Cosine Similarity Operator.
 
-The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)).
+$Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$
 
-The input `X` and `Y` must have the same shape, except that the 1st dimension
-of input `Y` could be just 1 (different from input `X`), which will be
-broadcasted to match the shape of input `X` before computing their cosine
+The input X and Y must have the same shape, except that the 1st dimension
+of input Y could be just 1 (different from input X), which will be
+broadcasted to match the shape of input X before computing their cosine
 similarity.
 
-Both the input `X` and `Y` can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD with input `X`.
+Both the input X and Y can carry the LoD (Level of Details) information,
+or not. But the output only shares the LoD information with input X.
+
 )DOC");
   }
 };
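
For reference, the new `$Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$` notation is plain cosine similarity. A minimal sketch for one row pair, assuming two equal-length vectors (hypothetical helper; the operator itself also broadcasts a 1-row Y across X):

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Cosine similarity of two equal-length vectors:
// out = x.y / (sqrt(x.x) * sqrt(y.y))
double cosine_similarity(const std::vector<double>& x,
                         const std::vector<double>& y) {
  double dot = 0.0, xx = 0.0, yy = 0.0;
  for (std::size_t i = 0; i < x.size(); ++i) {
    dot += x[i] * y[i];
    xx += x[i] * x[i];
    yy += y[i] * y[i];
  }
  return dot / (std::sqrt(xx) * std::sqrt(yy));
}
```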

paddle/operators/crop_op.cc

Lines changed: 22 additions & 21 deletions
@@ -56,55 +56,56 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "The input of pad op. "
-             "The input should be a k-D tensor(k > 0 and k < 7)");
+             "The input should be a k-D tensor(k > 0 and k < 7).");
     AddInput("Y",
-             "The input used as reference for cropping"
-             " with the same dimension as X. ")
+             "The input used as reference for cropping, "
+             "which is of the same dimensions as X.")
         .AsDispensable();
     AddOutput("Out",
-              "The output of crop op "
-              "with the same dimension as X.");
+              "The output of crop op, "
+              "which is of the same dimensions as X.");
     AddAttr<std::vector<int>>("offsets",
-                              "A list<int> describing offsets to be cropped."
-                              "The size of offsets list should be as same as "
-                              "dimension size of input X.");
+                              "A list<int> describing offsets to be cropped. "
+                              "The size of offsets list should be the same as "
+                              "the dimension size of input X.");
     AddAttr<std::vector<int>>("shape",
-                              "A list<int> describing the shape of output."
-                              "The size of shape list should be as same as "
-                              "dimension size of input X.")
+                              "A list<int> describing the shape of output. "
+                              "The size of shape list should be the same as "
+                              "the dimension size of input X.")
         .SetDefault(std::vector<int>());
     AddComment(R"DOC(
 Crop Operator.
+
 Crop input into output, as specified by offsets and shape.
 
 There are two ways to set shape:
-1. referenc input: crop input X as shape as reference input.
+1. reference input: crop input X into the same shape as reference input.
                    The dimension of reference input should
-                   be as same as input X.
-2. shape list: crop input X by shape described by a list<int>.
-               The size of shape list should be as same as
-               dimension size of input X.
+                   be the same as the dimension of input X.
+2. shape list: crop input X into the shape described by a list<int>.
+               The size of shape list should be the same as
+               the dimension size of input X.
 
 The input should be a k-D tensor(k > 0 and k < 7). As an example:
 
 Given:
 
     X = [[0, 1, 2, 0, 0]
          [0, 3, 4, 0, 0]
-         [0, 0, 0, 0, 0]]
+         [0, 0, 0, 0, 0]],
 
 and
 
-    offsets = [0, 1]
+    offsets = [0, 1],
 
 and
 
-    shape = [2, 2]
+    shape = [2, 2],
 
-then we get
+we get:
 
     Out = [[1, 2],
-           [3, 4]]
+           [3, 4]].
 
 )DOC");
   }
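
The worked example in the DOC amounts to a windowed copy. A hypothetical 2-D sketch (the actual operator is k-D and reads offsets/shape from attributes or the reference input Y):

```cpp
#include <vector>

// Copy a shape[0] x shape[1] window of x, starting at
// (offsets[0], offsets[1]), into the output.
std::vector<std::vector<int>> crop2d(const std::vector<std::vector<int>>& x,
                                     const std::vector<int>& offsets,
                                     const std::vector<int>& shape) {
  std::vector<std::vector<int>> out(shape[0], std::vector<int>(shape[1]));
  for (int i = 0; i < shape[0]; ++i) {
    for (int j = 0; j < shape[1]; ++j) {
      out[i][j] = x[offsets[0] + i][offsets[1] + j];
    }
  }
  return out;
}
```

On the DOC's example, `crop2d(X, {0, 1}, {2, 2})` returns `{{1, 2}, {3, 4}}`.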

paddle/operators/cross_entropy_op.cc

Lines changed: 7 additions & 6 deletions
@@ -117,9 +117,9 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
         "Label",
         "(Tensor, default Tensor<int>), the ground truth which is "
         "a 2-D tensor. "
-        "When soft_label is set to false, `Label` is a Tensor<int> with shape "
+        "When soft_label is set to false, Label is a Tensor<int> with shape "
         "[N x 1]. "
-        "When soft_label is set to true, `Label` is a Tensor<float/double> "
+        "When soft_label is set to true, Label is a Tensor<float/double> "
         "with shape [N x K].");
     AddOutput("Y",
               "(Tensor, default Tensor<float>), a 2-D tensor "
@@ -137,13 +137,13 @@ computation.
 1) One-hot cross-entropy:
     soft_label = false, Label[i, 0] indicates the class index for sample i:
 
-    Y[i] = -log(X[i, Label[i]])
+    $Y[i] = -\log(X[i, Label[i]])$
 
 2) Soft-label cross-entropy:
     soft_label = true, Label[i, j] indicates the soft label of class j
     for sample i:
 
-    Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
+    $Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}$
 
 Please make sure that in this case the summuation of each row of Label
 equals one.
@@ -153,8 +153,9 @@ computation.
 non-zero element (equals 1), soft-label cross-entropy degenerates to a
 one-hot cross-entropy with one-hot label representation.
 
-Both the input `X` and `Label` can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD with input `X`.
+Both the input X and Label can carry the LoD (Level of Details) information,
+or not. But the output only shares the LoD information with input X.
+
 )DOC");
   }
 };
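
The two documented branches differ only in how the label enters the sum. A per-sample sketch, assuming `x` holds the K predicted probabilities for one sample (hypothetical helpers, not the operator's kernel):

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// One-hot branch (soft_label = false): Y[i] = -log(X[i, Label[i]])
double one_hot_cross_entropy(const std::vector<double>& x, int label) {
  return -std::log(x[label]);
}

// Soft-label branch (soft_label = true):
// Y[i] = sum_j -Label[i, j] * log(X[i, j])
double soft_label_cross_entropy(const std::vector<double>& x,
                                const std::vector<double>& label) {
  double y = 0.0;
  for (std::size_t j = 0; j < x.size(); ++j) {
    y -= label[j] * std::log(x[j]);
  }
  return y;
}
```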

paddle/operators/decayed_adagrad_op.cc

Lines changed: 10 additions & 3 deletions
@@ -75,11 +75,18 @@ class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
                    "Constant for numerical stability")
         .SetDefault(1.0e-6f);
     AddComment(R"DOC(
+Decayed Adagrad Optimizer.
 
-Decayed Adagrad
+The update is done as follows:
 
-moment_out = decay * moment + (1 - decay) * grad * grad
-param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
+$$
+moment\_out = decay * moment + (1 - decay) * grad * grad \\
+param\_out = param - \frac{learning\_rate * grad}{\sqrt{moment\_out} + epsilon}
+$$
+
+The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
+does not have an epsilon attribute. It is added here for numerical
+stability to avoid the division by zero error.
 
 )DOC");
   }
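
In scalar form, the documented update reads as below; `decayed_adagrad_step` is a made-up name, and the real operator applies this element-wise to tensors:

```cpp
#include <cmath>

// moment_out = decay * moment + (1 - decay) * grad * grad
// param_out  = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
void decayed_adagrad_step(float& param, float& moment, float grad,
                          float learning_rate, float decay,
                          float epsilon = 1.0e-6f) {
  moment = decay * moment + (1.0f - decay) * grad * grad;
  param -= learning_rate * grad / (std::sqrt(moment) + epsilon);
}
```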

paddle/operators/dropout_op.cc

Lines changed: 8 additions & 6 deletions
@@ -43,22 +43,24 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
   DropoutOpMaker(framework::OpProto* proto,
                  framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
-        .SetDefault(.5f);
-    AddAttr<bool>("is_training", "Whether in training phase.").SetDefault(true);
-    AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
     AddInput("X", "The input of dropout op.");
     AddOutput("Out", "The output of dropout op.");
     AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();
 
+    AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
+        .SetDefault(.5f);
+    AddAttr<bool>("is_training", "True if in training phase.").SetDefault(true);
+    AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
+
     AddComment(R"DOC(
 Dropout Operator.
 
-'Dropout' refers to randomly dropping out units in a nerual network. It is a
+Dropout refers to randomly dropping out units in a neural network. It is a
 regularization technique for reducing overfitting by preventing neuron
 co-adaption during training. The dropout operator randomly set (according to
 the given dropout probability) the outputs of some units to zero, while others
-being set to their inputs.
+are set equal to their corresponding inputs.
+
 )DOC");
   }
 };
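
What the polished DOC describes, as a sketch of the training/inference split; `dropout_forward` is a hypothetical helper mirroring the X/Out/Mask interface, not the actual kernel:

```cpp
#include <cstddef>
#include <random>
#include <vector>

// In training, zero each unit with probability dropout_prob and record the
// sampled mask; at inference the output simply equals the input.
std::vector<float> dropout_forward(const std::vector<float>& x,
                                   std::vector<float>& mask,
                                   float dropout_prob, bool is_training,
                                   unsigned seed = 0) {
  std::mt19937 rng(seed);
  std::bernoulli_distribution keep(1.0 - dropout_prob);
  std::vector<float> out(x.size());
  mask.assign(x.size(), 1.0f);
  for (std::size_t i = 0; i < x.size(); ++i) {
    if (is_training) mask[i] = keep(rng) ? 1.0f : 0.0f;
    out[i] = x[i] * mask[i];
  }
  return out;
}
```

Note that, matching this version of the DOC, kept units pass through unscaled; no 1/(1 - dropout_prob) rescaling is described here.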

paddle/operators/dynamic_recurrent_op.cc

Lines changed: 10 additions & 4 deletions
@@ -386,20 +386,26 @@ class DynamicRecurrentOpProtoAndCheckerMaker
         RNNAlgorithm::kArgNames[RNNAlgorithm::ComputeMode::kForward];
     // inputs and outputs stored in proto
     AddInput(name.inlinks,
-             "the inputs that need to be segmented for each step.")
+             "The inputs that need to be segmented for each step.")
         .AsDuplicable();
-    AddInput(name.initial_states, "variables to initialize states.")
+    AddInput(name.initial_states, "Variables to initialize the states.")
        .AsDuplicable();
 
-    AddOutput(name.outlinks, "the outputs that need to concated for all steps.")
+    AddOutput(name.outlinks,
+              "The outputs that need to be concatenated for all steps.")
        .AsDuplicable();
     AddOutput(name.step_scopes, "step scopes");
 
     // Attributes stored in AttributeMap
     AddAttr<std::vector<std::string>>(name.ex_states, "names of ex_states");
     AddAttr<std::vector<std::string>>(name.states, "names of states");
 
-    AddComment("This is a RNN operator for varience-length sequences.");
+    AddComment(R"DOC(
+Dynamic Recurrent Operator.
+
+This is a RNN operator for variable-length sequences.
+
+)DOC");
   }
 };
