
Commit 2ac9a3d

follow comments.
1 parent dd2be3d commit 2ac9a3d

4 files changed: +26 −18 lines changed

paddle/framework/tensor_impl.h

Lines changed: 1 addition & 1 deletion
@@ -235,7 +235,7 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
   PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is out of bound.");
   PADDLE_ENFORCE_LT(
       begin_idx, end_idx,
-      "The start row index must be smaller than the end row index.");
+      "The start row index must be lesser than the end row index.");
 
   if (dims_[0] == 1) {
     return *this;

paddle/operators/linear_chain_crf_op.cc

Lines changed: 13 additions & 12 deletions
@@ -26,17 +26,16 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
         "Emission",
         "(LoDTensor, default: LoDTensor<float>). "
         "The unscaled emission weight matrix for the linear chain CRF. "
-        "This input is a LoDTensor with shape [N x D] where N is the total "
-        "element number of all input squences in a mini-batch, "
-        "and D is the total tag number.");
+        "This input is a LoDTensor with shape [N x D] where N is the size of "
+        "the mini-batch and D is the total tag number.");
     AddInput(
         "Transition",
         "(Tensor, default: Tensor<float>). A Tensor with shape [(D + 2) x D]. "
         "The learnable parameter for the linear_chain_crf operator. "
         "See more details in the operator's comments.");
     AddInput(
         "Label",
-        "(LoDTensor, default: LoDTensor<int>). The groundtruth which is a 2-D "
+        "(LoDTensor, default: LoDTensor<int>). The ground truth which is a 2-D "
         "LoDTensor with shape [N x 1], where N is the total element number in "
         "a mini-batch.");
     AddOutput(
@@ -77,12 +76,13 @@ variables. CRF learns the conditional probability \f$P(Y|X)\f$, where
 
 Linear chain CRF is a special case of CRF that is useful for sequence labeling
 task. Sequence labeling tasks do not assume a lot of conditional
-independences among inputs. They only concern about the input and the output
-being linear sequences. Thus, the graph model of such a CRF is a simple chain
-or a line, which results in the linear chain CRF.
+independences among inputs. The only constraint they impose is that the input
+and output must be linear sequences. Thus, the graph of such a CRF is a simple
+chain or a line, which results in the linear chain CRF.
 
 This operator implements the Forward-Backward algorithm for the linear chain
-CRF. Please see http://www.cs.columbia.edu/~mcollins/fb.pdf for reference.
+CRF. Please see http://www.cs.columbia.edu/~mcollins/fb.pdf and
+http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for reference.
 
 Equation:
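For orientation, the hunk above refers to the Forward-Backward algorithm and to the "Equation:" block that follows in the full source (not shown here). As a hedged reference only, in standard textbook form and not a quote of the operator's docstring, a linear chain CRF over a length-L sequence defines

\[
P(Y \mid X) = \frac{1}{Z(X)} \exp\Big( a_{y_1} + b_{y_L} + \sum_{k=1}^{L} x_{k,\,y_k} + \sum_{k=2}^{L} w_{y_{k-1},\,y_k} \Big),
\]

where \(x_{k,y_k}\) are the emission weights from Input(Emission), \(w\) are the pairwise transition weights with \(a\), \(b\) the start/end weights packed into the [(D + 2) x D] Input(Transition), and \(Z(X)\) sums the same exponent over every possible tag sequence. The Forward-Backward algorithm computes \(\log Z(X)\) and the per-position marginals without enumerating those sequences.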
@@ -111,7 +111,7 @@ likelihood of each training sample in a mini-batch.
    transition features. The emission feature weights are NOT computed in
    this operator. They MUST be computed first before this operator is called.
 
-2. Because this operator performs globally normaliztion over all possible
+2. Because this operator performs global normalization over all possible
    sequences internally, it expects UNSCALED emission feature weights.
    Please do not call this op with the emission feature being output of any
    nonlinear activation.
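To make the "global normalization" point concrete, here is a minimal NumPy sketch (an illustration only, not the operator's implementation, and using an unpacked a/b/w layout rather than the op's [(D + 2) x D] Transition tensor) that computes the log partition function by brute-force enumeration. Because Z(X) exponentiates the raw scores itself, the emission input must not already be squashed by a softmax or other nonlinearity.

import itertools

import numpy as np

def brute_force_log_partition(x, w, a, b):
    # x: [L, D] emission weights, w: [D, D] pairwise transition weights,
    # a, b: [D] start/end weights.
    L, D = x.shape
    scores = []
    for tags in itertools.product(range(D), repeat=L):
        s = a[tags[0]] + b[tags[-1]] + sum(x[k, t] for k, t in enumerate(tags))
        s += sum(w[tags[k - 1], tags[k]] for k in range(1, L))
        scores.append(s)
    scores = np.asarray(scores)
    m = scores.max()
    return m + np.log(np.exp(scores - m).sum())  # numerically stable logsumexp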
@@ -171,9 +171,10 @@ class LinearChainCRFOp : public framework::OperatorWithKernel {
     ctx->SetOutputDim("Alpha", emission_dims);
     ctx->SetOutputDim("EmissionExps", emission_dims);
     ctx->SetOutputDim("TransitionExps", transition_dims);
-    // (TODO caoying) This is tricky. The 1st dimension of Output(LogLikelihood)
+    // TODO(caoying) This is tricky. The 1st dimension of Output(LogLikelihood)
     // is the sequence number in a mini-batch. The dimension set here should be
-    // resized to its correct size in the function Compute.
+    // resized to its correct size in the function Compute. Fix this once we can
+    // get LoD information in the InferShape interface.
     ctx->SetOutputDim("LogLikelihood", {emission_dims[0], 1});
   }

@@ -236,7 +237,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel {
 
  protected:
   // Explicitly set that the data type of output of the linear_chain_crf_grad
-  // operator is determined by its input: graidents of LogLikelihood.
+  // operator is determined by its input: gradients of LogLikelihood.
   framework::DataType IndicateDataType(
       const framework::ExecutionContext& ctx) const override {
     return framework::ToDataType(

paddle/operators/linear_chain_crf_op.h

Lines changed: 9 additions & 5 deletions
@@ -188,7 +188,6 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
                              const LoDTensor& src, LoDTensor* dst) {
       dst->mutable_data<T>(src.dims(), platform::CPUPlace());
       dst->CopyFrom(src, platform::CPUPlace(), ctx);
-
     };
 
     copyLoDTensor(ctx, emission_weights_src, emission_weights_dst);
@@ -248,7 +247,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
     for (size_t i = 0; i < tag_num; ++i) {
       T sum = 0.;
       for (size_t j = 0; j < tag_num; ++j) {
-        sum += alpha_value[(k - 1) * tag_num + j] *
+        sum += alpha_value[(k - 1) * tag_num + j] *  // (*)
               w_exps[(j + state_trans_base_idx) * tag_num + i];
      }
      alpha_value[k * tag_num + i] = x_exps[k * tag_num + i] * sum;
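The line marked (*) above is the inner step of the forward pass. Below is a minimal NumPy sketch of that recurrence, assuming the [(D + 2) x D] transition layout described in the op comments (the first rows hold start/end weights, which is why state_trans_base_idx skips them); only the marked update is taken from the kernel, the initialization is an assumption for illustration.

import numpy as np

def forward_alphas(x_exps, w_exps, state_trans_base_idx=2):
    # x_exps: [L, D] exponentiated emission weights.
    # w_exps: [D + 2, D] exponentiated transition weights; row 0 assumed to
    # hold the start weights, rows state_trans_base_idx.. the pairwise ones.
    L, D = x_exps.shape
    alpha = np.zeros((L, D))
    alpha[0] = w_exps[0] * x_exps[0]  # assumed start-weight initialization
    for k in range(1, L):
        for i in range(D):
            s = sum(alpha[k - 1, j] * w_exps[j + state_trans_base_idx, i]
                    for j in range(D))  # mirrors the marked (*) line
            alpha[k, i] = x_exps[k, i] * s
    return alpha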
@@ -291,7 +290,8 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
     // These local variables hold the inputs and outputs, garanteeing them on
     // CPU memory, to provide a consistent reference.
     // TODO(caoying) Fix this by moving all these local variables into the
-    // class's data members once we can profile the training process.
+    // class's data members once we can profile the training process, or
+    // implementing a real GPU kernel for CRF.
     Tensor* label = nullptr;
     Tensor label_tensor;
     Tensor* emission_exps = nullptr;
@@ -344,6 +344,9 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
       transition_grad =
           ctx.Output<Tensor>(framework::GradVarName("Transition"));
     }
+
+    // TODO(caoying) Fix this constraint. When the Input(Emission) is from the
+    // data reader operator, it can have no gradients.
     PADDLE_ENFORCE(emission_grad, "Output(Emission@Grad) should not be null.");
     emission_grad->mutable_data<T>(platform::CPUPlace());
     math::SetConstant<platform::CPUPlace, T>()(ctx.device_context(),
@@ -458,7 +461,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
     for (size_t i = 0; i < tag_num; ++i) {
       T sum = 0.;
       for (size_t j = 0; j < tag_num; ++j) {
-        sum += w_exps[(i + state_trans_base_idx) * tag_num + j] *
+        sum += w_exps[(i + state_trans_base_idx) * tag_num + j] *  // (**)
               x_exps[(k + 1) * tag_num + j] *
               beta_value[(k + 1) * tag_num + j];
      }
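Similarly, the line marked (**) is the inner step of the backward pass. A matching sketch under the same assumed transition layout; only the marked update is taken from the kernel, and the end-weight initialization is again illustrative.

import numpy as np

def backward_betas(x_exps, w_exps, state_trans_base_idx=2):
    # Counterpart of the forward sketch; w_exps[1] is assumed to hold the
    # exponentiated end weights.
    L, D = x_exps.shape
    beta = np.zeros((L, D))
    beta[L - 1] = w_exps[1]  # assumed end-weight initialization
    for k in range(L - 2, -1, -1):
        for i in range(D):
            beta[k, i] = sum(w_exps[i + state_trans_base_idx, j]
                             * x_exps[k + 1, j] * beta[k + 1, j]
                             for j in range(D))  # mirrors the marked (**) line
    return beta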
@@ -493,7 +496,8 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
 
     auto x_exps_mat = EigenMatrix<T>::From(emission_exps);
 
-    // TODO(caoying): Fix this to avoid using this local variable.
+    // TODO(caoying): Fix this to avoid using this local variable if when can
+    // profiling the training process.
     Tensor tmp;
     tmp.mutable_data<T>(beta->dims(), platform::CPUPlace());
     auto tmp_mat = EigenMatrix<T>::From(tmp);

python/paddle/v2/framework/tests/test_linear_chain_crf_op.py

Lines changed: 3 additions & 0 deletions
@@ -83,6 +83,9 @@ def crf_forward_compute(self):
 
 class TestLinearChainCrfOp(OpTest):
     def set_test_data(self):
+        # TODO(caoying) Fix the unittest by: add the boundary cases when
+        # sequence lengths are 1, 2, and 3.
+
         SEQ_NUM = 3
         TAG_NUM = 17
         MAX_SEQ_LEN = 5
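The TODO above asks for boundary cases with sequence lengths 1, 2, and 3. One hedged way to express that in the test's level-0 LoD setup is sketched below; the helper and its name are hypothetical and only SEQ_NUM and MAX_SEQ_LEN come from the existing test data.

import random

def lod_with_boundary_lengths(seq_num=3, max_seq_len=5):
    # Build level-0 LoD offsets whose first sequences have lengths 1, 2, 3,
    # padding any remaining sequences with random lengths up to max_seq_len.
    lengths = [1, 2, 3][:seq_num]
    lengths += [random.randint(1, max_seq_len) for _ in range(seq_num - 3)]
    offsets = [0]
    for n in lengths:
        offsets.append(offsets[-1] + n)
    return [offsets]  # e.g. [[0, 1, 3, 6]] for seq_num=3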
