@@ -26,17 +26,16 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
26
26
" Emission" ,
27
27
" (LoDTensor, default: LoDTensor<float>). "
28
28
" The unscaled emission weight matrix for the linear chain CRF. "
29
- " This input is a LoDTensor with shape [N x D] where N is the total "
30
- " element number of all input squences in a mini-batch, "
31
- " and D is the total tag number." );
29
+ " This input is a LoDTensor with shape [N x D] where N is the size of "
30
+ " the mini-batch and D is the total tag number." );
32
31
AddInput (
33
32
" Transition" ,
34
33
" (Tensor, default: Tensor<float>). A Tensor with shape [(D + 2) x D]. "
35
34
" The learnable parameter for the linear_chain_crf operator. "
36
35
" See more details in the operator's comments." );
37
36
AddInput (
38
37
" Label" ,
39
- " (LoDTensor, default: LoDTensor<int>). The groundtruth which is a 2-D "
38
+ " (LoDTensor, default: LoDTensor<int>). The ground truth which is a 2-D "
40
39
" LoDTensor with shape [N x 1], where N is the total element number in "
41
40
" a mini-batch." );
42
41
AddOutput (
@@ -77,12 +76,13 @@ variables. CRF learns the conditional probability \f$P(Y|X)\f$, where
77
76
78
77
Linear chain CRF is a special case of CRF that is useful for sequence labeling
79
78
tasks. Sequence labeling tasks do not assume a lot of conditional
80
- independences among inputs. They only concern about the input and the output
81
- being linear sequences. Thus, the graph model of such a CRF is a simple chain
82
- or a line, which results in the linear chain CRF.
79
+ independences among inputs. The only constraint they impose is that the input
80
+ and output must be linear sequences. Thus, the graph of such a CRF is a simple
81
+ chain or a line, which results in the linear chain CRF.
83
82
84
83
This operator implements the Forward-Backward algorithm for the linear chain
85
- CRF. Please see http://www.cs.columbia.edu/~mcollins/fb.pdf for reference.
84
+ CRF. Please see http://www.cs.columbia.edu/~mcollins/fb.pdf and
85
+ http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for reference.
86
86
87
87
Equation:
88
88
@@ -111,7 +111,7 @@ likelihood of each training sample in a mini-batch.
111
111
transition features. The emission feature weights are NOT computed in
112
112
this operator. They MUST be computed first before this operator is called.
113
113
114
- 2. Because this operator performs globally normaliztion over all possible
114
+ 2. Because this operator performs global normalization over all possible
115
115
sequences internally, it expects UNSCALED emission feature weights.
116
116
Please do not call this op with the emission feature being the output of any
117
117
nonlinear activation.
@@ -171,9 +171,10 @@ class LinearChainCRFOp : public framework::OperatorWithKernel {
171
171
ctx->SetOutputDim (" Alpha" , emission_dims);
172
172
ctx->SetOutputDim (" EmissionExps" , emission_dims);
173
173
ctx->SetOutputDim (" TransitionExps" , transition_dims);
174
- // ( TODO caoying) This is tricky. The 1st dimension of Output(LogLikelihood)
174
+ // TODO( caoying) This is tricky. The 1st dimension of Output(LogLikelihood)
175
175
// is the number of sequences in a mini-batch. The dimension set here should be
176
- // resized to its correct size in the function Compute.
176
+ // resized to its correct size in the function Compute. Fix this once we can
177
+ // get LoD information in the InferShape interface.
177
178
ctx->SetOutputDim (" LogLikelihood" , {emission_dims[0 ], 1 });
178
179
}
179
180
@@ -236,7 +237,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel {
236
237
237
238
protected:
238
239
// Explicitly set that the data type of output of the linear_chain_crf_grad
239
- // operator is determined by its input: graidents of LogLikelihood.
240
+ // operator is determined by its input: gradients of LogLikelihood.
240
241
framework::DataType IndicateDataType (
241
242
const framework::ExecutionContext& ctx) const override {
242
243
return framework::ToDataType (
0 commit comments