Skip to content

Commit d92c671

Browse files
committed
add python forward unittest.
1 parent dcfbbd3 commit d92c671

File tree

6 files changed

+268
-66
lines changed

6 files changed

+268
-66
lines changed

paddle/operators/crf_op.cc

Lines changed: 0 additions & 48 deletions
This file was deleted.
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/operators/linear_chain_crf_op.h"
16+
17+
namespace paddle {
18+
namespace operators {
19+
20+
// Declares the inputs, outputs and user-facing documentation of the
// linear_chain_crf operator.
//
// Inputs:  Emission, Transition, Label.
// Outputs: Alpha (marked intermediate), LogLikelihood.
class LinearChainCrfOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LinearChainCrfOpMaker(framework::OpProto* proto,
                        framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput(
        "Emission",
        "(LoDTensor, default: LoDTensor<float>). "
        "The unscaled emission weight matrix for the linear chain CRF. "
        "This input is a LoDTensor with shape [N x D] where N is the total "
        "element number of all input sequences in a mini-batch, "
        "and D is the total tag number.");
    AddInput(
        "Transition",
        "(Tensor, default: Tensor<float>). A Tensor with shape [(D + 2) x D]. "
        "The learnable parameter for linear_chain_crf operator. "
        "See more details in the operator's comments.");
    AddInput(
        "Label",
        "(LoDTensor, default: LoDTensor<int>). The ground truth which is a 2-D "
        "LoDTensor with shape [N x 1], where N is the total element number in "
        "a mini-batch.");
    // NOTE: these are ordinary (non-raw) string literals, so every backslash
    // of the \f$ ... \f$ formula delimiters must be escaped. An unescaped
    // "\f" / "\a" would be compiled into form-feed / bell control characters.
    AddOutput(
        "Alpha",
        "(Tensor, default: Tensor<float>). The forward vectors for the entire "
        "batch. A two dimensional tensor with shape [N x D], "
        "denoted as \\f$\\alpha\\f$. \\f$\\alpha\\f$ is a memo table used to "
        "calculate the normalization factor in CRF. \\f$\\alpha[k, v]\\f$ "
        "stores the unnormalized probabilities of all possible unfinished "
        "sequences of tags that end at position \\f$k\\f$ with tag \\f$v\\f$. "
        "For each \\f$k\\f$, \\f$\\alpha[k, v]\\f$ is a vector of length "
        "\\f$D\\f$ with a component for each tag value \\f$v\\f$. This vector "
        "is called a forward vector and will also be used in backward "
        "computations.")
        .AsIntermediate();
    AddOutput(
        "LogLikelihood",
        "(Tensor, default: Tensor<float>). The logarithm of the conditional "
        "likelihood of each training sample in a mini-batch. This is a 2-D "
        "tensor with shape [S x 1], where S is the sequence number in a "
        "mini-batch. "
        "Note: S is equal to the sequence number in a mini-batch. The output "
        "is no longer a LoDTensor.");
    // Raw string literal: backslashes below are literal and need no escaping.
    AddComment(R"DOC(
Conditional Random Field defines an undirected probabilistic graph with nodes
denoting random variables and edges denoting dependencies between these
variables. CRF learns the conditional probability \f$P(Y|X)\f$, where
\f$X = (x_1, x_2, ... , x_n)\f$ are structured inputs and
\f$Y = (y_1, y_2, ... , y_n)\f$ are labels for the inputs.

Linear chain CRF is a special case of CRF that is useful for sequence labeling
task. Sequence labeling tasks do not assume a lot of conditional
independences among inputs. They only concern about the input and the output
being linear sequences. Thus, the graph model of CRF is a simple chain or
a line, which results in a linear chain CRF.

This operator implements the Forward-Backward algorithm for linear chain CRF.
Please see http://www.cs.columbia.edu/~mcollins/fb.pdf for reference.

Equation:

- Denote the first input of this operator (Emission) as \f$x\f$ here.
- The first D values of the second input (Transition) of this operator are for
starting weights, denoted as \f$a\f$ here.
- The next D values of the second input (Transition) of this operator are for
ending weights, denoted as \f$b\f$ here.
- The remaining values of the second input (Transition) are for transition
weights, denoted as \f$w\f$ here.
- Denote the third input of this operator (Label) as \f$s\f$ here.

The probability of a sequence \f$s\f$ of length \f$L\f$ is defined as:
\f$P(s) = (1/Z) exp(a_{s_1} + b_{s_L}
                 + \sum_{l=1}^L x_{s_l}
                 + \sum_{l=2}^L w_{s_{l-1},s_l})\f$
where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over
all possible sequences is \f$1\f$, and \f$x\f$ is the emission feature weight
to the linear chain CRF.

Finally, the linear chain CRF operator outputs the logarithm of the conditional
likelihood of each training sample in a mini-batch.

NOTE:
1. The feature function for a CRF is made up of the emission features and the
transition features. The emission feature weights are NOT computed in
this operator. They MUST be computed first before this operator is called.

2. Because this operator performs global normalization over all possible
sequences internally, it expects UNSCALED emission feature weights.
Please do not call this op with the emission feature being output of any
nonlinear activation.

3. The 2nd dimension of the first input of this operator (Emission) MUST be
equal to the tag number.

)DOC");
  }
};
116+
117+
class LinearChainCrfOp : public framework::OperatorWithKernel {
118+
public:
119+
using framework::OperatorWithKernel::OperatorWithKernel;
120+
121+
protected:
122+
void InferShape(framework::InferShapeContextBase* ctx) const override {}
123+
};
124+
125+
class LinearChainCrfGradOp : public framework::OperatorWithKernel {
126+
public:
127+
using framework::OperatorWithKernel::OperatorWithKernel;
128+
129+
protected:
130+
void InferShape(framework::InferShapeContextBase* ctx) const override {}
131+
};
132+
133+
} // namespace operators
134+
} // namespace paddle
135+
136+
// Short alias so the registration macros below stay readable.
namespace ops = paddle::operators;
137+
// Registers the forward op type "linear_chain_crf" (class + proto maker)
// together with its gradient op type "linear_chain_crf_grad" (class).
REGISTER_OP(linear_chain_crf, ops::LinearChainCrfOp, ops::LinearChainCrfOpMaker,
138+
linear_chain_crf_grad, ops::LinearChainCrfGradOp);
139+
// Only CPU kernels instantiated for float are registered in this file.
REGISTER_OP_CPU_KERNEL(linear_chain_crf, ops::LinearChainCrfOpKernel<float>);
140+
REGISTER_OP_CPU_KERNEL(linear_chain_crf_grad,
141+
ops::LinearChainCrfGradOpKernel<float>);

paddle/operators/crf_op.h renamed to paddle/operators/linear_chain_crf_op.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ namespace paddle {
2020
namespace operators {
2121

2222
template <typename T>
23-
class CrfOpKernel : public framework::OpKernel<T> {
23+
class LinearChainCrfOpKernel : public framework::OpKernel<T> {
2424
public:
2525
void Compute(const framework::ExecutionContext& ctx) const override {
2626
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
@@ -29,7 +29,7 @@ class CrfOpKernel : public framework::OpKernel<T> {
2929
};
3030

3131
template <typename T>
32-
class CrfGradOpKernel : public framework::OpKernel<T> {
32+
class LinearChainCrfGradOpKernel : public framework::OpKernel<T> {
3333
public:
3434
void Compute(const framework::ExecutionContext& ctx) const override {
3535
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),

paddle/operators/softmax_with_cross_entropy_op.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ class SoftmaxWithCrossEntropyOpMaker
3232
AddInput("Label",
3333
"(Tensor, default: Tensor<int>), The ground truth which is a 2-D "
3434
"tensor. "
35-
"If softLable is set to 0, Label is a Tensor<int> with shape [N x "
36-
"1]. "
37-
"If softLable is set to 1, Label is a Tensor<float/double> "
35+
"If softLabel is set to false, Label is a Tensor<int> with shape "
36+
"[N x 1]."
37+
"If softLabel is set to true, Label is a Tensor<float/double> "
3838
"with shape [N x K].");
3939
AddOutput(
4040
"Softmax",

python/paddle/v2/framework/tests/test_crf_op.py

Lines changed: 0 additions & 13 deletions
This file was deleted.
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import unittest
2+
import random
3+
import numpy as np
4+
5+
from op_test import OpTest
6+
7+
8+
class LinearChainCrfForward(object):
    """NumPy reference implementation of the linear chain CRF forward pass.

    Args:
        seq_start_positions: list of row offsets, one per sequence plus a
            final entry equal to the total number of rows (LoD level 0).
        emission_weights: 2-D array [N x D] of unscaled emission weights.
        transition_weights: 2-D array [(D + 2) x D]; row 0 holds the start
            weights, row 1 the end weights and rows 2.. the tag-to-tag
            transition weights.
        labels: 2-D integer array [N x 1] with the gold tag of every row.
    """

    def __init__(self, seq_start_positions, emission_weights,
                 transition_weights, labels):
        self.tag_num = emission_weights.shape[1]
        self.seq_num = len(seq_start_positions) - 1

        self.seq_start_positions = seq_start_positions
        self.labels = labels
        self.x = emission_weights

        # Subtract the per-row maximum before exponentiating; the maximum is
        # added back (in log space) when the likelihood is accumulated.
        self.x_row_max = np.amax(self.x, axis=1, keepdims=True)
        self.x_exps = np.exp(self.x - self.x_row_max)

        # unnormalized logits of the transition weights for the start mark.
        self.a = transition_weights[0, :]
        self.a_exps = np.exp(self.a)
        # unnormalized logits of the transition weights for the end mark.
        self.b = transition_weights[1, :]
        self.b_exps = np.exp(self.b)
        # unnormalized logits of the transition weights for all the other tags.
        self.w = transition_weights[2:, :]
        self.w_exps = np.exp(self.w)

        # The outputs of the linear chain crf operator.
        # alpha is a memo table in dynamic programming used to calculate the
        # normalization factor.
        self.alpha = np.zeros(
            (seq_start_positions[-1], self.tag_num), dtype="float32")
        # One log-likelihood value per sequence (NOT per tag).
        self.log_likelihood = np.zeros((self.seq_num, 1))

    def _l1_norm(self, x):
        """Normalize x in place so it sums to 1 and return the original sum."""
        s = np.sum(x)
        x /= s
        return s

    def _forward_a_sequence(self, x, x_row_max, x_exps, label, alpha):
        """Run the forward pass on one sequence.

        Args:
            x: [L x D] emission weights of this sequence.
            x_row_max: [L x 1] row-wise maxima of x.
            x_exps: [L x D] exp(x - x_row_max).
            label: [L x 1] gold tags of this sequence.
            alpha: [L x D] view into the batch alpha table; filled in place.

        Returns:
            The log-likelihood of ``label`` given ``x``.
        """
        seq_len = x_row_max.shape[0]
        log_likelihood = 0.

        for i in range(self.tag_num):
            alpha[0, i] = self.a_exps[i] * x_exps[0, i]
        log_likelihood = -x_row_max[0] - np.log(self._l1_norm(alpha[0, :]))

        # calculate the unnormalized logits of the normalization factor.
        # Explicit loops are kept on purpose so that this mirrors a
        # straightforward element-wise kernel.
        for k in range(1, seq_len):
            for i in range(self.tag_num):
                s = 0.
                for j in range(self.tag_num):
                    s += alpha[k - 1, j] * self.w_exps[j, i]
                alpha[k, i] = x_exps[k, i] * s
            log_likelihood -= x_row_max[k] + np.log(self._l1_norm(alpha[k, :]))
        s = 0.
        for i in range(self.tag_num):
            s += alpha[-1, i] * self.b_exps[i]
        log_likelihood -= np.log(s)

        # calculate the numerator part. The emission scores must come from
        # this sequence's own slice ``x``, not from the whole-batch matrix.
        log_likelihood += (
            self.a[label[0]] + x[0, label[0]] + self.b[label[-1]])
        for k in range(1, seq_len):
            log_likelihood += (x[k, label[k]] + self.w[label[k - 1], label[k]])
        return log_likelihood

    def crf_forward_compute(self):
        """Process every sequence; return (alpha, log_likelihood)."""
        for i in range(self.seq_num):
            start = self.seq_start_positions[i]
            end = self.seq_start_positions[i + 1]

            self.log_likelihood[i] = self._forward_a_sequence(
                self.x[start:end, :], self.x_row_max[start:end, :],
                self.x_exps[start:end, :], self.labels[start:end, :],
                self.alpha[start:end, :])
        return self.alpha, self.log_likelihood
82+
83+
84+
class TestLinearChainCrfOp(OpTest):
    """Checks the forward outputs of the linear_chain_crf operator against
    the NumPy reference implementation LinearChainCrfForward."""

    def set_test_data(self):
        """Build random inputs and the expected outputs for the operator."""
        SEQ_NUM = 3
        TAG_NUM = 17
        MAX_SEQ_LEN = 13

        # the linear_chain_crf operator only supports sequence (LoD level = 1)
        lod = [[0]]
        for i in range(SEQ_NUM):
            lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN))

        emission = np.random.uniform(-1, 1,
                                     [lod[-1][-1], TAG_NUM]).astype("float32")
        transition = np.random.uniform(-0.5, 0.5,
                                       [TAG_NUM + 2, TAG_NUM]).astype("float32")
        labels = np.random.randint(
            low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32")

        # Keys must match the input names declared by the operator
        # ("Emission", "Transition", "Label") exactly, including case.
        self.inputs = {
            "Emission": (emission, lod),
            "Transition": transition,
            "Label": (labels, lod)
        }

        crf = LinearChainCrfForward(lod[0], emission, transition, labels)
        alpha, log_likelihood = crf.crf_forward_compute()

        self.outputs = {"Alpha": alpha, "LogLikelihood": log_likelihood}

    def setUp(self):
        self.op_type = "linear_chain_crf"
        self.set_test_data()

    def test_check_output(self):
        self.check_output()
119+
120+
121+
if __name__ == "__main__":
122+
unittest.main()

0 commit comments

Comments
 (0)