Skip to content

Commit 427644b

Browse files
committed
fix the computation kernels.
1 parent c8d0d37 commit 427644b

File tree

4 files changed

+84
-57
lines changed

4 files changed

+84
-57
lines changed

paddle/framework/operator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,7 @@ class OperatorWithKernel : public OperatorBase {
659659
if (t != nullptr) {
660660
int tmp = static_cast<int>(ToDataType(t->type()));
661661
PADDLE_ENFORCE(tmp == data_type || data_type == -1,
662-
"DataType of Paddle Op must be same.");
662+
"DataType of Paddle Op must be the same.");
663663
data_type = tmp;
664664
}
665665
}

paddle/operators/linear_chain_crf_op.cc

Lines changed: 75 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,11 @@ class LinearChainCrfOp : public framework::OperatorWithKernel {
165165
"Output(LogLikelihood) should be not null.");
166166

167167
auto emission_dims = ctx->GetInputDim("Emission");
168-
auto transition_dims = ctx->GetInputDim("Transition");
169-
auto label_dims = ctx->GetInputDim("Label");
170-
171168
PADDLE_ENFORCE_EQ(emission_dims.size(), 2UL,
172169
"The Input(Emission) should be a 2-D tensor.");
170+
PADDLE_ENFORCE(emission_dims[0], "An empty mini-batch is not allowed.");
171+
172+
auto transition_dims = ctx->GetInputDim("Transition");
173173
PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL,
174174
"The Input(Transition) should be a 2-D tensor.");
175175
PADDLE_ENFORCE_EQ(
@@ -180,6 +180,8 @@ class LinearChainCrfOp : public framework::OperatorWithKernel {
180180
emission_dims[1], transition_dims[1],
181181
"The 2nd dimension of the Input(Emission) and the Input(Transition) "
182182
"should be equal to the tag number.");
183+
184+
auto label_dims = ctx->GetInputDim("Label");
183185
PADDLE_ENFORCE(label_dims.size() == 2UL && label_dims[1] == 1UL,
184186
"The Input(Label) should be a 2-D tensor with the 2nd "
185187
"dimensions fixed to 1.");
@@ -204,7 +206,7 @@ class LinearChainCrfOp : public framework::OperatorWithKernel {
204206
// operator is determined by its input "Emission".
205207
framework::DataType IndicateDataType(
206208
const framework::ExecutionContext& ctx) const override {
207-
return framework::ToDataType(ctx.Input<Tensor>("Emission")->type());
209+
return framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type());
208210
}
209211
};
210212

@@ -224,6 +226,8 @@ class LinearChainCrfOpKernel<platform::CPUPlace, T>
224226
auto* label = ctx.Input<LoDTensor>("Label");
225227

226228
auto in_lod = emission_weights->lod();
229+
PADDLE_ENFORCE(in_lod.size(), "Input(Emission) is not a sequence.");
230+
227231
// TODO(caoying) The checks related to LoD information should be
228232
// moved into InferShape once after the InferShape is refactored.
229233
PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL,
@@ -266,12 +270,17 @@ class LinearChainCrfOpKernel<platform::CPUPlace, T>
266270
for (size_t i = 0; i < seq_num; ++i) {
267271
int start_pos = static_cast<int>(in_lod[level][i]);
268272
int end_pos = static_cast<int>(in_lod[level][i + 1]);
273+
if (end_pos == start_pos) {
274+
// If an empty input sequence is given, pad 0 for its cost.
275+
log_likelihood[i] = static_cast<T>(0.);
276+
continue;
277+
}
269278

270-
const Tensor one_seq = emission_weights->Slice<T>(start_pos, end_pos);
271-
Tensor one_seq_row_max = emission_row_max.Slice<T>(start_pos, end_pos);
272-
Tensor one_seq_exps = emission_exps->Slice<T>(start_pos, end_pos);
273-
const Tensor one_seq_label = label->Slice<T>(start_pos, end_pos);
274-
Tensor one_seq_alpha = alpha->Slice<T>(start_pos, end_pos);
279+
const Tensor one_seq = emission_weights->Slice(start_pos, end_pos);
280+
Tensor one_seq_row_max = emission_row_max.Slice(start_pos, end_pos);
281+
Tensor one_seq_exps = emission_exps->Slice(start_pos, end_pos);
282+
const Tensor one_seq_label = label->Slice(start_pos, end_pos);
283+
Tensor one_seq_alpha = alpha->Slice(start_pos, end_pos);
275284

276285
log_likelihood[i] = ForwardOneSequence(
277286
&one_seq, &one_seq_row_max, &one_seq_exps, transition_weights,
@@ -306,7 +315,7 @@ class LinearChainCrfOpKernel<platform::CPUPlace, T>
306315

307316
for (size_t k = 1; k < seq_length; ++k) {
308317
for (size_t i = 0; i < tag_num; ++i) {
309-
T sum = 0.;
318+
T sum = static_cast<T>(0.);
310319
for (size_t j = 0; j < tag_num; ++j) {
311320
sum += alpha_value[(k - 1) * tag_num + j] *
312321
w_exps[(j + state_trans_base_idx) * tag_num + i];
@@ -326,11 +335,14 @@ class LinearChainCrfOpKernel<platform::CPUPlace, T>
326335
PADDLE_ENFORCE_LT(
327336
*std::max_element(lbl, lbl + seq_length), tag_num,
328337
"An invalid tag label that exceeds the largest tag number.");
338+
329339
// Calculate the numerator part, which depends on the label sequence.
330340
ll += w[lbl[0]] /*start transition*/ + x[lbl[0]] +
331341
w[tag_num + lbl[seq_length - 1]] /*end transition*/;
332-
for (size_t k = 1; k < seq_length; ++k)
333-
ll += x[k * tag_num + lbl[k]] + w[lbl[k - 1] * tag_num + lbl[k]];
342+
for (size_t k = 1; k < seq_length; ++k) {
343+
ll += x[k * tag_num + lbl[k]] +
344+
w[(lbl[k - 1] + state_trans_base_idx) * tag_num + lbl[k]];
345+
}
334346
return -ll;
335347
}
336348
};
@@ -353,12 +365,13 @@ class LinearChainCrfGradOp : public framework::OperatorWithKernel {
353365
"Output(Transition@GRAD) should be not null.");
354366

355367
auto emission_exps_dims = ctx->GetInputDim("EmissionExps");
356-
auto transition_exps_dims =
357-
ctx->GetInputDim(framework::GradVarName("TransitionExps"));
358-
auto label_dims = ctx->GetInputDim("Label");
359-
360368
PADDLE_ENFORCE_EQ(emission_exps_dims.size(), 2UL,
361369
"The Input(EmissionExps) should be a 2-D tensor.");
370+
PADDLE_ENFORCE(emission_exps_dims[0],
371+
"An empty mini-batch is not allowed.");
372+
373+
auto transition_exps_dims =
374+
ctx->GetInputDim(framework::GradVarName("TransitionExps"));
362375
PADDLE_ENFORCE_EQ(transition_exps_dims.size(), 2UL,
363376
"The Input(TransitionExps) should be a 2-D tensor.");
364377
PADDLE_ENFORCE_EQ(
@@ -369,6 +382,8 @@ class LinearChainCrfGradOp : public framework::OperatorWithKernel {
369382
emission_exps_dims[1], transition_exps_dims[1],
370383
"The 2nd dimension of the Input(EmissionExps) and the "
371384
"Input(TransitionExps) should be equal to the tag number.");
385+
386+
auto label_dims = ctx->GetInputDim("Label");
372387
PADDLE_ENFORCE(label_dims.size() == 2UL && label_dims[1] == 1UL,
373388
"The Input(Label) should be a 2-D tensor with the 2nd "
374389
"dimensions fixed to 1.");
@@ -381,6 +396,14 @@ class LinearChainCrfGradOp : public framework::OperatorWithKernel {
381396
ctx->SetOutputDim(framework::GradVarName("Transition"),
382397
transition_exps_dims);
383398
}
399+
400+
protected:
401+
// Explicitly set that the data type of output of the linear_chain_crf_grad
402+
// operator is determined by its input "EmissionExps".
403+
framework::DataType IndicateDataType(
404+
const framework::ExecutionContext& ctx) const override {
405+
return framework::ToDataType(ctx.Input<LoDTensor>("EmissionExps")->type());
406+
}
384407
};
385408

386409
template <typename T>
@@ -390,12 +413,12 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
390413
void Compute(const framework::ExecutionContext& ctx) const override {
391414
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
392415
"This kernel only runs on CPU.");
393-
auto* ll_grad =
394-
ctx.Input<LoDTensor>(framework::GradVarName("LogLikelihood"));
395416
auto* label = ctx.Input<LoDTensor>("Label");
396417
auto* emission_exps = ctx.Input<LoDTensor>("EmissionExps");
397418
auto* transition_exps = ctx.Input<Tensor>("TransitionExps");
398-
auto* alpha = ctx.Input<Tensor>("Alpha");
419+
auto* alpha = ctx.Input<LoDTensor>("Alpha");
420+
const T* ll_grad =
421+
ctx.Input<Tensor>(framework::GradVarName("LogLikelihood"))->data<T>();
399422

400423
auto* emission_grad =
401424
ctx.Output<Tensor>(framework::GradVarName("Emission"));
@@ -413,34 +436,31 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
413436
Tensor beta;
414437
beta.mutable_data<T>(emission_dims, platform::CPUPlace());
415438

416-
auto place = ctx.GetEigenDevice<platform::CPUPlace>();
417-
auto x_grad = EigenMatrix<T>::From(*emission_grad);
418-
auto out_grad = EigenMatrix<T>::From(*ll_grad);
419-
x_grad.device(place) =
420-
x_grad * out_grad.broadcast(Eigen::DSizes<int, 2>(1, emission_dims[1]));
421-
422439
const size_t level = 0; // currently, only support sequence.
423-
auto lod = emission_exps->lod();
440+
auto lod = label->lod();
441+
PADDLE_ENFORCE(lod.size(), "Input(Label) is not a sequence.");
442+
424443
for (size_t i = 0; i < lod[level].size() - 1; ++i) {
425444
int start_pos = static_cast<int>(lod[level][i]);
426445
int end_pos = static_cast<int>(lod[level][i + 1]);
446+
if (end_pos == start_pos) continue;
427447

428448
const Tensor one_seq_emission_exps =
429-
emission_exps->Slice<T>(start_pos, end_pos);
430-
const Tensor one_seq_label = label->Slice<T>(start_pos, end_pos);
431-
const Tensor one_seq_alpha = alpha->Slice<T>(start_pos, end_pos);
432-
Tensor one_seq_beta = beta.Slice<T>(start_pos, end_pos);
433-
Tensor one_seq_emission_grad =
434-
emission_grad->Slice<T>(start_pos, end_pos);
435-
436-
BackwardOneSequence(ctx.device_context(), &one_seq_emission_exps,
437-
transition_exps, &one_seq_alpha, &one_seq_label,
438-
&one_seq_beta, trans_grad, &one_seq_emission_grad);
449+
emission_exps->Slice(start_pos, end_pos);
450+
const Tensor one_seq_label = label->Slice(start_pos, end_pos);
451+
const Tensor one_seq_alpha = alpha->Slice(start_pos, end_pos);
452+
Tensor one_seq_beta = beta.Slice(start_pos, end_pos);
453+
Tensor one_seq_emission_grad = emission_grad->Slice(start_pos, end_pos);
454+
455+
BackwardOneSequence(ctx.device_context(), ll_grad[i],
456+
&one_seq_emission_exps, transition_exps,
457+
&one_seq_alpha, &one_seq_label, &one_seq_beta,
458+
trans_grad, &one_seq_emission_grad);
439459
}
440460
}
441461

442462
protected:
443-
void BackwardOneSequence(const platform::DeviceContext& ctx,
463+
void BackwardOneSequence(const platform::DeviceContext& ctx, const T ll_grad,
444464
const Tensor* emission_exps,
445465
const Tensor* transition_exps, const Tensor* alpha,
446466
const Tensor* label, Tensor* beta,
@@ -457,12 +477,15 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
457477
const size_t state_trans_base_idx = 2;
458478

459479
// Calculate the backward vectors beta.
460-
for (int i = 0; i < tag_num; ++i)
480+
// First, calculate the initialization state.
481+
for (int i = 0; i < tag_num; ++i) {
461482
beta_value[(seq_length - 1) * tag_num + i] = w_exps[tag_num + i];
483+
}
462484
NormalizeL1<T>(beta_value + (seq_length - 1) * tag_num, tag_num);
485+
463486
for (int k = seq_length - 2; k >= 0; --k) {
464487
for (int i = 0; i < tag_num; ++i) {
465-
T sum = 0.;
488+
T sum = static_cast<T>(0.);
466489
for (int j = 0; j < tag_num; ++j) {
467490
sum += w_exps[(i + state_trans_base_idx) * tag_num + j] *
468491
x_exps[(k + 1) * tag_num + j] *
@@ -476,15 +499,17 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
476499
auto alpha_mat = EigenMatrix<T>::From(*alpha);
477500
auto beta_mat = EigenMatrix<T>::From(*beta);
478501
auto x_grad_mat = EigenMatrix<T>::From(*emission_grad);
502+
x_grad_mat.setConstant(ll_grad);
479503

480504
auto* place = ctx.GetEigenDevice<platform::CPUPlace>();
481505
x_grad_mat.device(*place) = alpha_mat * beta_mat;
482506
x_grad_mat /= x_grad_mat.sum(Eigen::DSizes<int, 1>(1))
483507
.reshape(Eigen::DSizes<int, 2>(seq_length, 1))
484508
.broadcast(Eigen::DSizes<int, 2>(1, tag_num));
485509

486-
for (int k = 0; k < seq_length; ++k)
510+
for (int k = 0; k < seq_length; ++k) {
487511
x_grad_mat(k, label_value[k]) -= static_cast<T>(1);
512+
}
488513

489514
if (transition_grad) {
490515
T* trans_grad = transition_grad->data<T>();
@@ -501,20 +526,23 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
501526
.broadcast(Eigen::DSizes<int, 2>(1, tag_num));
502527

503528
for (int k = 1; k < seq_length; ++k) {
504-
T sum = 0.;
529+
T sum = static_cast<T>(0.);
505530
for (int i = 0; i < tag_num; ++i) {
506-
for (int j = 0; j < tag_num; ++j)
507-
sum += x_exps_mat(i, j) * alpha_mat(k - 1, i) * beta_mat(k, j);
531+
for (int j = 0; j < tag_num; ++j) {
532+
sum += w_exps[(i + state_trans_base_idx) * tag_num + j] *
533+
alpha_mat(k - 1, i) * beta_mat(k, j);
534+
}
508535
}
509-
sum = static_cast<T>(1) / sum;
536+
sum = static_cast<T>(1.) / sum;
510537
for (int i = 0; i < tag_num; ++i) {
511538
for (int j = 0; j < tag_num; ++j) {
512-
trans_grad[(i + 2) * tag_num + j] +=
513-
sum * x_exps_mat(i, j) * alpha_mat(k - 1, i) * beta_mat(k, j);
539+
trans_grad[(i + state_trans_base_idx) * tag_num + j] +=
540+
sum * w_exps[(i + state_trans_base_idx) * tag_num + j] *
541+
alpha_mat(k - 1, i) * beta_mat(k, j);
514542
}
515543
}
516544
trans_grad[label_value[k - 1] * tag_num + label_value[k]] -=
517-
static_cast<T>(1);
545+
static_cast<T>(1.);
518546
}
519547
}
520548
}

paddle/operators/linear_chain_crf_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class LinearChainCrfGradOpKernel : public framework::OpKernel<T> {
4242
void Compute(const framework::ExecutionContext& ctx) const override;
4343

4444
protected:
45-
void BackwardOneSequence(const platform::DeviceContext& ctx,
45+
void BackwardOneSequence(const platform::DeviceContext& ctx, const T ll_grad,
4646
const Tensor* emission_exps,
4747
const Tensor* transition_exps, const Tensor* alpha,
4848
const Tensor* label, Tensor* beta,

python/paddle/v2/framework/tests/test_linear_chain_crf_op.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
from op_test import OpTest
66

7-
import pdb
8-
97

108
class LinearChainCrfForward(object):
119
def __init__(self, seq_start_positions, emission_weights, emission_row_max,
@@ -65,10 +63,10 @@ def _forward_a_sequence(self, x, x_row_max, x_exps, label, alpha):
6563

6664
# calculate the numerator part.
6765
log_likelihood += (
68-
self.a[label[0]] + self.x[0, label[0]] + self.b[label[-1]])
66+
self.a[label[0]] + x[0, label[0]] + self.b[label[-1]])
67+
6968
for k in range(1, seq_len):
70-
log_likelihood += (
71-
self.x[k, label[k]] + self.w[label[k - 1], label[k]])
69+
log_likelihood += (x[k, label[k]] + self.w[label[k - 1], label[k]])
7270
return -log_likelihood
7371

7472
def crf_forward_compute(self):
@@ -77,18 +75,19 @@ def crf_forward_compute(self):
7775
end = self.seq_start_positions[i + 1]
7876

7977
self.log_likelihood[i] = self._forward_a_sequence(
80-
self.x[start:end], self.x_row_max[start:end, :],
78+
self.x[start:end, :], self.x_row_max[start:end, :],
8179
self.x_exps[start:end, :], self.labels[start:end, :],
8280
self.alpha[start:end, :])
8381
return self.alpha, self.log_likelihood
8482

8583

8684
class TestLinearChainCrfOp(OpTest):
8785
def set_test_data(self):
88-
SEQ_NUM = 3
86+
SEQ_NUM = 2
8987
TAG_NUM = 17
90-
MAX_SEQ_LEN = 13
88+
MAX_SEQ_LEN = 5
9189

90+
random.seed(1)
9291
# the linear_chain_crf operator only supports sequence (LoD level = 1)
9392
lod = [[0]]
9493
for i in range(SEQ_NUM):

0 commit comments

Comments
 (0)