Skip to content

Commit 2961674

Browse files
committed
Rewrite sequence expand op
1 parent 4e8fccf commit 2961674

File tree

6 files changed

+97
-264
lines changed

6 files changed

+97
-264
lines changed

paddle/framework/lod_tensor.cc

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -112,28 +112,5 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
112112
lod_ = new_lod;
113113
}
114114

115-
Vector<size_t> expand_lod(Vector<size_t> level, Vector<size_t> indexes,
116-
Vector<size_t> scales, bool repeat) {
117-
Vector<size_t> result;
118-
result.push_back(level[0]);
119-
size_t start = 0, end = 0;
120-
if (!repeat) {
121-
for (size_t i = 0; i < scales.size(); ++i) {
122-
result.push_back(result.back() + scales[i] * (level[i + 1] - level[i]));
123-
}
124-
} else {
125-
for (size_t i = 0; i < scales.size(); ++i) {
126-
start = indexes[i];
127-
end = indexes[i + 1];
128-
for (size_t j = 0; j < scales[i]; ++j) {
129-
for (size_t index = start; index < end - 1; ++index) {
130-
result.push_back(result.back() + level[index + 1] - level[index]);
131-
}
132-
}
133-
}
134-
}
135-
return result;
136-
}
137-
138115
} // namespace framework
139116
} // namespace paddle

paddle/framework/lod_tensor.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,5 @@ class LoDTensor : public Tensor {
136136
LoD lod_;
137137
};
138138

139-
Vector<size_t> expand_lod(Vector<size_t> level, Vector<size_t> indexes,
140-
Vector<size_t> scales, bool repeat);
141-
142139
} // namespace framework
143140
} // namespace paddle

paddle/operators/seq_expand_op.cc

Lines changed: 49 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -27,20 +27,14 @@ class SeqExpandOp : public framework::OperatorWithKernel {
2727
void InferShape(framework::InferShapeContext* ctx) const override {
2828
PADDLE_ENFORCE(ctx->HasInput("X"),
2929
"Input(X) of SeqExpandOp should not be null.");
30-
int repeat = ctx->Attrs().Get<int>("repeat");
31-
framework::DDim out_dim;
32-
if (repeat == 0) {
33-
PADDLE_ENFORCE(
34-
ctx->HasInput("Y"),
35-
"Input(Y) of SeqExpandOp should not be null while repeat == 0.");
36-
out_dim = ctx->GetInputDim("Y");
37-
ctx->ShareLoD("Y", "Out");
38-
} else {
39-
out_dim = ctx->GetInputDim("X");
40-
out_dim[0] = out_dim[0] * repeat;
41-
}
4230
PADDLE_ENFORCE(ctx->HasOutput("Out"),
4331
"Output(Out) of SeqExpandOp should not be null.");
32+
PADDLE_ENFORCE(
33+
ctx->HasInput("Y"),
34+
"Input(Y) of SeqExpandOp should not be null while repeat == 0.");
35+
framework::DDim out_dim;
36+
out_dim = ctx->GetInputDim("Y");
37+
ctx->ShareLoD("Y", "Out");
4438
ctx->SetOutputDim("Out", out_dim);
4539
}
4640
};
@@ -50,68 +44,63 @@ class SeqExpandOpMaker : public framework::OpProtoAndCheckerMaker {
5044
SeqExpandOpMaker(framework::OpProto* proto,
5145
framework::OpAttrChecker* op_checker)
5246
: OpProtoAndCheckerMaker(proto, op_checker) {
53-
AddInput(
54-
"X",
55-
"The input('X') of seq_expand op. It can be LoDTensor or base Tensor.");
56-
AddInput(
57-
"Y",
58-
"The reference input('Y') of seq_expand op."
59-
"It must be a LoDTensor with k-level(k>0)."
60-
"This reference input is essential if 'repeat' attribute is not "
61-
"configured."
62-
"Input(X) will be expanded by LoD of input(Y) while repeat == 0.");
47+
AddInput("X",
48+
"(Tensor or LoDTensor) The input('X') of this operator can be a "
49+
"LoDTensor or a base Tensor.");
50+
AddInput("Y",
51+
"(LoDTensor)The reference input('Y') of seq_expand op."
52+
"It must be a LoDTensor with k-level(k>0)."
53+
"Input(X) will be expanded according to LOD of input(Y)."
54+
"The element numbers of last level in input('Y') "
55+
"must be equal to dims[0] of input('X').");
6356
AddOutput("Out",
6457
"The output of seq_expand op."
65-
"The output is a (k+1)-level LoDTensor"
66-
"while input(X) being k-level LoDTensor."
67-
"(Given base tensor is 0-level LoDTensor.)");
68-
AddAttr<int>("repeat",
69-
"(type:int; default value: 0)"
70-
"Repeatting times of each element while expanding input(X)."
71-
"It works while input(Y) is not configured.")
72-
.SetDefault(0);
58+
"The lod of output will be as same as input(Y)'s lod.");
7359
AddComment(R"DOC(
74-
Expand k-level LoDTensor to (k+1)-level LoDTensor
75-
by lod of input(Y) or 'repeat' attribute.
60+
Expand input(X) according to LOD of input(Y).
7661
7762
Case 1:
7863
79-
Given a 2-level LoDTensor X:
80-
X.data = [a, b , c, d]
81-
X.lod = [[0, 3, 4], [0, 1, 3, 4]]
82-
and
83-
repeat = 2
84-
then we get 3-level LoDTensor
85-
Out.lod = [[0, 6, 8],
86-
[0, 3, 6, 7, 8],
87-
[0, 1, 3, 4, 6, 7, 8]]
88-
Out.data = [a, b, c, a, b, c, d, d]
64+
Given a 2-level LoDTensor input(X)
65+
X.lod = [[0, 2, 3],
66+
[0, 1, 3, 4]]
67+
X.data = [a, b, c, d]
68+
X.dims = [4, 1]
69+
and input(Y)
70+
Y.lod = [[0, 2, 4],
71+
[0, 3, 6, 7, 8]]
72+
then we get a 2-level LoDTensor
73+
Out.lod = [[0, 2, 4],
74+
[0, 3, 6, 7, 8]]
75+
Out.data = [a, a, a, b, b, b, c, d]
76+
Out.dims = [8, 1]
8977
9078
Case 2:
9179
92-
Given 2-level a LoDTensor X
93-
X.data = [1, 2, 3, 4]
94-
X.lod = [[0, 3, 4], [0, 1, 3, 4]]
95-
and
96-
Y.lod = [[0, 6, 8],
97-
[0, 3, 6, 7, 8],
98-
[0,1,3,4,6,7,8]]
99-
then we get 3-level LoDTensor
100-
Out.data = [1, 2, 3, 1, 2, 3, 4, 4]
101-
Out.lod = [[0, 6, 8],
102-
[0, 3, 6, 7, 8],
103-
[0, 1, 3, 4, 6, 7, 8]]
80+
Given a 0-level LoDTensor input(X)
81+
X.data = [a, b, c]
82+
X.lod = NULL
83+
X.dims = [3, 1]
84+
and input(Y)
85+
Y.lod = [[0, 2, 3, 6]]
86+
then we get a 1-level LoDTensor
87+
Out.lod = [[0, 2, 3, 6]]
88+
Out.data = [a, a, b, c, c, c]
89+
Out.dims = [6, 1]
10490
10591
Case 3:
10692
107-
Given a 0-level LoDTensor X
108-
X.data = [1, 2, 3, 4]
93+
Given a 0-level LoDTensor input(X)
94+
X.data = [[a, b], [c, d], [e, f]]
10995
X.lod = NULL
110-
and
111-
repeat = 2
96+
X.dims = [3, 2]
97+
and input(Y)
98+
Y.lod = [[0, 2, 3, 6]]
11299
then we get 1-level LoDTensor
113-
Out.data = [1, 1, 2, 2, 3, 3, 4, 4]
114-
Out.lod = [[0, 2, 4, 6, 8]]
100+
Out.lod = [[0, 2, 3, 6]]
101+
Out.data = [[a, b], [a, b], [c, d], [e, f], [e, f], [e, f]]
102+
Out.dims = [6, 2]
103+
115104
116105
)DOC");
117106
}

paddle/operators/seq_expand_op.h

Lines changed: 31 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -31,93 +31,28 @@ class SeqExpandKernel : public framework::OpKernel<T> {
3131
auto* out = context.Output<LoDTensor>("Out");
3232
const T* x_data = x->data<T>();
3333
auto x_dims = x->dims();
34-
auto x_lod = x->lod();
35-
36-
framework::Vector<size_t> level;
37-
size_t num = (x_lod.size() == 0) ? (x->dims()[0] + 1) : x_lod[0].size();
38-
for (int i = 0; i < num; ++i) {
39-
level.push_back(i);
40-
}
41-
x_lod.push_back(level);
42-
43-
size_t repeat = static_cast<size_t>(context.Attr<int>("repeat"));
44-
framework::Vector<size_t> scales;
45-
if (repeat != 0) {
46-
for (int i = 0; i < x_lod[0].size() - 1; ++i) {
47-
scales.push_back(repeat);
48-
}
49-
std::vector<int64_t> dims = framework::vectorize(x->dims());
50-
dims[0] = dims[0] * repeat;
51-
auto out_dims = framework::make_ddim(dims);
52-
out->Resize(out_dims);
53-
} else {
54-
auto* y = context.Input<LoDTensor>("Y");
55-
auto y_lod = y->lod();
56-
auto y_abs_lod = y_lod.ToAbsOffset();
57-
auto x_abs_lod = x_lod.ToAbsOffset();
58-
for (int i = 0; i < y_abs_lod[0].size() - 1; ++i) {
59-
scales.push_back((y_abs_lod[0][i + 1] - y_abs_lod[0][i]) /
60-
(x_abs_lod[0][i + 1] - x_abs_lod[0][i]));
61-
}
62-
out->Resize(y->dims());
63-
}
64-
65-
framework::Vector<size_t> indexes;
66-
for (int size_t i = 0; i < x_lod[0]; ++i) {
67-
indexes[i] = x_lod[0];
68-
}
69-
framework::LoD out_lod;
70-
auto level0 = framework::expand_lod(indexes, x_lod[0], scales, false);
71-
out_lod.push_back(level0);
72-
for (int i = 1; i < x_lod.size(); ++i) {
73-
for (int j = 0; j < indexes.size(); ++j) {
74-
indexes[j] = x_lod[i - 1][indexes[j]];
75-
}
76-
out_lod.push_back(framework::expand_lod(x_lod[i], indexes, scales, true));
77-
}
78-
34+
auto* y = context.Input<LoDTensor>("Y");
35+
PADDLE_ENFORCE_EQ(x_dims[0], y->lod().back().size() - 1,
36+
"The size of last lod level in Input(Y)"
37+
"must be equal to dims[0] of Input(X).");
38+
out->set_lod(y->lod());
39+
out->Resize(y->dims());
40+
auto place = context.GetEigenDevice<Place>();
7941
size_t element_len = framework::product(x_dims) / x_dims[0];
8042
T* out_data = out->mutable_data<T>(context.GetPlace());
81-
82-
// copy data
83-
auto place = context.GetPlace();
84-
size_t count = 0;
85-
if (platform::is_cpu_place(place)) {
86-
auto& cpu_place = boost::get<platform::CPUPlace>(place);
87-
for (size_t i = 0; i < scales.size(); ++i) {
88-
count = element_len * (x_abs_lod[0][i + 1] - x_abs_lod[0][i]);
89-
for (size_t j = 0; j < scales[i]; ++j) {
90-
memory::Copy(cpu_place, out_data, cpu_place, x_data,
91-
sizeof(T) * count);
92-
out_data += count;
93-
}
94-
x_data += count;
95-
}
96-
} else {
97-
#ifdef PADDLE_WITH_CUDA
98-
auto& gpu_place = boost::get<platform::GPUPlace>(place);
99-
auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(
100-
context.device_context())
101-
.stream();
102-
for (size_t i = 0; i < scales.size(); ++i) {
103-
count = element_len * (x_abs_lod[0][i + 1] - x_abs_lod[0][i]);
104-
for (size_t j = 0; j < scales[i]; ++j) {
105-
memory::Copy(gpu_place, out_data, gpu_place, x_data,
106-
sizeof(T) * count, stream);
107-
out_data += count;
108-
}
109-
x_data += count;
110-
}
111-
#else
112-
PADDLE_THROW("Paddle is not compiled with GPU");
113-
#endif
114-
}
115-
116-
out->set_lod(out_lod);
117-
for (size_t i = 0; i < lod.size; i++) {
118-
for (size_t j = 0; j < lod[i].size(); j++) {
119-
LOG(INFO) << "lod[" << i << "][" << j "] = " << lod[i][j];
120-
}
43+
auto out_starts = out->lod().back();
44+
45+
for (size_t i = 0; i < out_starts.size() - 1; i++) {
46+
int scale = out_starts[i + 1] - out_starts[i];
47+
Eigen::TensorMap<
48+
Eigen::Tensor<const T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
49+
x_t(x_data, 1, element_len);
50+
Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
51+
out_t(out_data, scale, element_len);
52+
Eigen::array<int, 2> cast({scale, 1});
53+
out_t.device(place) = x_t.broadcast(cast);
54+
x_data += element_len;
55+
out_data += element_len * scale;
12156
}
12257
}
12358
};
@@ -130,25 +65,24 @@ class SeqExpandGradKernel : public framework::OpKernel<T> {
13065
auto* x = context.Input<LoDTensor>("X");
13166
auto* out = context.Input<LoDTensor>("Out");
13267
auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X"));
133-
auto out_lod = out->lod();
134-
auto out_abs_lod = out_lod.ToAbsOffset();
68+
auto out_last_level = out->lod().back();
13569
d_x->set_lod(x->lod());
13670
const T* d_out_data = d_out->data<T>();
13771
auto d_out_dims = d_out->dims();
13872
T* d_x_data = d_x->mutable_data<T>(context.GetPlace());
13973
size_t element_len = framework::product(d_out_dims) / d_out_dims[0];
140-
for (size_t i = 0; i < out->NumElements(); ++i) {
141-
size_t ele_count = out_abs_lod[0][i + 1] - out_abs_lod[0][i];
142-
size_t repeat = out->NumElements(0, i);
143-
Eigen::TensorMap<Eigen::Tensor<const T, 2>> d_out_t(
144-
d_out_data, static_cast<int>(repeat),
145-
static_cast<int>((ele_count * element_len) / repeat));
146-
Eigen::TensorMap<Eigen::Tensor<T, 1>> d_x_t(
147-
d_x_data, static_cast<int>((ele_count * element_len) / repeat));
74+
75+
for (size_t i = 0; i < out_last_level.size() - 1; ++i) {
76+
size_t repeat = out_last_level[i + 1] - out_last_level[i];
77+
Eigen::TensorMap<
78+
Eigen::Tensor<const T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
79+
d_out_t(d_out_data, static_cast<int>(repeat), element_len);
80+
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>
81+
d_x_t(d_x_data, static_cast<int>(element_len));
14882
auto place = context.GetEigenDevice<Place>();
14983
d_x_t.device(place) = d_out_t.sum(Eigen::array<int, 1>({{0}}));
150-
d_out_data += (ele_count * element_len);
151-
d_x_data += ((ele_count * element_len) / repeat);
84+
d_out_data += (repeat * element_len);
85+
d_x_data += element_len;
15286
}
15387
}
15488
};

python/paddle/v2/framework/tests/op_test.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,6 @@ def check_output_with_place(self, place, atol):
246246
else:
247247
actual = np.array(self.scope.find_var(out_name).get_tensor())
248248
expect = self.outputs[out_name]
249-
print "actual= %s" % actual
250-
print "expect = %s" % expect
251249
self.assertTrue(
252250
np.allclose(
253251
actual, expect, atol=atol),

0 commit comments

Comments
 (0)