
Commit f72729d

reyoung authored and QiJune committed
Feature/rnn to array to lod tensor (#5411)
* Add LoDRankTable. LoDRankTable stores the `level` of `lod` ordered by sequence length in descending order. It is useful when implementing dynamic RNN and is shared by the dynamic RNN memory, dynamic RNN slice input, and dynamic RNN slice output operators.
* Add skeleton for array_to_lod_tensor and lod_tensor_to_array
* Add VarType::LoDTensorArray
* Add PyBind of LoDTensorArray
* Add InferVarType
* Add first unittest
* Add ut
* Add unittest
* Add unittest
* Add unittests
* update
* init
* add infershape for lod_tensor_to_array_op
* complete array_to_lod_tensor_op
* copy data
* clean code
* clean code
* Fix unittest data
* fix bugs
* fix compile error
* Refine TensorToArrayOp
* refactor array_to_lod_tensor
* Unittest
* fix bugs
* Fix unittest
* Fix unittest
* debug
* Debug
* Fix unittest
* clean code
* refactor
* use ostream
* update test
* fix gpu build error
* make gpu test pass
1 parent d9e5eba commit f72729d

14 files changed: +514 −47 lines
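
For orientation before the diffs: a minimal sketch of the rank-table idea the commit message describes, assuming only the Reset/items API visible in this commit and that LoDRankTable is default-constructible (the LoD values are illustrative).

#include "paddle/framework/lod_rank_table.h"

// Sketch: build a rank table from one LoD level. Each item records a
// sequence's original index and its length (vec[i + 1] - vec[i]); items
// are ordered by sequence length in descending order.
void rank_table_sketch() {
  paddle::framework::LoD lod;
  lod.push_back(std::vector<size_t>({0, 3, 4, 9}));  // lengths: 3, 1, 5

  paddle::framework::LoDRankTable table;
  table.Reset(lod, /*level=*/0);
  // table.items() is now ordered by length, descending:
  //   {index: 2, length: 5}, {index: 0, length: 3}, {index: 1, length: 1}
}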

paddle/framework/ddim.cc

Lines changed: 1 addition & 1 deletion

@@ -117,7 +117,7 @@ int64_t DDim::operator[](int idx) const {
   return boost::apply_visitor(DynamicConstIndexer(idx), var);
 }
 
-int64_t DDim::size() const { return arity(*this); }
+int DDim::size() const { return arity(*this); }
 
 bool DDim::operator==(DDim d) const {
   if (var.which() != d.getVar().which()) {

paddle/framework/ddim.h

Lines changed: 1 addition & 1 deletion

@@ -71,7 +71,7 @@ struct DDim {
 
   DDim operator*(DDim d) const;
 
-  int64_t size() const;
+  int size() const;
 };
 
 /**
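
The return type narrows from int64_t to int because DDim::size() reports the arity (the number of dimensions), not an element count. A minimal sketch of the distinction, assuming the existing make_ddim and product helpers in ddim.h (the shape is illustrative):

#include "paddle/framework/ddim.h"

// Sketch: size() is the rank; product() is the total element count.
void ddim_size_sketch() {
  paddle::framework::DDim d = paddle::framework::make_ddim({2, 3, 4});
  int rank = d.size();                            // 3 dimensions
  int64_t numel = paddle::framework::product(d);  // 2 * 3 * 4 = 24 elements
}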

paddle/framework/lod_rank_table.cc

Lines changed: 1 addition & 0 deletions

@@ -31,6 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) {
     TableItem item;
     item.index = i;
     item.length = vec[i + 1] - vec[i];
+    VLOG(10) << "Add item to rank table " << item.index << " " << item.length;
     items_.emplace_back(item);
   }
   // NOTE(yuyang18):

paddle/framework/lod_tensor.cc

Lines changed: 31 additions & 19 deletions

@@ -27,6 +27,20 @@
 namespace paddle {
 namespace framework {
 
+std::ostream& operator<<(std::ostream& os, const LoD& lod) {
+  os << "{";
+  for (auto& v : lod) {
+    os << "{";
+    for (auto& i : v) {
+      os << i << ",";
+    }
+    os << "}";
+  }
+  os << "}";
+
+  return os;
+}
+
 LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) {
   LoD new_lod;
   new_lod.reserve(level_end - level_begin);
@@ -136,37 +150,35 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
   ShareDataWith(Slice(begin, end));
 }
 
-void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx,
-                             std::vector<std::vector<size_t>>* lod_length,
-                             size_t* start_offset) {
-  lod_length->clear();
-  PADDLE_ENFORCE(start_idx < lod.size() - 1,
-                 "start_idx should be >= 0 and < lod.size() - 1.");
-  PADDLE_ENFORCE(end_idx < lod.size(),
-                 "end_idx should be >= 0 and < lod.size().");
-  PADDLE_ENFORCE_LE(start_idx, end_idx,
-                    "start_idx should be less than end_idx.");
-  for (size_t level_idx = 0; level_idx < lod.size(); ++level_idx) {
+using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>;
+LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD& lod, size_t start_idx,
+                                        size_t end_idx, size_t start_level) {
+  LoD sub_lod;
+
+  for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) {
+    PADDLE_ENFORCE_LE(start_idx, end_idx);
+    PADDLE_ENFORCE_LT(end_idx, lod[level_idx].size());
     std::vector<size_t> level_lens;
     for (size_t i = start_idx; i < end_idx; ++i) {
       level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
     }
-    lod_length->emplace_back(level_lens);
+    sub_lod.emplace_back(level_lens);
    start_idx = lod[level_idx][start_idx];
    end_idx = lod[level_idx][end_idx];
  }
-  *start_offset = start_idx;
+
+  return LoDAndOffset{sub_lod, {start_idx, end_idx}};
 }
 
-void AppendLoD(LoD* lod, const std::vector<std::vector<size_t>>& lod_length) {
-  PADDLE_ENFORCE_EQ(
-      lod->size(), lod_length.size(),
+void AppendLoD(LoD* lod, const LoD& lod_length) {
+  PADDLE_ENFORCE(
+      lod->empty() || lod->size() == lod_length.size(),
       "The lod_length should has the same size with the appended lod.");
+  if (lod->empty()) {
+    *lod = LoD(lod_length.size(), std::vector<size_t>({0}));
+  }
   for (size_t i = 0; i < lod->size(); ++i) {
     auto& level = (*lod)[i];
-    if (level.empty()) {
-      level.push_back(0);
-    }
     for (size_t len : lod_length[i]) {
       level.push_back(level.back() + len);
     }
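
The new pair of functions replaces the old output-parameter style: GetSubLoDAndAbsoluteOffset walks from start_level down to the lowest level, collecting per-level lengths, and returns the sub-LoD together with the absolute [start, end) offsets in one value. A minimal usage sketch, assuming only the two functions above (the LoD values are illustrative):

#include "paddle/framework/lod_tensor.h"

// Sketch: slice sequence [1, 2) out of a two-level LoD.
void sub_lod_sketch() {
  using namespace paddle::framework;
  LoD lod;
  lod.push_back(std::vector<size_t>({0, 2, 4}));         // 2 top-level sequences
  lod.push_back(std::vector<size_t>({0, 3, 5, 9, 10}));  // sub-sequence offsets

  auto result = GetSubLoDAndAbsoluteOffset(lod, 1, 2, 0);
  LoD sub_lod = result.first;          // {{2}, {4, 1}}
  size_t start = result.second.first;  // 5: absolute begin offset
  size_t end = result.second.second;   // 10: absolute end offset

  // AppendLoD seeds an empty LoD with zeros before accumulating lengths.
  LoD out;
  AppendLoD(&out, sub_lod);  // out == {{0, 2}, {0, 4, 5}}
}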

paddle/framework/lod_tensor.h

Lines changed: 5 additions & 4 deletions

@@ -56,6 +56,8 @@ using Vector = thrust::host_vector<
  */
 using LoD = std::vector<Vector<size_t>>;
 
+std::ostream& operator<<(std::ostream& os, const LoD& lod);
+
 /*
  * Slice levels from a LoD.
  * NOTE the lowest level should always be the absolute offsets of the underlying
@@ -181,11 +183,10 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
   return tensor;
 }
 
-void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx,
-                             std::vector<std::vector<size_t>>* lod_length,
-                             size_t* start_offset);
+std::pair<LoD, std::pair<size_t, size_t>> GetSubLoDAndAbsoluteOffset(
+    const LoD& lod, size_t start_idx, size_t end_idx, size_t start_level);
 
-void AppendLoD(LoD* lod, const std::vector<std::vector<size_t>>& lod_length);
+void AppendLoD(LoD* lod, const LoD& lod_length);
 
 }  // namespace framework
 }  // namespace paddle
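
The streaming operator declared here prints a LoD as nested, comma-terminated braces with no separator between levels. A quick sketch of the expected output (the values are illustrative):

#include <iostream>
#include "paddle/framework/lod_tensor.h"

void print_lod_sketch() {
  paddle::framework::LoD lod;
  lod.push_back(std::vector<size_t>({0, 2, 4}));
  lod.push_back(std::vector<size_t>({0, 1, 3, 5, 7}));
  std::cout << lod << "\n";  // prints: {{0,2,4,}{0,1,3,5,7,}}
}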

paddle/framework/lod_tensor_test.cc

Lines changed: 20 additions & 19 deletions

@@ -146,43 +146,44 @@ TEST(LodExpand, test) {
 
 TEST(LoD, GetFineGrainedLoDLength) {
   LoD lod;
-  lod.push_back(std::vector<size_t>{0, 2, 4, 5});
-  lod.push_back(std::vector<size_t>{0, 1, 6, 8, 10, 11});
+  lod.push_back(std::vector<size_t>({0, 2, 4, 5}));
+  lod.push_back(std::vector<size_t>({0, 1, 6, 8, 10, 11}));
   lod.push_back(
-      std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26, 29});
+      std::vector<size_t>({0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26, 29}));
 
-  std::vector<std::vector<size_t>> lod_length;
-  size_t start_offset;
-  paddle::framework::GetFineGrainedLoDLength(lod, 1, 2, &lod_length,
-                                             &start_offset);
+  auto lod_and_offset =
+      paddle::framework::GetSubLoDAndAbsoluteOffset(lod, 1, 2, 0);
+  LoD lod_length = lod_and_offset.first;
+  size_t start_offset = lod_and_offset.second.first;
+  size_t end_offset = lod_and_offset.second.second;
 
-  std::vector<std::vector<size_t>> expected;
+  LoD expected;
   expected.push_back(std::vector<size_t>{2});
   expected.push_back(std::vector<size_t>{2, 2});
   expected.push_back(std::vector<size_t>{2, 3, 4, 2});
   EXPECT_EQ(lod_length, expected);
   EXPECT_EQ(start_offset, 15UL);
+  EXPECT_EQ(end_offset, 26UL);
 }
 
 TEST(LoD, AppendLoD) {
-  std::vector<std::vector<size_t>> lod_lens;
-  lod_lens.push_back(std::vector<size_t>{2});
-  lod_lens.push_back(std::vector<size_t>{2, 2});
-  lod_lens.push_back(std::vector<size_t>{2, 3, 4, 2});
+  LoD lod_lens;
+  lod_lens.push_back(std::vector<size_t>({2}));
+  lod_lens.push_back(std::vector<size_t>({2, 2}));
+  lod_lens.push_back(std::vector<size_t>({2, 3, 4, 2}));
 
   LoD origin;
-  origin.push_back(std::vector<size_t>{0, 2});
-  origin.push_back(std::vector<size_t>{0, 1, 6});
-  origin.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15});
+  origin.push_back(std::vector<size_t>({0, 2}));
+  origin.push_back(std::vector<size_t>({0, 1, 6}));
+  origin.push_back(std::vector<size_t>({0, 2, 5, 7, 10, 12, 15}));
 
   paddle::framework::AppendLoD(&origin, lod_lens);
 
   LoD expected;
-  expected.push_back(std::vector<size_t>{0, 2, 4});
-  expected.push_back(std::vector<size_t>{0, 1, 6, 8, 10});
+  expected.push_back(std::vector<size_t>({0, 2, 4}));
+  expected.push_back(std::vector<size_t>({0, 1, 6, 8, 10}));
   expected.push_back(
-      std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26});
-
+      std::vector<size_t>({0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26}));
   EXPECT_EQ(origin, expected);
 }
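
For reference, a worked trace of GetSubLoDAndAbsoluteOffset(lod, 1, 2, 0) in the first test: at level 0, the slice [1, 2) has length 4 - 2 = 2 and maps to index range [2, 4); at level 1, [2, 4) yields lengths {8 - 6, 10 - 8} = {2, 2} and maps to [6, 10); at level 2, [6, 10) yields {17 - 15, 20 - 17, 24 - 20, 26 - 24} = {2, 3, 4, 2}, with absolute offsets [15, 26). These are exactly the values the test asserts.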

paddle/framework/var_desc.cc

Lines changed: 4 additions & 2 deletions

@@ -45,7 +45,8 @@ void VarDescBind::SetLoDLevel(int32_t lod_level) {
       desc_.mutable_tensor_array()->set_lod_level(lod_level);
       break;
     default:
-      PADDLE_THROW("Tensor type=%d does not support LoDLevel", desc_.type());
+      PADDLE_THROW("Tensor type=%d does not support LoDLevel",
+                   desc_.tensor_array().lod_level());
   }
 }
 
@@ -56,7 +57,8 @@ int32_t VarDescBind::GetLodLevel() const {
     case VarDesc::LOD_TENSOR_ARRAY:
       return desc_.tensor_array().lod_level();
     default:
-      PADDLE_THROW("Tensor type=%d does not support LoDLevel", desc_.type());
+      PADDLE_THROW("Tensor type=%d does not support LoDLevel",
+                   desc_.tensor_array().lod_level());
   }
 }

paddle/operators/CMakeLists.txt

Lines changed: 4 additions & 0 deletions

@@ -170,6 +170,8 @@ set(DEPS_OPS
   sequence_conv_op
   sequence_pool_op
   lod_rank_table_op
+  lod_tensor_to_array_op
+  array_to_lod_tensor_op
   lstm_op
   tensor_array_read_write_op
   gru_op)
@@ -182,6 +184,8 @@ op_library(sum_op DEPS net_op selected_rows_functor)
 op_library(pool_op DEPS pooling)
 op_library(pool_with_index_op DEPS pooling)
 op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table)
+op_library(lod_tensor_to_array_op SRCS lod_tensor_to_array_op.cc DEPS lod_rank_table_op)
+op_library(array_to_lod_tensor_op SRCS array_to_lod_tensor_op.cc DEPS lod_rank_table_op)
 op_library(tensor_array_read_write_op SRCS tensor_array_read_write_op.cc)
 if(WITH_GPU)
 op_library(nccl_op DEPS nccl_common)
paddle/operators/array_to_lod_tensor_op.cc

Lines changed: 152 additions & 0 deletions

@@ -0,0 +1,152 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include <numeric>
+#include "paddle/framework/lod_rank_table.h"
+#include "paddle/framework/lod_tensor_array.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/memory/memcpy.h"
+
+namespace paddle {
+namespace operators {
+
+using LoD = framework::LoD;
+
+class ArrayToLoDTensorOp : public framework::OperatorBase {
+ public:
+  ArrayToLoDTensorOp(const std::string &type,
+                     const framework::VariableNameMap &inputs,
+                     const framework::VariableNameMap &outputs,
+                     const framework::AttributeMap &attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
+  void Run(const framework::Scope &scope,
+           const platform::DeviceContext &dev_ctx) const override {
+    auto &x = scope.FindVar(Input("X"))->Get<framework::LoDTensorArray>();
+    auto &rank_table =
+        scope.FindVar(Input("RankTable"))->Get<framework::LoDRankTable>();
+    auto *out =
+        scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>();
+
+    // Check dims, place and data type of input's elements and infer output's
+    // dim
+    PADDLE_ENFORCE(!x.empty(), "There's no element in the input array.");
+    int rank = x[0].dims().size();
+    platform::Place place = x[0].place();
+    std::type_index data_type = x[0].type();
+    framework::DDim ins_dims = framework::slice_ddim(x[0].dims(), 1, rank);
+    int64_t batch_size = x[0].dims()[0];
+    for (size_t i = 1; i < x.size(); ++i) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x[i].dims(), 1, rank), ins_dims,
+                        "The dimension of the %zu'th element in LoDTensorArray "
+                        "differs from previous ones.",
+                        i);
+      PADDLE_ENFORCE(platform::places_are_same_class(x[i].place(), place),
+                     "The place class of the %zu'th element in LoDTensorArray "
+                     "differs from previous ones.",
+                     i);
+      PADDLE_ENFORCE(x[i].type() == data_type,
+                     "The data type of the %zu'th element in LoDTensorArray "
+                     "differs from previous ones.",
+                     i);
+      batch_size += x[i].dims()[0];
+    }
+    auto ins_dim_vec = framework::vectorize(ins_dims);
+    ins_dim_vec.insert(ins_dim_vec.begin(), batch_size);
+    framework::DDim out_dims = framework::make_ddim(ins_dim_vec);
+    out->Resize(out_dims);
+    out->mutable_data(place, data_type);
+
+    auto &table_items = rank_table.items();
+    std::vector<size_t> table_item_idx(table_items.size());
+    // table_item_idx = range(table_items.size())
+    std::iota(table_item_idx.begin(), table_item_idx.end(), 0);
+    std::sort(table_item_idx.begin(), table_item_idx.end(),
+              [&](size_t a, size_t b) {
+                return table_items[a].index < table_items[b].index;
+              });
+
+    // Build LoDTensor `out`
+    framework::LoD *out_lod = out->mutable_lod();
+    out_lod->clear();
+    size_t out_offset = 0;
+    auto prefix_lod = rank_table.coarse_lod();
+    prefix_lod.emplace_back();
+    auto &cur_level_lod = prefix_lod.back();
+    cur_level_lod.push_back(0);
+    for (size_t idx : table_item_idx) {
+      cur_level_lod.push_back(cur_level_lod.back() + table_items[idx].length);
+      for (size_t x_idx = 0; x_idx < table_items[idx].length; ++x_idx) {
+        auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset(
+            x[x_idx].lod(), idx, idx + 1, 0);
+
+        auto &lod_length = lod_and_offset.first;
+        framework::AppendLoD(out_lod, lod_length);
+
+        size_t start_offset = lod_and_offset.second.first;
+        size_t end_offset = lod_and_offset.second.second;
+        VLOG(10) << "idx=" << idx << " x_idx=" << x_idx << " ["
+                 << start_offset << ", " << end_offset << "]";
+        // Copy data
+        PADDLE_ENFORCE_GE(end_offset, start_offset);
+        size_t len = end_offset - start_offset;
+        if (len == 0) {
+          continue;
+        }
+        out->Slice(out_offset, out_offset + len)
+            .CopyFrom(x[x_idx].Slice(start_offset, end_offset), place, dev_ctx);
+        out_offset += len;
+      }
+    }
+    out_lod->insert(out_lod->begin(), prefix_lod.begin(), prefix_lod.end());
+  }
+};
+
+class ArrayToLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ArrayToLoDTensorOpProtoMaker(framework::OpProto *proto,
+                               framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X",
+             "(std::vector<LoDTensor>) A vector of tensors that is going to "
+             "be cast to a big LoDTensor.");
+    AddInput("RankTable",
+             "(LoDRankTable) RankTable provides the coarse lod information to "
+             "build the output LoDTensor. See "
+             "'paddle/framework/lod_rank_table.h' for more details.");
+    AddOutput("Out", "(LoDTensor) The LoDTensor formed by the input tensor array.");
+    AddComment(
+        R"DOC(This op builds a big LoDTensor from a std::vector<LoDTensor>
+and a LoDRankTable. It is supposed to be used to get a dynamic RNN's
+outputs back into a normal LoDTensor. The std::vector<LoDTensor>
+would be the output of the RNN op and the LoDRankTable would be built
+with the RNN's input.)DOC");
+  }
+};
+
+class ArrayToLoDTensorInferShape : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext *context) const override {
+    PADDLE_ENFORCE(context->HasInput("X"),
+                   "ArrayToLoDTensorOp must have input X.");
+    PADDLE_ENFORCE(context->HasInput("RankTable"),
+                   "ArrayToLoDTensorOp must have input RankTable.");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(array_to_lod_tensor, ops::ArrayToLoDTensorOp,
+                  ops::ArrayToLoDTensorOpProtoMaker,
+                  ops::ArrayToLoDTensorInferShape);
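
A note on the index handling above: the rank table lists sequences sorted by length (descending), so sorting positions by each item's original index recovers the input order before concatenation. A standalone sketch of that idiom, with Item standing in for LoDRankTable::TableItem:

#include <algorithm>
#include <numeric>
#include <vector>

struct Item {
  size_t index;   // original position of the sequence
  size_t length;  // number of steps in the sequence
};

// Returns positions into `items` arranged so that original order is restored.
std::vector<size_t> RestoreOrder(const std::vector<Item> &items) {
  std::vector<size_t> idx(items.size());
  std::iota(idx.begin(), idx.end(), 0);  // idx = 0, 1, ..., n - 1
  std::sort(idx.begin(), idx.end(),
            [&](size_t a, size_t b) { return items[a].index < items[b].index; });
  return idx;
}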

paddle/operators/lod_rank_table_op.cc

Lines changed: 1 addition & 0 deletions

@@ -28,6 +28,7 @@ class LoDRankTableOp : public framework::OperatorBase {
     auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
     auto *out =
         scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
+    VLOG(10) << "Level = " << static_cast<size_t>(Attr<int>("level"));
     out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
   }
 };
