
Commit f07a226

add split and merge lod tensor operator (#5537)
* add split lod tensor operator
* add more test cases
* clean code
* add merge lod tensor operator
* fix bug
* clean code
* add grad operator
* make mask support GPU
* add comments
1 parent 7c1755d commit f07a226

File tree

4 files changed: +590 -1 lines changed
Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/op_registry.h"
#include "paddle/memory/memcpy.h"

namespace paddle {
namespace operators {

using LoD = framework::LoD;

class MergeLoDTensorOp : public framework::OperatorBase {
 public:
  MergeLoDTensorOp(const std::string &type,
                   const framework::VariableNameMap &inputs,
                   const framework::VariableNameMap &outputs,
                   const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    auto &x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
    auto &mask = scope.FindVar(Input("Mask"))->Get<framework::LoDTensor>();
    auto &in_true = scope.FindVar(Input("InTrue"))->Get<framework::LoDTensor>();
    auto &in_false =
        scope.FindVar(Input("InFalse"))->Get<framework::LoDTensor>();
    auto *out =
        scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>();
    auto level = static_cast<size_t>(Attr<int>("level"));

    auto &mask_dim = mask.dims();

    std::unique_ptr<framework::LoDTensor> cpu_mask{new framework::LoDTensor()};
    if (platform::is_cpu_place(mask.place())) {
      cpu_mask->ShareDataWith(mask);
    } else if (platform::is_gpu_place(mask.place())) {
#ifdef PADDLE_WITH_CUDA
      cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx);
#else
      PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option");
#endif
    }
    auto *mask_data = cpu_mask->data<bool>();

    int rank = in_true.dims().size();
    platform::Place place = in_true.place();
    std::type_index data_type = in_true.type();
    framework::DDim in_true_dims =
        framework::slice_ddim(in_true.dims(), 1, rank);

    int64_t batch_size = in_true.dims()[0] + in_false.dims()[0];

    auto in_true_dim_vec = framework::vectorize(in_true_dims);
    in_true_dim_vec.insert(in_true_dim_vec.begin(), batch_size);

    framework::DDim out_dims = framework::make_ddim(in_true_dim_vec);
    out->Resize(out_dims);
    out->mutable_data(place, data_type);

    auto *out_lod = out->mutable_lod();
    out_lod->clear();
    size_t out_offset = 0;

    // Build LoDTensor `out`

    size_t in_true_idx = 0;
    size_t in_false_idx = 0;
    for (size_t i = 0; i < static_cast<size_t>(mask_dim[0]); i++) {
      const framework::LoDTensor *input = nullptr;
      size_t *in_idx = nullptr;
      if (static_cast<int>(mask_data[i]) == 0) {
        input = &in_false;
        in_idx = &in_false_idx;
      } else {
        input = &in_true;
        in_idx = &in_true_idx;
      }
      auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset(
          input->lod(), *in_idx, (*in_idx) + 1, 0);
      auto &lod_length = lod_and_offset.first;

      framework::AppendLoD(out_lod, lod_length);

      size_t start_offset = lod_and_offset.second.first;
      size_t end_offset = lod_and_offset.second.second;

      PADDLE_ENFORCE_GE(end_offset, start_offset);
      size_t len = end_offset - start_offset;
      if (len == 0) {
        continue;
      }
      out->Slice(out_offset, out_offset + len)
          .CopyFrom(input->Slice(start_offset, end_offset), place, dev_ctx);
      out_offset += len;
      (*in_idx) += 1;
    }

    for (size_t i = 0; i < level; i++) {
      out_lod->insert(out_lod->begin(), x.lod()[i]);
    }
  }
};
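
The Run method above is at heart a mask-driven interleave: for each mask entry it consumes the next unmerged sequence from InTrue (mask is true) or InFalse (mask is false) and appends it to Out. The following standalone sketch shows that core loop with plain std::vector elements standing in for LoDTensor slices and the LoD bookkeeping omitted; MergeByMask is a hypothetical name, not part of the operator.

// --- illustrative sketch, not part of the commit ---
#include <cassert>
#include <cstddef>
#include <vector>

// Each mask entry picks the next unconsumed element from one of the two
// branches, mirroring the in_true_idx/in_false_idx cursors in Run above.
std::vector<int> MergeByMask(const std::vector<bool> &mask,
                             const std::vector<int> &in_true,
                             const std::vector<int> &in_false) {
  std::vector<int> out;
  std::size_t in_true_idx = 0;
  std::size_t in_false_idx = 0;
  for (bool m : mask) {
    if (m) {
      out.push_back(in_true[in_true_idx++]);
    } else {
      out.push_back(in_false[in_false_idx++]);
    }
  }
  return out;
}

int main() {
  // mask = [0, 1, 0]: rows 0 and 2 come from the false branch, row 1 from
  // the true branch, restoring the pre-split order.
  std::vector<int> merged = MergeByMask({false, true, false},
                                        /*in_true=*/{20},
                                        /*in_false=*/{10, 30});
  assert((merged == std::vector<int>{10, 20, 30}));
  return 0;
}
// --- end of sketch ---
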
class MergeLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MergeLoDTensorOpProtoMaker(framework::OpProto *proto,
                             framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "The input LoDTensor, contains complete lod information to "
             "construct the output");
    AddInput("Mask", "A bool column vector which mask the input");
    AddInput("InTrue", "The True branch to be merged");
    AddInput("InFalse", "The False branch to be merged");
    AddOutput("Out", "The merged output LoDTensor");
    AddAttr<int>("level", "(int) the specific lod level to rank.")
        .SetDefault(0)
        .EqualGreaterThan(0);
    AddComment(
        R"DOC(
Merge True and False branches of LoDTensor into a single Output,
with a mask at certain lod level. X is used to obtain complete
lod information. Please refer to SplitLoDTensorOp.)DOC");
  }
};

class MergeLoDTensorInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    PADDLE_ENFORCE(context->HasInput("X"),
                   "MergeLoDTensorOp must has input X.");
    PADDLE_ENFORCE(context->HasInput("Mask"),
                   "MergeLoDTensorOp must has input Mask.");
    PADDLE_ENFORCE(context->HasInput("InTrue"),
                   "MergeLoDTensorOp must has input InTrue.");
    PADDLE_ENFORCE(context->HasInput("InFalse"),
                   "MergeLoDTensorOp must has input InFalse.");
    PADDLE_ENFORCE(context->HasOutput("Out"),
                   "MergeLoDTensorOp must has output Out");

    auto mask_dim = context->GetInputDim("Mask");
    PADDLE_ENFORCE_EQ(mask_dim.size(), 2);
    PADDLE_ENFORCE_EQ(mask_dim[1], 1);

    context->SetOutputDim("Out", context->GetInputDim("InTrue"));
  }
};

class MergeLoDTensorGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDescBind> Apply() const override {
    auto *grad_op = new framework::OpDescBind();
    grad_op->SetType("split_lod_tensor");
    grad_op->SetInput("X", OutputGrad("Out"));
    grad_op->SetInput("Mask", Input("Mask"));
    grad_op->SetOutput("OutTrue", InputGrad("InTrue"));
    grad_op->SetOutput("OutFalse", InputGrad("InFalse"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDescBind>(grad_op);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(merge_lod_tensor, ops::MergeLoDTensorOp,
                  ops::MergeLoDTensorOpProtoMaker,
                  ops::MergeLoDTensorInferShape, ops::MergeLoDTensorGradMaker);
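
The LoD surgery in Run above (GetSubLoDAndAbsoluteOffset, AppendLoD) relies on Paddle's offset-based LoD: at every level, element i covers the half-open range [lod[level][i], lod[level][i + 1]) of the level below, and the last level indexes raw tensor rows. The sketch below walks those offsets down to absolute row ranges; AbsoluteRowRange is a simplified, hypothetical stand-in for the framework helper, which additionally returns the sub-LoD lengths that AppendLoD consumes.

// --- illustrative sketch, not part of the commit ---
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

// Offset-based LoD: lod[level][i] .. lod[level][i + 1] delimits element i's
// children in the next level; the last level delimits raw tensor rows.
using Lod = std::vector<std::vector<std::size_t>>;

// Map sequence `idx` at the top level to its absolute row range [begin, end).
std::pair<std::size_t, std::size_t> AbsoluteRowRange(const Lod &lod,
                                                     std::size_t idx) {
  std::size_t begin = lod[0][idx];
  std::size_t end = lod[0][idx + 1];
  for (std::size_t level = 1; level < lod.size(); ++level) {
    begin = lod[level][begin];
    end = lod[level][end];
  }
  return {begin, end};
}

int main() {
  // Two top-level sequences; the first owns sub-sequences [0, 2), the second
  // owns [2, 3); the sub-sequences cover rows 0..6.
  Lod lod = {{0, 2, 3}, {0, 2, 5, 6}};
  auto first = AbsoluteRowRange(lod, 0);   // rows [0, 5)
  auto second = AbsoluteRowRange(lod, 1);  // rows [5, 6)
  assert(first.first == 0 && first.second == 5);
  assert(second.first == 5 && second.second == 6);
  return 0;
}
// --- end of sketch ---
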
Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <numeric>  // for std::accumulate below

#include "paddle/framework/op_registry.h"
#include "paddle/memory/memcpy.h"

namespace paddle {
namespace operators {

struct CopyRange {
  size_t begin;
  size_t end;
};

using LoD = framework::LoD;

class SplitLoDTensorOp : public framework::OperatorBase {
 public:
  SplitLoDTensorOp(const std::string &type,
                   const framework::VariableNameMap &inputs,
                   const framework::VariableNameMap &outputs,
                   const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    auto &x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
    auto &mask = scope.FindVar(Input("Mask"))->Get<framework::LoDTensor>();
    auto *out_true =
        scope.FindVar(Output("OutTrue"))->GetMutable<framework::LoDTensor>();
    auto *out_false =
        scope.FindVar(Output("OutFalse"))->GetMutable<framework::LoDTensor>();
    auto level = static_cast<size_t>(Attr<int>("level"));
    auto &x_lod = x.lod();
    auto &mask_dim = mask.dims();

    std::unique_ptr<framework::LoDTensor> cpu_mask{new framework::LoDTensor()};
    if (platform::is_cpu_place(mask.place())) {
      cpu_mask->ShareDataWith(mask);
    } else if (platform::is_gpu_place(mask.place())) {
#ifdef PADDLE_WITH_CUDA
      cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx);
#else
      PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option");
#endif
    }
    auto *mask_data = cpu_mask->data<bool>();

    std::vector<std::vector<CopyRange>> copy_ranges(mask_dim[0]);

    // set out_true/out_false lod
    for (size_t t = 0; t < 2; t++) {
      LoD *lod = nullptr;
      if (t == 0) {
        lod = out_false->mutable_lod();
      } else {
        lod = out_true->mutable_lod();
      }
      lod->clear();
      for (size_t i = 0; i < static_cast<size_t>(mask_dim[0]); i++) {
        if (static_cast<size_t>(mask_data[i]) == t) {
          size_t start_idx = i;
          auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset(
              x_lod, start_idx, start_idx + 1, level);

          auto &lod_length = lod_and_offset.first;
          framework::AppendLoD(lod, lod_length);

          size_t start_offset = lod_and_offset.second.first;
          size_t end_offset = lod_and_offset.second.second;
          copy_ranges[t].emplace_back(CopyRange{start_offset, end_offset});
        }
      }
    }

    for (size_t t = 0; t < 2; ++t) {
      framework::LoDTensor *out;
      if (t == 0) {
        out = out_false;
      } else {
        out = out_true;
      }
      auto &ranges = copy_ranges[t];
      size_t height = std::accumulate(
          ranges.begin(), ranges.end(), 0UL,
          [](size_t a, const CopyRange &b) { return a + b.end - b.begin; });
      auto x_dim = x.dims();
      x_dim[0] = static_cast<int64_t>(height);
      out->Resize(x_dim);
      out->mutable_data(x.place(), x.type());
      size_t offset = 0;
      for (auto &each_range : ranges) {
        size_t len = each_range.end - each_range.begin;
        if (len == 0) {
          continue;
        }
        // out[offset: offset+len] = x[each_range.begin: each_range.end]
        out->Slice(static_cast<int>(offset), static_cast<int>(offset + len))
            .CopyFrom(x.Slice(static_cast<int>(each_range.begin),
                              static_cast<int>(each_range.end)),
                      x.place(), dev_ctx);
        offset += len;
      }
    }
  }
};
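
Run above works in two passes: the first buckets each masked sequence's absolute row range into a per-branch CopyRange list (building each output's LoD as it goes), and the second sizes each output to the summed range lengths before copying the slices. Below is a minimal sketch of the first pass, assuming each top-level sequence i simply occupies rows [seq_offsets[i], seq_offsets[i + 1]); SplitRangesByMask and its parameters are hypothetical names for illustration only.

// --- illustrative sketch, not part of the commit ---
#include <cassert>
#include <cstddef>
#include <vector>

// Stand-in for the op's CopyRange: a half-open row range [begin, end).
struct Range {
  std::size_t begin;
  std::size_t end;
};

// Bucket each sequence's row range into the true or false list by mask,
// mirroring the first pass of Run above (without the LoD bookkeeping).
void SplitRangesByMask(const std::vector<bool> &mask,
                       const std::vector<std::size_t> &seq_offsets,
                       std::vector<Range> *true_ranges,
                       std::vector<Range> *false_ranges) {
  for (std::size_t i = 0; i + 1 < seq_offsets.size(); ++i) {
    Range r{seq_offsets[i], seq_offsets[i + 1]};
    (mask[i] ? true_ranges : false_ranges)->push_back(r);
  }
}

int main() {
  // Three sequences over rows 0..6 with mask [0, 1, 0], as in the op's DOC
  // comment: sequences 0 and 2 go to the false output, sequence 1 to true.
  std::vector<Range> t, f;
  SplitRangesByMask({false, true, false}, {0, 2, 5, 6}, &t, &f);
  assert(t.size() == 1 && t[0].begin == 2 && t[0].end == 5);
  assert(f.size() == 2 && f[0].begin == 0 && f[1].end == 6);
  return 0;
}
// --- end of sketch ---
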
117+
118+
class SplitLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker {
119+
public:
120+
SplitLoDTensorOpProtoMaker(framework::OpProto *proto,
121+
framework::OpAttrChecker *op_checker)
122+
: OpProtoAndCheckerMaker(proto, op_checker) {
123+
AddInput("X", "The input LoDTensor");
124+
AddInput("Mask", "A bool column vector which mask the input");
125+
AddOutput("OutTrue", "True branch of input LoDTensor");
126+
AddOutput("OutFalse", "False branch of input LoDTensor");
127+
AddAttr<int>("level", "(int) the specific lod level to split.")
128+
.SetDefault(0)
129+
.EqualGreaterThan(0);
130+
AddComment(
131+
R"DOC(
132+
Split a LoDTensor with a Mask at certain level. The input LoDTensor
133+
has 3 sequence at certain lod level. The Mask is a bool column vector,
134+
such as [0, 1, 0] at the same level. The first and third sequence will
135+
be send to False Output LoDTensor; whereas the second sequence will
136+
be send to True Output LoDTensor. Please refer to MergeLoDTensorOp.)DOC");
137+
}
138+
};
139+
140+
class SplitLoDTensorInferShape : public framework::InferShapeBase {
141+
public:
142+
void operator()(framework::InferShapeContext *context) const override {
143+
PADDLE_ENFORCE(context->HasInput("X"),
144+
"SplitLoDTensorOp must has input X.");
145+
PADDLE_ENFORCE(context->HasInput("Mask"),
146+
"SplitLoDTensorOp must has input Mask.");
147+
PADDLE_ENFORCE(context->HasOutput("OutTrue"),
148+
"SplitLoDTensorOp must has output OutTrue.");
149+
PADDLE_ENFORCE(context->HasOutput("OutFalse"),
150+
"SplitLoDTensorOp must has output OutFalse.");
151+
152+
auto mask_dim = context->GetInputDim("Mask");
153+
PADDLE_ENFORCE_EQ(mask_dim.size(), 2);
154+
PADDLE_ENFORCE_EQ(mask_dim[1], 1);
155+
156+
context->SetOutputDim("OutTrue", context->GetInputDim("X"));
157+
context->SetOutputDim("OutFalse", context->GetInputDim("X"));
158+
}
159+
};
160+
161+
class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker {
162+
public:
163+
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
164+
165+
protected:
166+
std::unique_ptr<framework::OpDescBind> Apply() const override {
167+
auto *grad_op = new framework::OpDescBind();
168+
grad_op->SetType("merge_lod_tensor");
169+
grad_op->SetInput("InTrue", OutputGrad("OutTrue"));
170+
grad_op->SetInput("InFalse", OutputGrad("OutFalse"));
171+
grad_op->SetInput("Mask", Input("Mask"));
172+
grad_op->SetInput("X", Input("X"));
173+
grad_op->SetOutput("Out", InputGrad("X"));
174+
grad_op->SetAttrMap(Attrs());
175+
return std::unique_ptr<framework::OpDescBind>(grad_op);
176+
}
177+
};
178+
179+
} // namespace operators
180+
} // namespace paddle
181+
182+
namespace ops = paddle::operators;
183+
REGISTER_OPERATOR(split_lod_tensor, ops::SplitLoDTensorOp,
184+
ops::SplitLoDTensorOpProtoMaker,
185+
ops::SplitLoDTensorInferShape,
186+
ops::SplitLoDTensorArrayGradMaker);
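
The two grad makers reference each other: merge_lod_tensor's gradient is split_lod_tensor (in the first file above) and split_lod_tensor's gradient is merge_lod_tensor. This works because, for a fixed Mask, the two ops are inverse row permutations, so the backward pass only needs to route gradient rows the opposite way. A small sketch of that inverse property on plain int "rows", with hypothetical Split/Merge helpers and no LoD bookkeeping:

// --- illustrative sketch, not part of the commit ---
#include <cassert>
#include <cstddef>
#include <vector>

// Scatter each element of x into the true or false branch by mask.
void Split(const std::vector<bool> &mask, const std::vector<int> &x,
           std::vector<int> *out_true, std::vector<int> *out_false) {
  for (std::size_t i = 0; i < mask.size(); ++i) {
    (mask[i] ? out_true : out_false)->push_back(x[i]);
  }
}

// Gather the branches back into mask order.
std::vector<int> Merge(const std::vector<bool> &mask,
                       const std::vector<int> &in_true,
                       const std::vector<int> &in_false) {
  std::vector<int> out;
  std::size_t ti = 0;
  std::size_t fi = 0;
  for (bool m : mask) {
    out.push_back(m ? in_true[ti++] : in_false[fi++]);
  }
  return out;
}

int main() {
  std::vector<bool> mask = {false, true, false, true};
  std::vector<int> x = {10, 20, 30, 40};
  std::vector<int> t, f;
  Split(mask, x, &t, &f);          // t = {20, 40}, f = {10, 30}
  assert(Merge(mask, t, f) == x);  // merging undoes the split for the same mask
  return 0;
}
// --- end of sketch ---
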
