Skip to content

Commit 9401b64

Browse files
authored
Merge pull request #11877 from reyoung/feature/fix_reshape_op_size
User can register a standard C++ functor as Kernel
2 parents ebadb4c + 550ab8d commit 9401b64

File tree

7 files changed

+261
-239
lines changed

7 files changed

+261
-239
lines changed

paddle/fluid/framework/op_registry.h

Lines changed: 78 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,23 +76,31 @@ class OpRegistry {
7676
template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
7777
struct OpKernelRegistrarFunctor;
7878

79+
template <typename PlaceType, typename T, typename Func>
80+
inline void RegisterKernelClass(const char* op_type, const char* library_type,
81+
Func func) {
82+
std::string library(library_type);
83+
std::string data_layout = "ANYLAYOUT";
84+
if (library == "MKLDNN") {
85+
data_layout = "MKLDNNLAYOUT";
86+
}
87+
OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(),
88+
StringToDataLayout(data_layout),
89+
StringToLibraryType(library_type));
90+
OperatorWithKernel::AllOpKernels()[op_type][key] = func;
91+
}
92+
7993
template <typename PlaceType, size_t I, typename... KernelTypes>
8094
struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
8195
using KERNEL_TYPE =
8296
typename std::tuple_element<I, std::tuple<KernelTypes...>>::type;
8397

8498
void operator()(const char* op_type, const char* library_type) const {
8599
using T = typename KERNEL_TYPE::ELEMENT_TYPE;
86-
std::string library(library_type);
87-
std::string data_layout = "ANYLAYOUT";
88-
if (library == "MKLDNN") {
89-
data_layout = "MKLDNNLAYOUT";
90-
}
91-
OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(),
92-
StringToDataLayout(data_layout),
93-
StringToLibraryType(library_type));
94-
OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE);
95-
100+
RegisterKernelClass<PlaceType, T>(
101+
op_type, library_type, [](const framework::ExecutionContext& ctx) {
102+
KERNEL_TYPE().Compute(ctx);
103+
});
96104
constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value;
97105
OpKernelRegistrarFunctor<PlaceType, I + 1 == size, I + 1, KernelTypes...>
98106
func;
@@ -116,6 +124,47 @@ class OpKernelRegistrar : public Registrar {
116124
}
117125
};
118126

127+
template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
128+
struct OpKernelRegistrarFunctorEx;
129+
130+
template <typename PlaceType, typename... DataTypeAndKernelType>
131+
class OpKernelRegistrarEx : public Registrar {
132+
public:
133+
explicit OpKernelRegistrarEx(const char* op_type, const char* library_type) {
134+
OpKernelRegistrarFunctorEx<PlaceType, false, 0, DataTypeAndKernelType...>
135+
func;
136+
func(op_type, library_type);
137+
}
138+
};
139+
140+
template <typename PlaceType, size_t I, typename... DataTypeAndKernelType>
141+
struct OpKernelRegistrarFunctorEx<PlaceType, true, I,
142+
DataTypeAndKernelType...> {
143+
void operator()(const char* op_type, const char* library_type) const {}
144+
};
145+
146+
template <typename PlaceType, size_t I, typename... DataTypeAndKernelType>
147+
struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
148+
DataTypeAndKernelType...> {
149+
using Functor =
150+
typename std::tuple_element<I + 1,
151+
std::tuple<DataTypeAndKernelType...>>::type;
152+
using T =
153+
typename std::tuple_element<I,
154+
std::tuple<DataTypeAndKernelType...>>::type;
155+
156+
void operator()(const char* op_type, const char* library_type) const {
157+
RegisterKernelClass<PlaceType, T>(op_type, library_type, Functor());
158+
159+
constexpr auto size =
160+
std::tuple_size<std::tuple<DataTypeAndKernelType...>>::value;
161+
OpKernelRegistrarFunctorEx<PlaceType, I + 2 >= size, I + 2,
162+
DataTypeAndKernelType...>
163+
func;
164+
func(op_type, library_type);
165+
}
166+
};
167+
119168
/**
120169
* check if MACRO is used in GLOBAL NAMESPACE.
121170
*/
@@ -174,6 +223,25 @@ class OpKernelRegistrar : public Registrar {
174223
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \
175224
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
176225

226+
/**
 * Register functor kernels for `op_type`.
 *
 * The variadic arguments are forwarded to OpKernelRegistrarEx as its type
 * list. The macro defines a static registrar object and a
 * TouchOpKernelRegistrar_<op_type>_<library_type>() function that calls
 * Touch() on it and returns 0.
 */
#define REGISTER_OP_KERNEL_EX(op_type, library_type, place_class, ...) \
  STATIC_ASSERT_GLOBAL_NAMESPACE( \
      __reg_op_kernel_##op_type##_##library_type##__, \
      "REGISTER_OP_KERNEL_EX must be called in global namespace"); \
  static ::paddle::framework::OpKernelRegistrarEx<place_class, __VA_ARGS__> \
      __op_kernel_registrar_##op_type##_##library_type##__(#op_type, \
                                                           #library_type); \
  int TouchOpKernelRegistrar_##op_type##_##library_type() { \
    __op_kernel_registrar_##op_type##_##library_type##__.Touch(); \
    return 0; \
  }
237+
238+
/**
 * Shorthand for REGISTER_OP_KERNEL_EX with library type CUDA and place
 * ::paddle::platform::CUDAPlace.
 */
#define REGISTER_OP_CUDA_KERNEL_FUNCTOR(op_type, ...) \
  REGISTER_OP_KERNEL_EX(op_type, CUDA, ::paddle::platform::CUDAPlace, \
                        __VA_ARGS__)
241+
242+
/**
 * Shorthand for REGISTER_OP_KERNEL_EX with library type CPU and place
 * ::paddle::platform::CPUPlace.
 */
#define REGISTER_OP_CPU_KERNEL_FUNCTOR(op_type, ...) \
  REGISTER_OP_KERNEL_EX(op_type, CPU, ::paddle::platform::CPUPlace, \
                        __VA_ARGS__)
244+
177245
/**
178246
* Macro to mark what Operator and Kernel
179247
* we will use and tell the compiler to

paddle/fluid/framework/operator.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -651,7 +651,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
651651
dev_ctx = pool.Get(expected_kernel_key.place_);
652652
}
653653

654-
kernel_iter->second->Compute(ExecutionContext(*this, exec_scope, *dev_ctx));
654+
kernel_iter->second(ExecutionContext(*this, exec_scope, *dev_ctx));
655655

656656
if (!transfered_inplace_vars.empty()) {
657657
// there is inplace variable has been transfered.

paddle/fluid/framework/operator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,9 +347,9 @@ class OpKernel : public OpKernelBase {
347347

348348
class OperatorWithKernel : public OperatorBase {
349349
public:
350+
using OpKernelFunc = std::function<void(const ExecutionContext&)>;
350351
using OpKernelMap =
351-
std::unordered_map<OpKernelType, std::unique_ptr<OpKernelBase>,
352-
OpKernelType::Hash>;
352+
std::unordered_map<OpKernelType, OpKernelFunc, OpKernelType::Hash>;
353353

354354
OperatorWithKernel(const std::string& type, const VariableNameMap& inputs,
355355
const VariableNameMap& outputs, const AttributeMap& attrs)

paddle/fluid/operators/fc_mkldnn_op.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ class MKLDNNMemory {
115115

116116
template <typename T>
117117
class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
118+
public:
118119
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
119120
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
120121
"It must use CPUPlace.");

paddle/fluid/operators/reshape_op.cc

Lines changed: 179 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,108 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

15-
#include "paddle/fluid/operators/reshape_op.h"
16-
1715
#include <string>
1816
#include <vector>
17+
#include "paddle/fluid/framework/op_registry.h"
1918

2019
namespace paddle {
2120
namespace operators {
2221

22+
class ReshapeOp : public framework::OperatorWithKernel {
23+
public:
24+
ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs,
25+
const framework::VariableNameMap &outputs,
26+
const framework::AttributeMap &attrs)
27+
: OperatorWithKernel(type, inputs, outputs, attrs) {}
28+
29+
void InferShape(framework::InferShapeContext *ctx) const override {
30+
PADDLE_ENFORCE(ctx->HasInput("X"),
31+
"Input(X) of ReshapeOp should not be null.");
32+
PADDLE_ENFORCE(ctx->HasOutput("Out"),
33+
"Output(Out) of ReshapeOp should not be null.");
34+
35+
const std::vector<int> &shape = ctx->Attrs().Get<std::vector<int>>("shape");
36+
PADDLE_ENFORCE(!shape.empty(),
37+
"The shape information must be set by Attr(shape).");
38+
39+
if (ctx->HasInput("Shape") && ctx->IsRuntime()) {
40+
// If true, set the shape of Output(Out) according to Input(Shape) in
41+
// ReshapeKernel with ExecutionContext. Also check LoD in ReshapeKernel.
42+
ctx->ShareLoD("X", /*->*/ "Out");
43+
return;
44+
}
45+
46+
auto x_dims = ctx->GetInputDim("X");
47+
auto out_dims = ValidateShape(shape, x_dims);
48+
ctx->SetOutputDim("Out", out_dims);
49+
if (x_dims[0] == out_dims[0]) {
50+
// Only pass LoD when the first dimension of output and Input(X)
51+
// are the same.
52+
ctx->ShareLoD("X", /*->*/ "Out");
53+
}
54+
}
55+
56+
static framework::DDim ValidateShape(const std::vector<int> shape,
57+
const framework::DDim &in_dims) {
58+
const int64_t in_size = framework::product(in_dims);
59+
// only one dimension can be set to -1, whose size will be automatically
60+
// infered.
61+
const int64_t unk_dim_val = -1;
62+
const int64_t copy_dim_val = 0;
63+
64+
std::vector<int64_t> output_shape(shape.size(), 0);
65+
int64_t capacity = 1;
66+
int unk_dim_idx = -1;
67+
for (size_t i = 0; i < shape.size(); ++i) {
68+
if (shape[i] == unk_dim_val) {
69+
PADDLE_ENFORCE(
70+
unk_dim_idx == -1,
71+
"Only one input dimension of Attr(shape) can be unknown.");
72+
unk_dim_idx = i;
73+
} else if (shape[i] == copy_dim_val) {
74+
PADDLE_ENFORCE(
75+
static_cast<int>(i) < in_dims.size(),
76+
"The index of dimension to copy from input shape must be less "
77+
"than the size of input shape.");
78+
} else {
79+
PADDLE_ENFORCE(
80+
shape[i] > 0,
81+
"Each input dimension of Attr(shape) must not be negtive except "
82+
"one unknown dimension.");
83+
}
84+
85+
capacity *= (shape[i] ? shape[i] : in_dims[i]);
86+
output_shape[i] =
87+
(shape[i] ? static_cast<int64_t>(shape[i]) : in_dims[i]);
88+
}
89+
90+
if (unk_dim_idx != -1) {
91+
if (in_size > 0) {
92+
// in_size < 0 and is un-determinate in compile time, skip the check,
93+
// for example, in_dims = [-1, 8, 1, 1], shape = [-1, 3, 8],
94+
// capacity = -24, in_size = -8, output_shape[0] = 0
95+
// the following check will fail.
96+
output_shape[unk_dim_idx] = -in_size / capacity;
97+
PADDLE_ENFORCE_EQ(output_shape[unk_dim_idx] * capacity, -in_size,
98+
"Invalid shape is given.");
99+
} else {
100+
output_shape[unk_dim_idx] = -1;
101+
}
102+
} else {
103+
PADDLE_ENFORCE_EQ(capacity, in_size, "Invalid shape is given.");
104+
}
105+
return framework::make_ddim(output_shape);
106+
}
107+
108+
protected:
109+
framework::OpKernelType GetExpectedKernelType(
110+
const framework::ExecutionContext &ctx) const override {
111+
return framework::OpKernelType(
112+
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
113+
ctx.device_context());
114+
}
115+
};
116+
23117
class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker {
24118
public:
25119
void Make() override {
@@ -107,19 +201,93 @@ class ReshapeGradOp : public framework::OperatorWithKernel {
107201
}
108202
};
109203

204+
class ReshapeKernel {
205+
public:
206+
void operator()(const framework::ExecutionContext &ctx) const {
207+
auto *out = ctx.Output<framework::LoDTensor>("Out");
208+
auto *in = ctx.Input<framework::LoDTensor>("X");
209+
210+
auto *shape_tensor = ctx.HasInput("Shape")
211+
? ctx.Input<framework::LoDTensor>("Shape")
212+
: nullptr;
213+
214+
framework::DDim out_dims = out->dims();
215+
216+
if (shape_tensor) {
217+
auto *shape_data = shape_tensor->data<int>();
218+
framework::Tensor cpu_shape_tensor;
219+
if (platform::is_gpu_place(ctx.GetPlace())) {
220+
TensorCopySync(*shape_tensor, platform::CPUPlace(), &cpu_shape_tensor);
221+
shape_data = cpu_shape_tensor.data<int>();
222+
}
223+
auto shape =
224+
std::vector<int>(shape_data, shape_data + shape_tensor->numel());
225+
out_dims = ReshapeOp::ValidateShape(shape, in->dims());
226+
}
227+
if (!in->lod().empty()) {
228+
PADDLE_ENFORCE_EQ(
229+
out_dims[0], in->dims()[0],
230+
"Reshape operator cannot reshape an input sequence batch "
231+
"into an output sequence batch that has a different "
232+
"number of time steps. Please consider using "
233+
"sequence_reshape op.");
234+
}
235+
236+
bool inplace = ctx.Attr<bool>("inplace");
237+
out->Resize(out_dims);
238+
if (!inplace) {
239+
out->mutable_data(ctx.GetPlace(), in->type());
240+
framework::TensorCopySync(*in, ctx.GetPlace(), out);
241+
out->Resize(out_dims);
242+
} else {
243+
out->ShareDataWith(*in);
244+
out->Resize(out_dims);
245+
}
246+
}
247+
};
248+
249+
class ReshapeGradKernel {
250+
public:
251+
void operator()(const framework::ExecutionContext &ctx) const {
252+
auto *d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
253+
auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
254+
255+
d_x->mutable_data(ctx.GetPlace(), d_out->type());
256+
bool inplace = ctx.Attr<bool>("inplace");
257+
258+
auto in_dims = d_x->dims();
259+
if (!inplace) {
260+
framework::TensorCopy(*d_out, ctx.GetPlace(), ctx.device_context(), d_x);
261+
ctx.device_context().Wait();
262+
d_x->Resize(in_dims);
263+
} else {
264+
d_x->ShareDataWith(*d_out);
265+
d_x->Resize(in_dims);
266+
}
267+
}
268+
};
269+
110270
} // namespace operators
111271
} // namespace paddle
112272
namespace ops = paddle::operators;
113-
using CPU = paddle::platform::CPUDeviceContext;
114273

115274
REGISTER_OPERATOR(reshape, ops::ReshapeOp, ops::ReshapeOpMaker,
116275
paddle::framework::DefaultGradOpDescMaker<true>);
117276
REGISTER_OPERATOR(reshape_grad, ops::ReshapeGradOp);
118-
REGISTER_OP_CPU_KERNEL(reshape, ops::ReshapeKernel<CPU, float>,
119-
ops::ReshapeKernel<CPU, double>,
120-
ops::ReshapeKernel<CPU, int>,
121-
ops::ReshapeKernel<CPU, int64_t>);
122-
REGISTER_OP_CPU_KERNEL(reshape_grad, ops::ReshapeGradKernel<CPU, float>,
123-
ops::ReshapeGradKernel<CPU, double>,
124-
ops::ReshapeGradKernel<CPU, int>,
125-
ops::ReshapeGradKernel<CPU, int64_t>);
277+
// Kernel registrations. Arguments after the op name are
// (data type, functor type) pairs.
REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape,
                               float, ops::ReshapeKernel,
                               double, ops::ReshapeKernel,
                               int, ops::ReshapeKernel,
                               int64_t, ops::ReshapeKernel);
REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape_grad,
                               float, ops::ReshapeGradKernel,
                               double, ops::ReshapeGradKernel,
                               int, ops::ReshapeGradKernel,
                               int64_t, ops::ReshapeGradKernel);

#ifdef PADDLE_WITH_CUDA
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape,
                                float, ops::ReshapeKernel,
                                double, ops::ReshapeKernel,
                                int, ops::ReshapeKernel,
                                int64_t, ops::ReshapeKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad,
                                float, ops::ReshapeGradKernel,
                                double, ops::ReshapeGradKernel,
                                int, ops::ReshapeGradKernel,
                                int64_t, ops::ReshapeGradKernel);
#endif

paddle/fluid/operators/reshape_op.cu

Lines changed: 0 additions & 26 deletions
This file was deleted.

0 commit comments

Comments
 (0)